Mercurial > repos > yating-l > jbrowsearchivecreator
changeset 3:7d1a9a91b989 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/Python-ast.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,535 @@ +/* File automatically generated by Parser/asdl_c.py. */ + +#include "asdl.h" + +typedef struct _mod *mod_ty; + +typedef struct _stmt *stmt_ty; + +typedef struct _expr *expr_ty; + +typedef enum _expr_context { Load=1, Store=2, Del=3, AugLoad=4, AugStore=5, + Param=6 } expr_context_ty; + +typedef struct _slice *slice_ty; + +typedef enum _boolop { And=1, Or=2 } boolop_ty; + +typedef enum _operator { Add=1, Sub=2, Mult=3, Div=4, Mod=5, Pow=6, LShift=7, + RShift=8, BitOr=9, BitXor=10, BitAnd=11, FloorDiv=12 } + operator_ty; + +typedef enum _unaryop { Invert=1, Not=2, UAdd=3, USub=4 } unaryop_ty; + +typedef enum _cmpop { Eq=1, NotEq=2, Lt=3, LtE=4, Gt=5, GtE=6, Is=7, IsNot=8, + In=9, NotIn=10 } cmpop_ty; + +typedef struct _comprehension *comprehension_ty; + +typedef struct _excepthandler *excepthandler_ty; + +typedef struct _arguments *arguments_ty; + +typedef struct _keyword *keyword_ty; + +typedef struct _alias *alias_ty; + + +enum _mod_kind {Module_kind=1, Interactive_kind=2, Expression_kind=3, + Suite_kind=4}; +struct _mod { + enum _mod_kind kind; + union { + struct { + asdl_seq *body; + } Module; + + struct { + asdl_seq *body; + } Interactive; + + struct { + expr_ty body; + } Expression; + + struct { + asdl_seq *body; + } Suite; + + } v; +}; + +enum _stmt_kind {FunctionDef_kind=1, ClassDef_kind=2, Return_kind=3, + Delete_kind=4, Assign_kind=5, AugAssign_kind=6, Print_kind=7, + For_kind=8, While_kind=9, If_kind=10, With_kind=11, + Raise_kind=12, TryExcept_kind=13, TryFinally_kind=14, + Assert_kind=15, Import_kind=16, ImportFrom_kind=17, + Exec_kind=18, Global_kind=19, Expr_kind=20, Pass_kind=21, + Break_kind=22, Continue_kind=23}; +struct _stmt { + enum _stmt_kind kind; + union { + struct { + identifier name; + arguments_ty args; + asdl_seq *body; + asdl_seq *decorator_list; + } FunctionDef; + + struct { + identifier name; + asdl_seq *bases; + asdl_seq *body; + asdl_seq *decorator_list; + } ClassDef; + + struct { + expr_ty value; + } Return; + + struct { + asdl_seq *targets; + } Delete; + + struct { + asdl_seq *targets; + expr_ty value; + } Assign; + + struct { + expr_ty target; + operator_ty op; + expr_ty value; + } AugAssign; + + struct { + expr_ty dest; + asdl_seq *values; + bool nl; + } Print; + + struct { + expr_ty target; + expr_ty iter; + asdl_seq *body; + asdl_seq *orelse; + } For; + + struct { + expr_ty test; + asdl_seq *body; + asdl_seq *orelse; + } While; + + struct { + expr_ty test; + asdl_seq *body; + asdl_seq *orelse; + } If; + + struct { + expr_ty context_expr; + expr_ty optional_vars; + asdl_seq *body; + } With; + + struct { + expr_ty type; + expr_ty inst; + expr_ty tback; + } Raise; + + struct { + asdl_seq *body; + asdl_seq *handlers; + asdl_seq *orelse; + } TryExcept; + + struct { + asdl_seq *body; + asdl_seq *finalbody; + } TryFinally; + + struct { + expr_ty test; + expr_ty msg; + } Assert; + + struct { + asdl_seq *names; + } Import; + + struct { + identifier module; + asdl_seq *names; + int level; + } ImportFrom; + + struct { + expr_ty body; + expr_ty globals; + expr_ty locals; + } Exec; + + struct { + asdl_seq *names; + } Global; + + struct { + expr_ty value; + } Expr; + + } v; + int lineno; + int col_offset; +}; + +enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4, + IfExp_kind=5, Dict_kind=6, Set_kind=7, ListComp_kind=8, + SetComp_kind=9, DictComp_kind=10, GeneratorExp_kind=11, + Yield_kind=12, Compare_kind=13, Call_kind=14, Repr_kind=15, + Num_kind=16, Str_kind=17, Attribute_kind=18, + Subscript_kind=19, Name_kind=20, List_kind=21, Tuple_kind=22}; +struct _expr { + enum _expr_kind kind; + union { + struct { + boolop_ty op; + asdl_seq *values; + } BoolOp; + + struct { + expr_ty left; + operator_ty op; + expr_ty right; + } BinOp; + + struct { + unaryop_ty op; + expr_ty operand; + } UnaryOp; + + struct { + arguments_ty args; + expr_ty body; + } Lambda; + + struct { + expr_ty test; + expr_ty body; + expr_ty orelse; + } IfExp; + + struct { + asdl_seq *keys; + asdl_seq *values; + } Dict; + + struct { + asdl_seq *elts; + } Set; + + struct { + expr_ty elt; + asdl_seq *generators; + } ListComp; + + struct { + expr_ty elt; + asdl_seq *generators; + } SetComp; + + struct { + expr_ty key; + expr_ty value; + asdl_seq *generators; + } DictComp; + + struct { + expr_ty elt; + asdl_seq *generators; + } GeneratorExp; + + struct { + expr_ty value; + } Yield; + + struct { + expr_ty left; + asdl_int_seq *ops; + asdl_seq *comparators; + } Compare; + + struct { + expr_ty func; + asdl_seq *args; + asdl_seq *keywords; + expr_ty starargs; + expr_ty kwargs; + } Call; + + struct { + expr_ty value; + } Repr; + + struct { + object n; + } Num; + + struct { + string s; + } Str; + + struct { + expr_ty value; + identifier attr; + expr_context_ty ctx; + } Attribute; + + struct { + expr_ty value; + slice_ty slice; + expr_context_ty ctx; + } Subscript; + + struct { + identifier id; + expr_context_ty ctx; + } Name; + + struct { + asdl_seq *elts; + expr_context_ty ctx; + } List; + + struct { + asdl_seq *elts; + expr_context_ty ctx; + } Tuple; + + } v; + int lineno; + int col_offset; +}; + +enum _slice_kind {Ellipsis_kind=1, Slice_kind=2, ExtSlice_kind=3, Index_kind=4}; +struct _slice { + enum _slice_kind kind; + union { + struct { + expr_ty lower; + expr_ty upper; + expr_ty step; + } Slice; + + struct { + asdl_seq *dims; + } ExtSlice; + + struct { + expr_ty value; + } Index; + + } v; +}; + +struct _comprehension { + expr_ty target; + expr_ty iter; + asdl_seq *ifs; +}; + +enum _excepthandler_kind {ExceptHandler_kind=1}; +struct _excepthandler { + enum _excepthandler_kind kind; + union { + struct { + expr_ty type; + expr_ty name; + asdl_seq *body; + } ExceptHandler; + + } v; + int lineno; + int col_offset; +}; + +struct _arguments { + asdl_seq *args; + identifier vararg; + identifier kwarg; + asdl_seq *defaults; +}; + +struct _keyword { + identifier arg; + expr_ty value; +}; + +struct _alias { + identifier name; + identifier asname; +}; + + +#define Module(a0, a1) _Py_Module(a0, a1) +mod_ty _Py_Module(asdl_seq * body, PyArena *arena); +#define Interactive(a0, a1) _Py_Interactive(a0, a1) +mod_ty _Py_Interactive(asdl_seq * body, PyArena *arena); +#define Expression(a0, a1) _Py_Expression(a0, a1) +mod_ty _Py_Expression(expr_ty body, PyArena *arena); +#define Suite(a0, a1) _Py_Suite(a0, a1) +mod_ty _Py_Suite(asdl_seq * body, PyArena *arena); +#define FunctionDef(a0, a1, a2, a3, a4, a5, a6) _Py_FunctionDef(a0, a1, a2, a3, a4, a5, a6) +stmt_ty _Py_FunctionDef(identifier name, arguments_ty args, asdl_seq * body, + asdl_seq * decorator_list, int lineno, int col_offset, + PyArena *arena); +#define ClassDef(a0, a1, a2, a3, a4, a5, a6) _Py_ClassDef(a0, a1, a2, a3, a4, a5, a6) +stmt_ty _Py_ClassDef(identifier name, asdl_seq * bases, asdl_seq * body, + asdl_seq * decorator_list, int lineno, int col_offset, + PyArena *arena); +#define Return(a0, a1, a2, a3) _Py_Return(a0, a1, a2, a3) +stmt_ty _Py_Return(expr_ty value, int lineno, int col_offset, PyArena *arena); +#define Delete(a0, a1, a2, a3) _Py_Delete(a0, a1, a2, a3) +stmt_ty _Py_Delete(asdl_seq * targets, int lineno, int col_offset, PyArena + *arena); +#define Assign(a0, a1, a2, a3, a4) _Py_Assign(a0, a1, a2, a3, a4) +stmt_ty _Py_Assign(asdl_seq * targets, expr_ty value, int lineno, int + col_offset, PyArena *arena); +#define AugAssign(a0, a1, a2, a3, a4, a5) _Py_AugAssign(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_AugAssign(expr_ty target, operator_ty op, expr_ty value, int + lineno, int col_offset, PyArena *arena); +#define Print(a0, a1, a2, a3, a4, a5) _Py_Print(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_Print(expr_ty dest, asdl_seq * values, bool nl, int lineno, int + col_offset, PyArena *arena); +#define For(a0, a1, a2, a3, a4, a5, a6) _Py_For(a0, a1, a2, a3, a4, a5, a6) +stmt_ty _Py_For(expr_ty target, expr_ty iter, asdl_seq * body, asdl_seq * + orelse, int lineno, int col_offset, PyArena *arena); +#define While(a0, a1, a2, a3, a4, a5) _Py_While(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, + int col_offset, PyArena *arena); +#define If(a0, a1, a2, a3, a4, a5) _Py_If(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, + int col_offset, PyArena *arena); +#define With(a0, a1, a2, a3, a4, a5) _Py_With(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_With(expr_ty context_expr, expr_ty optional_vars, asdl_seq * body, + int lineno, int col_offset, PyArena *arena); +#define Raise(a0, a1, a2, a3, a4, a5) _Py_Raise(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_Raise(expr_ty type, expr_ty inst, expr_ty tback, int lineno, int + col_offset, PyArena *arena); +#define TryExcept(a0, a1, a2, a3, a4, a5) _Py_TryExcept(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_TryExcept(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse, + int lineno, int col_offset, PyArena *arena); +#define TryFinally(a0, a1, a2, a3, a4) _Py_TryFinally(a0, a1, a2, a3, a4) +stmt_ty _Py_TryFinally(asdl_seq * body, asdl_seq * finalbody, int lineno, int + col_offset, PyArena *arena); +#define Assert(a0, a1, a2, a3, a4) _Py_Assert(a0, a1, a2, a3, a4) +stmt_ty _Py_Assert(expr_ty test, expr_ty msg, int lineno, int col_offset, + PyArena *arena); +#define Import(a0, a1, a2, a3) _Py_Import(a0, a1, a2, a3) +stmt_ty _Py_Import(asdl_seq * names, int lineno, int col_offset, PyArena + *arena); +#define ImportFrom(a0, a1, a2, a3, a4, a5) _Py_ImportFrom(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_ImportFrom(identifier module, asdl_seq * names, int level, int + lineno, int col_offset, PyArena *arena); +#define Exec(a0, a1, a2, a3, a4, a5) _Py_Exec(a0, a1, a2, a3, a4, a5) +stmt_ty _Py_Exec(expr_ty body, expr_ty globals, expr_ty locals, int lineno, int + col_offset, PyArena *arena); +#define Global(a0, a1, a2, a3) _Py_Global(a0, a1, a2, a3) +stmt_ty _Py_Global(asdl_seq * names, int lineno, int col_offset, PyArena + *arena); +#define Expr(a0, a1, a2, a3) _Py_Expr(a0, a1, a2, a3) +stmt_ty _Py_Expr(expr_ty value, int lineno, int col_offset, PyArena *arena); +#define Pass(a0, a1, a2) _Py_Pass(a0, a1, a2) +stmt_ty _Py_Pass(int lineno, int col_offset, PyArena *arena); +#define Break(a0, a1, a2) _Py_Break(a0, a1, a2) +stmt_ty _Py_Break(int lineno, int col_offset, PyArena *arena); +#define Continue(a0, a1, a2) _Py_Continue(a0, a1, a2) +stmt_ty _Py_Continue(int lineno, int col_offset, PyArena *arena); +#define BoolOp(a0, a1, a2, a3, a4) _Py_BoolOp(a0, a1, a2, a3, a4) +expr_ty _Py_BoolOp(boolop_ty op, asdl_seq * values, int lineno, int col_offset, + PyArena *arena); +#define BinOp(a0, a1, a2, a3, a4, a5) _Py_BinOp(a0, a1, a2, a3, a4, a5) +expr_ty _Py_BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno, int + col_offset, PyArena *arena); +#define UnaryOp(a0, a1, a2, a3, a4) _Py_UnaryOp(a0, a1, a2, a3, a4) +expr_ty _Py_UnaryOp(unaryop_ty op, expr_ty operand, int lineno, int col_offset, + PyArena *arena); +#define Lambda(a0, a1, a2, a3, a4) _Py_Lambda(a0, a1, a2, a3, a4) +expr_ty _Py_Lambda(arguments_ty args, expr_ty body, int lineno, int col_offset, + PyArena *arena); +#define IfExp(a0, a1, a2, a3, a4, a5) _Py_IfExp(a0, a1, a2, a3, a4, a5) +expr_ty _Py_IfExp(expr_ty test, expr_ty body, expr_ty orelse, int lineno, int + col_offset, PyArena *arena); +#define Dict(a0, a1, a2, a3, a4) _Py_Dict(a0, a1, a2, a3, a4) +expr_ty _Py_Dict(asdl_seq * keys, asdl_seq * values, int lineno, int + col_offset, PyArena *arena); +#define Set(a0, a1, a2, a3) _Py_Set(a0, a1, a2, a3) +expr_ty _Py_Set(asdl_seq * elts, int lineno, int col_offset, PyArena *arena); +#define ListComp(a0, a1, a2, a3, a4) _Py_ListComp(a0, a1, a2, a3, a4) +expr_ty _Py_ListComp(expr_ty elt, asdl_seq * generators, int lineno, int + col_offset, PyArena *arena); +#define SetComp(a0, a1, a2, a3, a4) _Py_SetComp(a0, a1, a2, a3, a4) +expr_ty _Py_SetComp(expr_ty elt, asdl_seq * generators, int lineno, int + col_offset, PyArena *arena); +#define DictComp(a0, a1, a2, a3, a4, a5) _Py_DictComp(a0, a1, a2, a3, a4, a5) +expr_ty _Py_DictComp(expr_ty key, expr_ty value, asdl_seq * generators, int + lineno, int col_offset, PyArena *arena); +#define GeneratorExp(a0, a1, a2, a3, a4) _Py_GeneratorExp(a0, a1, a2, a3, a4) +expr_ty _Py_GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno, int + col_offset, PyArena *arena); +#define Yield(a0, a1, a2, a3) _Py_Yield(a0, a1, a2, a3) +expr_ty _Py_Yield(expr_ty value, int lineno, int col_offset, PyArena *arena); +#define Compare(a0, a1, a2, a3, a4, a5) _Py_Compare(a0, a1, a2, a3, a4, a5) +expr_ty _Py_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators, + int lineno, int col_offset, PyArena *arena); +#define Call(a0, a1, a2, a3, a4, a5, a6, a7) _Py_Call(a0, a1, a2, a3, a4, a5, a6, a7) +expr_ty _Py_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty + starargs, expr_ty kwargs, int lineno, int col_offset, PyArena + *arena); +#define Repr(a0, a1, a2, a3) _Py_Repr(a0, a1, a2, a3) +expr_ty _Py_Repr(expr_ty value, int lineno, int col_offset, PyArena *arena); +#define Num(a0, a1, a2, a3) _Py_Num(a0, a1, a2, a3) +expr_ty _Py_Num(object n, int lineno, int col_offset, PyArena *arena); +#define Str(a0, a1, a2, a3) _Py_Str(a0, a1, a2, a3) +expr_ty _Py_Str(string s, int lineno, int col_offset, PyArena *arena); +#define Attribute(a0, a1, a2, a3, a4, a5) _Py_Attribute(a0, a1, a2, a3, a4, a5) +expr_ty _Py_Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int + lineno, int col_offset, PyArena *arena); +#define Subscript(a0, a1, a2, a3, a4, a5) _Py_Subscript(a0, a1, a2, a3, a4, a5) +expr_ty _Py_Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx, int + lineno, int col_offset, PyArena *arena); +#define Name(a0, a1, a2, a3, a4) _Py_Name(a0, a1, a2, a3, a4) +expr_ty _Py_Name(identifier id, expr_context_ty ctx, int lineno, int + col_offset, PyArena *arena); +#define List(a0, a1, a2, a3, a4) _Py_List(a0, a1, a2, a3, a4) +expr_ty _Py_List(asdl_seq * elts, expr_context_ty ctx, int lineno, int + col_offset, PyArena *arena); +#define Tuple(a0, a1, a2, a3, a4) _Py_Tuple(a0, a1, a2, a3, a4) +expr_ty _Py_Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno, int + col_offset, PyArena *arena); +#define Ellipsis(a0) _Py_Ellipsis(a0) +slice_ty _Py_Ellipsis(PyArena *arena); +#define Slice(a0, a1, a2, a3) _Py_Slice(a0, a1, a2, a3) +slice_ty _Py_Slice(expr_ty lower, expr_ty upper, expr_ty step, PyArena *arena); +#define ExtSlice(a0, a1) _Py_ExtSlice(a0, a1) +slice_ty _Py_ExtSlice(asdl_seq * dims, PyArena *arena); +#define Index(a0, a1) _Py_Index(a0, a1) +slice_ty _Py_Index(expr_ty value, PyArena *arena); +#define comprehension(a0, a1, a2, a3) _Py_comprehension(a0, a1, a2, a3) +comprehension_ty _Py_comprehension(expr_ty target, expr_ty iter, asdl_seq * + ifs, PyArena *arena); +#define ExceptHandler(a0, a1, a2, a3, a4, a5) _Py_ExceptHandler(a0, a1, a2, a3, a4, a5) +excepthandler_ty _Py_ExceptHandler(expr_ty type, expr_ty name, asdl_seq * body, + int lineno, int col_offset, PyArena *arena); +#define arguments(a0, a1, a2, a3, a4) _Py_arguments(a0, a1, a2, a3, a4) +arguments_ty _Py_arguments(asdl_seq * args, identifier vararg, identifier + kwarg, asdl_seq * defaults, PyArena *arena); +#define keyword(a0, a1, a2) _Py_keyword(a0, a1, a2) +keyword_ty _Py_keyword(identifier arg, expr_ty value, PyArena *arena); +#define alias(a0, a1, a2) _Py_alias(a0, a1, a2) +alias_ty _Py_alias(identifier name, identifier asname, PyArena *arena); + +PyObject* PyAST_mod2obj(mod_ty t); +mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode); +int PyAST_Check(PyObject* obj);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/Python.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,178 @@ +#ifndef Py_PYTHON_H +#define Py_PYTHON_H +/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */ + +/* Include nearly all Python header files */ + +#include "patchlevel.h" +#include "pyconfig.h" +#include "pymacconfig.h" + +/* Cyclic gc is always enabled, starting with release 2.3a1. Supply the + * old symbol for the benefit of extension modules written before then + * that may be conditionalizing on it. The core doesn't use it anymore. + */ +#ifndef WITH_CYCLE_GC +#define WITH_CYCLE_GC 1 +#endif + +#include <limits.h> + +#ifndef UCHAR_MAX +#error "Something's broken. UCHAR_MAX should be defined in limits.h." +#endif + +#if UCHAR_MAX != 255 +#error "Python's source code assumes C's unsigned char is an 8-bit type." +#endif + +#if defined(__sgi) && defined(WITH_THREAD) && !defined(_SGI_MP_SOURCE) +#define _SGI_MP_SOURCE +#endif + +#include <stdio.h> +#ifndef NULL +# error "Python.h requires that stdio.h define NULL." +#endif + +#include <string.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +/* For size_t? */ +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +/* CAUTION: Build setups should ensure that NDEBUG is defined on the + * compiler command line when building Python in release mode; else + * assert() calls won't be removed. + */ +#include <assert.h> + +#include "pyport.h" + +/* pyconfig.h or pyport.h may or may not define DL_IMPORT */ +#ifndef DL_IMPORT /* declarations for DLL import/export */ +#define DL_IMPORT(RTYPE) RTYPE +#endif +#ifndef DL_EXPORT /* declarations for DLL import/export */ +#define DL_EXPORT(RTYPE) RTYPE +#endif + +/* Debug-mode build with pymalloc implies PYMALLOC_DEBUG. + * PYMALLOC_DEBUG is in error if pymalloc is not in use. + */ +#if defined(Py_DEBUG) && defined(WITH_PYMALLOC) && !defined(PYMALLOC_DEBUG) +#define PYMALLOC_DEBUG +#endif +#if defined(PYMALLOC_DEBUG) && !defined(WITH_PYMALLOC) +#error "PYMALLOC_DEBUG requires WITH_PYMALLOC" +#endif +#include "pymath.h" +#include "pymem.h" + +#include "object.h" +#include "objimpl.h" + +#include "pydebug.h" + +#include "unicodeobject.h" +#include "intobject.h" +#include "boolobject.h" +#include "longobject.h" +#include "floatobject.h" +#ifndef WITHOUT_COMPLEX +#include "complexobject.h" +#endif +#include "rangeobject.h" +#include "stringobject.h" +#include "memoryobject.h" +#include "bufferobject.h" +#include "bytesobject.h" +#include "bytearrayobject.h" +#include "tupleobject.h" +#include "listobject.h" +#include "dictobject.h" +#include "enumobject.h" +#include "setobject.h" +#include "methodobject.h" +#include "moduleobject.h" +#include "funcobject.h" +#include "classobject.h" +#include "fileobject.h" +#include "cobject.h" +#include "pycapsule.h" +#include "traceback.h" +#include "sliceobject.h" +#include "cellobject.h" +#include "iterobject.h" +#include "genobject.h" +#include "descrobject.h" +#include "warnings.h" +#include "weakrefobject.h" + +#include "codecs.h" +#include "pyerrors.h" + +#include "pystate.h" + +#include "pyarena.h" +#include "modsupport.h" +#include "pythonrun.h" +#include "ceval.h" +#include "sysmodule.h" +#include "intrcheck.h" +#include "import.h" + +#include "abstract.h" + +#include "compile.h" +#include "eval.h" + +#include "pyctype.h" +#include "pystrtod.h" +#include "pystrcmp.h" +#include "dtoa.h" + +/* _Py_Mangle is defined in compile.c */ +PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); + +/* PyArg_GetInt is deprecated and should not be used, use PyArg_Parse(). */ +#define PyArg_GetInt(v, a) PyArg_Parse((v), "i", (a)) + +/* PyArg_NoArgs should not be necessary. + Set ml_flags in the PyMethodDef to METH_NOARGS. */ +#define PyArg_NoArgs(v) PyArg_Parse(v, "") + +/* Argument must be a char or an int in [-128, 127] or [0, 255]. */ +#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff)) + +#include "pyfpe.h" + +/* These definitions must match corresponding definitions in graminit.h. + There's code in compile.c that checks that they are the same. */ +#define Py_single_input 256 +#define Py_file_input 257 +#define Py_eval_input 258 + +#ifdef HAVE_PTH +/* GNU pth user-space thread support */ +#include <pth.h> +#endif + +/* Define macros for inline documentation. */ +#define PyDoc_VAR(name) static char name[] +#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) +#ifdef WITH_DOC_STRINGS +#define PyDoc_STR(str) str +#else +#define PyDoc_STR(str) "" +#endif + +#endif /* !Py_PYTHON_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/abstract.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,1396 @@ +#ifndef Py_ABSTRACTOBJECT_H +#define Py_ABSTRACTOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef PY_SSIZE_T_CLEAN +#define PyObject_CallFunction _PyObject_CallFunction_SizeT +#define PyObject_CallMethod _PyObject_CallMethod_SizeT +#endif + +/* Abstract Object Interface (many thanks to Jim Fulton) */ + +/* + PROPOSAL: A Generic Python Object Interface for Python C Modules + +Problem + + Python modules written in C that must access Python objects must do + so through routines whose interfaces are described by a set of + include files. Unfortunately, these routines vary according to the + object accessed. To use these routines, the C programmer must check + the type of the object being used and must call a routine based on + the object type. For example, to access an element of a sequence, + the programmer must determine whether the sequence is a list or a + tuple: + + if(is_tupleobject(o)) + e=gettupleitem(o,i) + else if(is_listitem(o)) + e=getlistitem(o,i) + + If the programmer wants to get an item from another type of object + that provides sequence behavior, there is no clear way to do it + correctly. + + The persistent programmer may peruse object.h and find that the + _typeobject structure provides a means of invoking up to (currently + about) 41 special operators. So, for example, a routine can get an + item from any object that provides sequence behavior. However, to + use this mechanism, the programmer must make their code dependent on + the current Python implementation. + + Also, certain semantics, especially memory management semantics, may + differ by the type of object being used. Unfortunately, these + semantics are not clearly described in the current include files. + An abstract interface providing more consistent semantics is needed. + +Proposal + + I propose the creation of a standard interface (with an associated + library of routines and/or macros) for generically obtaining the + services of Python objects. This proposal can be viewed as one + components of a Python C interface consisting of several components. + + From the viewpoint of C access to Python services, we have (as + suggested by Guido in off-line discussions): + + - "Very high level layer": two or three functions that let you exec or + eval arbitrary Python code given as a string in a module whose name is + given, passing C values in and getting C values out using + mkvalue/getargs style format strings. This does not require the user + to declare any variables of type "PyObject *". This should be enough + to write a simple application that gets Python code from the user, + execs it, and returns the output or errors. (Error handling must also + be part of this API.) + + - "Abstract objects layer": which is the subject of this proposal. + It has many functions operating on objects, and lest you do many + things from C that you can also write in Python, without going + through the Python parser. + + - "Concrete objects layer": This is the public type-dependent + interface provided by the standard built-in types, such as floats, + strings, and lists. This interface exists and is currently + documented by the collection of include files provided with the + Python distributions. + + From the point of view of Python accessing services provided by C + modules: + + - "Python module interface": this interface consist of the basic + routines used to define modules and their members. Most of the + current extensions-writing guide deals with this interface. + + - "Built-in object interface": this is the interface that a new + built-in type must provide and the mechanisms and rules that a + developer of a new built-in type must use and follow. + + This proposal is a "first-cut" that is intended to spur + discussion. See especially the lists of notes. + + The Python C object interface will provide four protocols: object, + numeric, sequence, and mapping. Each protocol consists of a + collection of related operations. If an operation that is not + provided by a particular type is invoked, then a standard exception, + NotImplementedError is raised with a operation name as an argument. + In addition, for convenience this interface defines a set of + constructors for building objects of built-in types. This is needed + so new objects can be returned from C functions that otherwise treat + objects generically. + +Memory Management + + For all of the functions described in this proposal, if a function + retains a reference to a Python object passed as an argument, then the + function will increase the reference count of the object. It is + unnecessary for the caller to increase the reference count of an + argument in anticipation of the object's retention. + + All Python objects returned from functions should be treated as new + objects. Functions that return objects assume that the caller will + retain a reference and the reference count of the object has already + been incremented to account for this fact. A caller that does not + retain a reference to an object that is returned from a function + must decrement the reference count of the object (using + DECREF(object)) to prevent memory leaks. + + Note that the behavior mentioned here is different from the current + behavior for some objects (e.g. lists and tuples) when certain + type-specific routines are called directly (e.g. setlistitem). The + proposed abstraction layer will provide a consistent memory + management interface, correcting for inconsistent behavior for some + built-in types. + +Protocols + +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ + +/* Object Protocol: */ + + /* Implemented elsewhere: + + int PyObject_Print(PyObject *o, FILE *fp, int flags); + + Print an object, o, on file, fp. Returns -1 on + error. The flags argument is used to enable certain printing + options. The only option currently supported is Py_Print_RAW. + + (What should be said about Py_Print_RAW?) + + */ + + /* Implemented elsewhere: + + int PyObject_HasAttrString(PyObject *o, char *attr_name); + + Returns 1 if o has the attribute attr_name, and 0 otherwise. + This is equivalent to the Python expression: + hasattr(o,attr_name). + + This function always succeeds. + + */ + + /* Implemented elsewhere: + + PyObject* PyObject_GetAttrString(PyObject *o, char *attr_name); + + Retrieve an attributed named attr_name form object o. + Returns the attribute value on success, or NULL on failure. + This is the equivalent of the Python expression: o.attr_name. + + */ + + /* Implemented elsewhere: + + int PyObject_HasAttr(PyObject *o, PyObject *attr_name); + + Returns 1 if o has the attribute attr_name, and 0 otherwise. + This is equivalent to the Python expression: + hasattr(o,attr_name). + + This function always succeeds. + + */ + + /* Implemented elsewhere: + + PyObject* PyObject_GetAttr(PyObject *o, PyObject *attr_name); + + Retrieve an attributed named attr_name form object o. + Returns the attribute value on success, or NULL on failure. + This is the equivalent of the Python expression: o.attr_name. + + */ + + + /* Implemented elsewhere: + + int PyObject_SetAttrString(PyObject *o, char *attr_name, PyObject *v); + + Set the value of the attribute named attr_name, for object o, + to the value, v. Returns -1 on failure. This is + the equivalent of the Python statement: o.attr_name=v. + + */ + + /* Implemented elsewhere: + + int PyObject_SetAttr(PyObject *o, PyObject *attr_name, PyObject *v); + + Set the value of the attribute named attr_name, for object o, + to the value, v. Returns -1 on failure. This is + the equivalent of the Python statement: o.attr_name=v. + + */ + + /* implemented as a macro: + + int PyObject_DelAttrString(PyObject *o, char *attr_name); + + Delete attribute named attr_name, for object o. Returns + -1 on failure. This is the equivalent of the Python + statement: del o.attr_name. + + */ +#define PyObject_DelAttrString(O,A) PyObject_SetAttrString((O),(A),NULL) + + /* implemented as a macro: + + int PyObject_DelAttr(PyObject *o, PyObject *attr_name); + + Delete attribute named attr_name, for object o. Returns -1 + on failure. This is the equivalent of the Python + statement: del o.attr_name. + + */ +#define PyObject_DelAttr(O,A) PyObject_SetAttr((O),(A),NULL) + + PyAPI_FUNC(int) PyObject_Cmp(PyObject *o1, PyObject *o2, int *result); + + /* + Compare the values of o1 and o2 using a routine provided by + o1, if one exists, otherwise with a routine provided by o2. + The result of the comparison is returned in result. Returns + -1 on failure. This is the equivalent of the Python + statement: result=cmp(o1,o2). + + */ + + /* Implemented elsewhere: + + int PyObject_Compare(PyObject *o1, PyObject *o2); + + Compare the values of o1 and o2 using a routine provided by + o1, if one exists, otherwise with a routine provided by o2. + Returns the result of the comparison on success. On error, + the value returned is undefined. This is equivalent to the + Python expression: cmp(o1,o2). + + */ + + /* Implemented elsewhere: + + PyObject *PyObject_Repr(PyObject *o); + + Compute the string representation of object, o. Returns the + string representation on success, NULL on failure. This is + the equivalent of the Python expression: repr(o). + + Called by the repr() built-in function and by reverse quotes. + + */ + + /* Implemented elsewhere: + + PyObject *PyObject_Str(PyObject *o); + + Compute the string representation of object, o. Returns the + string representation on success, NULL on failure. This is + the equivalent of the Python expression: str(o).) + + Called by the str() built-in function and by the print + statement. + + */ + + /* Implemented elsewhere: + + PyObject *PyObject_Unicode(PyObject *o); + + Compute the unicode representation of object, o. Returns the + unicode representation on success, NULL on failure. This is + the equivalent of the Python expression: unistr(o).) + + Called by the unistr() built-in function. + + */ + + /* Declared elsewhere + + PyAPI_FUNC(int) PyCallable_Check(PyObject *o); + + Determine if the object, o, is callable. Return 1 if the + object is callable and 0 otherwise. + + This function always succeeds. + + */ + + + + PyAPI_FUNC(PyObject *) PyObject_Call(PyObject *callable_object, + PyObject *args, PyObject *kw); + + /* + Call a callable Python object, callable_object, with + arguments and keywords arguments. The 'args' argument can not be + NULL, but the 'kw' argument can be NULL. + + */ + + PyAPI_FUNC(PyObject *) PyObject_CallObject(PyObject *callable_object, + PyObject *args); + + /* + Call a callable Python object, callable_object, with + arguments given by the tuple, args. If no arguments are + needed, then args may be NULL. Returns the result of the + call on success, or NULL on failure. This is the equivalent + of the Python expression: apply(o,args). + + */ + + PyAPI_FUNC(PyObject *) PyObject_CallFunction(PyObject *callable_object, + char *format, ...); + + /* + Call a callable Python object, callable_object, with a + variable number of C arguments. The C arguments are described + using a mkvalue-style format string. The format may be NULL, + indicating that no arguments are provided. Returns the + result of the call on success, or NULL on failure. This is + the equivalent of the Python expression: apply(o,args). + + */ + + + PyAPI_FUNC(PyObject *) PyObject_CallMethod(PyObject *o, char *m, + char *format, ...); + + /* + Call the method named m of object o with a variable number of + C arguments. The C arguments are described by a mkvalue + format string. The format may be NULL, indicating that no + arguments are provided. Returns the result of the call on + success, or NULL on failure. This is the equivalent of the + Python expression: o.method(args). + */ + + PyAPI_FUNC(PyObject *) _PyObject_CallFunction_SizeT(PyObject *callable, + char *format, ...); + PyAPI_FUNC(PyObject *) _PyObject_CallMethod_SizeT(PyObject *o, + char *name, + char *format, ...); + + PyAPI_FUNC(PyObject *) PyObject_CallFunctionObjArgs(PyObject *callable, + ...); + + /* + Call a callable Python object, callable_object, with a + variable number of C arguments. The C arguments are provided + as PyObject * values, terminated by a NULL. Returns the + result of the call on success, or NULL on failure. This is + the equivalent of the Python expression: apply(o,args). + */ + + + PyAPI_FUNC(PyObject *) PyObject_CallMethodObjArgs(PyObject *o, + PyObject *m, ...); + + /* + Call the method named m of object o with a variable number of + C arguments. The C arguments are provided as PyObject * + values, terminated by NULL. Returns the result of the call + on success, or NULL on failure. This is the equivalent of + the Python expression: o.method(args). + */ + + + /* Implemented elsewhere: + + long PyObject_Hash(PyObject *o); + + Compute and return the hash, hash_value, of an object, o. On + failure, return -1. This is the equivalent of the Python + expression: hash(o). + + */ + + + /* Implemented elsewhere: + + int PyObject_IsTrue(PyObject *o); + + Returns 1 if the object, o, is considered to be true, 0 if o is + considered to be false and -1 on failure. This is equivalent to the + Python expression: not not o + + */ + + /* Implemented elsewhere: + + int PyObject_Not(PyObject *o); + + Returns 0 if the object, o, is considered to be true, 1 if o is + considered to be false and -1 on failure. This is equivalent to the + Python expression: not o + + */ + + PyAPI_FUNC(PyObject *) PyObject_Type(PyObject *o); + + /* + On success, returns a type object corresponding to the object + type of object o. On failure, returns NULL. This is + equivalent to the Python expression: type(o). + */ + + PyAPI_FUNC(Py_ssize_t) PyObject_Size(PyObject *o); + + /* + Return the size of object o. If the object, o, provides + both sequence and mapping protocols, the sequence size is + returned. On error, -1 is returned. This is the equivalent + to the Python expression: len(o). + + */ + + /* For DLL compatibility */ +#undef PyObject_Length + PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o); +#define PyObject_Length PyObject_Size + + PyAPI_FUNC(Py_ssize_t) _PyObject_LengthHint(PyObject *o, Py_ssize_t); + + /* + Guess the size of object o using len(o) or o.__length_hint__(). + If neither of those return a non-negative value, then return the + default value. If one of the calls fails, this function returns -1. + */ + + PyAPI_FUNC(PyObject *) PyObject_GetItem(PyObject *o, PyObject *key); + + /* + Return element of o corresponding to the object, key, or NULL + on failure. This is the equivalent of the Python expression: + o[key]. + + */ + + PyAPI_FUNC(int) PyObject_SetItem(PyObject *o, PyObject *key, PyObject *v); + + /* + Map the object, key, to the value, v. Returns + -1 on failure. This is the equivalent of the Python + statement: o[key]=v. + */ + + PyAPI_FUNC(int) PyObject_DelItemString(PyObject *o, char *key); + + /* + Remove the mapping for object, key, from the object *o. + Returns -1 on failure. This is equivalent to + the Python statement: del o[key]. + */ + + PyAPI_FUNC(int) PyObject_DelItem(PyObject *o, PyObject *key); + + /* + Delete the mapping for key from *o. Returns -1 on failure. + This is the equivalent of the Python statement: del o[key]. + */ + + PyAPI_FUNC(int) PyObject_AsCharBuffer(PyObject *obj, + const char **buffer, + Py_ssize_t *buffer_len); + + /* + Takes an arbitrary object which must support the (character, + single segment) buffer interface and returns a pointer to a + read-only memory location useable as character based input + for subsequent processing. + + 0 is returned on success. buffer and buffer_len are only + set in case no error occurs. Otherwise, -1 is returned and + an exception set. + + */ + + PyAPI_FUNC(int) PyObject_CheckReadBuffer(PyObject *obj); + + /* + Checks whether an arbitrary object supports the (character, + single segment) buffer interface. Returns 1 on success, 0 + on failure. + + */ + + PyAPI_FUNC(int) PyObject_AsReadBuffer(PyObject *obj, + const void **buffer, + Py_ssize_t *buffer_len); + + /* + Same as PyObject_AsCharBuffer() except that this API expects + (readable, single segment) buffer interface and returns a + pointer to a read-only memory location which can contain + arbitrary data. + + 0 is returned on success. buffer and buffer_len are only + set in case no error occurs. Otherwise, -1 is returned and + an exception set. + + */ + + PyAPI_FUNC(int) PyObject_AsWriteBuffer(PyObject *obj, + void **buffer, + Py_ssize_t *buffer_len); + + /* + Takes an arbitrary object which must support the (writeable, + single segment) buffer interface and returns a pointer to a + writeable memory location in buffer of size buffer_len. + + 0 is returned on success. buffer and buffer_len are only + set in case no error occurs. Otherwise, -1 is returned and + an exception set. + + */ + + /* new buffer API */ + +#define PyObject_CheckBuffer(obj) \ + (((obj)->ob_type->tp_as_buffer != NULL) && \ + (PyType_HasFeature((obj)->ob_type, Py_TPFLAGS_HAVE_NEWBUFFER)) && \ + ((obj)->ob_type->tp_as_buffer->bf_getbuffer != NULL)) + + /* Return 1 if the getbuffer function is available, otherwise + return 0 */ + + PyAPI_FUNC(int) PyObject_GetBuffer(PyObject *obj, Py_buffer *view, + int flags); + + /* This is a C-API version of the getbuffer function call. It checks + to make sure object has the required function pointer and issues the + call. Returns -1 and raises an error on failure and returns 0 on + success + */ + + + PyAPI_FUNC(void *) PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices); + + /* Get the memory area pointed to by the indices for the buffer given. + Note that view->ndim is the assumed size of indices + */ + + PyAPI_FUNC(int) PyBuffer_SizeFromFormat(const char *); + + /* Return the implied itemsize of the data-format area from a + struct-style description */ + + + + PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, Py_buffer *view, + Py_ssize_t len, char fort); + + PyAPI_FUNC(int) PyBuffer_FromContiguous(Py_buffer *view, void *buf, + Py_ssize_t len, char fort); + + + /* Copy len bytes of data from the contiguous chunk of memory + pointed to by buf into the buffer exported by obj. Return + 0 on success and return -1 and raise a PyBuffer_Error on + error (i.e. the object does not have a buffer interface or + it is not working). + + If fort is 'F' and the object is multi-dimensional, + then the data will be copied into the array in + Fortran-style (first dimension varies the fastest). If + fort is 'C', then the data will be copied into the array + in C-style (last dimension varies the fastest). If fort + is 'A', then it does not matter and the copy will be made + in whatever way is more efficient. + + */ + + PyAPI_FUNC(int) PyObject_CopyData(PyObject *dest, PyObject *src); + + /* Copy the data from the src buffer to the buffer of destination + */ + + PyAPI_FUNC(int) PyBuffer_IsContiguous(Py_buffer *view, char fort); + + + PyAPI_FUNC(void) PyBuffer_FillContiguousStrides(int ndims, + Py_ssize_t *shape, + Py_ssize_t *strides, + int itemsize, + char fort); + + /* Fill the strides array with byte-strides of a contiguous + (Fortran-style if fort is 'F' or C-style otherwise) + array of the given shape with the given number of bytes + per element. + */ + + PyAPI_FUNC(int) PyBuffer_FillInfo(Py_buffer *view, PyObject *o, void *buf, + Py_ssize_t len, int readonly, + int flags); + + /* Fills in a buffer-info structure correctly for an exporter + that can only share a contiguous chunk of memory of + "unsigned bytes" of the given length. Returns 0 on success + and -1 (with raising an error) on error. + */ + + PyAPI_FUNC(void) PyBuffer_Release(Py_buffer *view); + + /* Releases a Py_buffer obtained from getbuffer ParseTuple's s*. + */ + + PyAPI_FUNC(PyObject *) PyObject_Format(PyObject* obj, + PyObject *format_spec); + /* + Takes an arbitrary object and returns the result of + calling obj.__format__(format_spec). + */ + +/* Iterators */ + + PyAPI_FUNC(PyObject *) PyObject_GetIter(PyObject *); + /* Takes an object and returns an iterator for it. + This is typically a new iterator but if the argument + is an iterator, this returns itself. */ + +#define PyIter_Check(obj) \ + (PyType_HasFeature((obj)->ob_type, Py_TPFLAGS_HAVE_ITER) && \ + (obj)->ob_type->tp_iternext != NULL && \ + (obj)->ob_type->tp_iternext != &_PyObject_NextNotImplemented) + + PyAPI_FUNC(PyObject *) PyIter_Next(PyObject *); + /* Takes an iterator object and calls its tp_iternext slot, + returning the next value. If the iterator is exhausted, + this returns NULL without setting an exception. + NULL with an exception means an error occurred. */ + +/* Number Protocol:*/ + + PyAPI_FUNC(int) PyNumber_Check(PyObject *o); + + /* + Returns 1 if the object, o, provides numeric protocols, and + false otherwise. + + This function always succeeds. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Add(PyObject *o1, PyObject *o2); + + /* + Returns the result of adding o1 and o2, or null on failure. + This is the equivalent of the Python expression: o1+o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Subtract(PyObject *o1, PyObject *o2); + + /* + Returns the result of subtracting o2 from o1, or null on + failure. This is the equivalent of the Python expression: + o1-o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Multiply(PyObject *o1, PyObject *o2); + + /* + Returns the result of multiplying o1 and o2, or null on + failure. This is the equivalent of the Python expression: + o1*o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Divide(PyObject *o1, PyObject *o2); + + /* + Returns the result of dividing o1 by o2, or null on failure. + This is the equivalent of the Python expression: o1/o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_FloorDivide(PyObject *o1, PyObject *o2); + + /* + Returns the result of dividing o1 by o2 giving an integral result, + or null on failure. + This is the equivalent of the Python expression: o1//o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_TrueDivide(PyObject *o1, PyObject *o2); + + /* + Returns the result of dividing o1 by o2 giving a float result, + or null on failure. + This is the equivalent of the Python expression: o1/o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Remainder(PyObject *o1, PyObject *o2); + + /* + Returns the remainder of dividing o1 by o2, or null on + failure. This is the equivalent of the Python expression: + o1%o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Divmod(PyObject *o1, PyObject *o2); + + /* + See the built-in function divmod. Returns NULL on failure. + This is the equivalent of the Python expression: + divmod(o1,o2). + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Power(PyObject *o1, PyObject *o2, + PyObject *o3); + + /* + See the built-in function pow. Returns NULL on failure. + This is the equivalent of the Python expression: + pow(o1,o2,o3), where o3 is optional. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Negative(PyObject *o); + + /* + Returns the negation of o on success, or null on failure. + This is the equivalent of the Python expression: -o. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Positive(PyObject *o); + + /* + Returns the (what?) of o on success, or NULL on failure. + This is the equivalent of the Python expression: +o. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Absolute(PyObject *o); + + /* + Returns the absolute value of o, or null on failure. This is + the equivalent of the Python expression: abs(o). + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Invert(PyObject *o); + + /* + Returns the bitwise negation of o on success, or NULL on + failure. This is the equivalent of the Python expression: + ~o. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Lshift(PyObject *o1, PyObject *o2); + + /* + Returns the result of left shifting o1 by o2 on success, or + NULL on failure. This is the equivalent of the Python + expression: o1 << o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Rshift(PyObject *o1, PyObject *o2); + + /* + Returns the result of right shifting o1 by o2 on success, or + NULL on failure. This is the equivalent of the Python + expression: o1 >> o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_And(PyObject *o1, PyObject *o2); + + /* + Returns the result of bitwise and of o1 and o2 on success, or + NULL on failure. This is the equivalent of the Python + expression: o1&o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Xor(PyObject *o1, PyObject *o2); + + /* + Returns the bitwise exclusive or of o1 by o2 on success, or + NULL on failure. This is the equivalent of the Python + expression: o1^o2. + + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Or(PyObject *o1, PyObject *o2); + + /* + Returns the result of bitwise or on o1 and o2 on success, or + NULL on failure. This is the equivalent of the Python + expression: o1|o2. + + */ + + /* Implemented elsewhere: + + int PyNumber_Coerce(PyObject **p1, PyObject **p2); + + This function takes the addresses of two variables of type + PyObject*. + + If the objects pointed to by *p1 and *p2 have the same type, + increment their reference count and return 0 (success). + If the objects can be converted to a common numeric type, + replace *p1 and *p2 by their converted value (with 'new' + reference counts), and return 0. + If no conversion is possible, or if some other error occurs, + return -1 (failure) and don't increment the reference counts. + The call PyNumber_Coerce(&o1, &o2) is equivalent to the Python + statement o1, o2 = coerce(o1, o2). + + */ + +#define PyIndex_Check(obj) \ + ((obj)->ob_type->tp_as_number != NULL && \ + PyType_HasFeature((obj)->ob_type, Py_TPFLAGS_HAVE_INDEX) && \ + (obj)->ob_type->tp_as_number->nb_index != NULL) + + PyAPI_FUNC(PyObject *) PyNumber_Index(PyObject *o); + + /* + Returns the object converted to a Python long or int + or NULL with an error raised on failure. + */ + + PyAPI_FUNC(Py_ssize_t) PyNumber_AsSsize_t(PyObject *o, PyObject *exc); + + /* + Returns the Integral instance converted to an int. The + instance is expected to be int or long or have an __int__ + method. Steals integral's reference. error_format will be + used to create the TypeError if integral isn't actually an + Integral instance. error_format should be a format string + that can accept a char* naming integral's type. + */ + + PyAPI_FUNC(PyObject *) _PyNumber_ConvertIntegralToInt( + PyObject *integral, + const char* error_format); + + /* + Returns the object converted to Py_ssize_t by going through + PyNumber_Index first. If an overflow error occurs while + converting the int-or-long to Py_ssize_t, then the second argument + is the error-type to return. If it is NULL, then the overflow error + is cleared and the value is clipped. + */ + + PyAPI_FUNC(PyObject *) PyNumber_Int(PyObject *o); + + /* + Returns the o converted to an integer object on success, or + NULL on failure. This is the equivalent of the Python + expression: int(o). + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Long(PyObject *o); + + /* + Returns the o converted to a long integer object on success, + or NULL on failure. This is the equivalent of the Python + expression: long(o). + + */ + + PyAPI_FUNC(PyObject *) PyNumber_Float(PyObject *o); + + /* + Returns the o converted to a float object on success, or NULL + on failure. This is the equivalent of the Python expression: + float(o). + */ + +/* In-place variants of (some of) the above number protocol functions */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceAdd(PyObject *o1, PyObject *o2); + + /* + Returns the result of adding o2 to o1, possibly in-place, or null + on failure. This is the equivalent of the Python expression: + o1 += o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceSubtract(PyObject *o1, PyObject *o2); + + /* + Returns the result of subtracting o2 from o1, possibly in-place or + null on failure. This is the equivalent of the Python expression: + o1 -= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceMultiply(PyObject *o1, PyObject *o2); + + /* + Returns the result of multiplying o1 by o2, possibly in-place, or + null on failure. This is the equivalent of the Python expression: + o1 *= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceDivide(PyObject *o1, PyObject *o2); + + /* + Returns the result of dividing o1 by o2, possibly in-place, or null + on failure. This is the equivalent of the Python expression: + o1 /= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceFloorDivide(PyObject *o1, + PyObject *o2); + + /* + Returns the result of dividing o1 by o2 giving an integral result, + possibly in-place, or null on failure. + This is the equivalent of the Python expression: + o1 /= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceTrueDivide(PyObject *o1, + PyObject *o2); + + /* + Returns the result of dividing o1 by o2 giving a float result, + possibly in-place, or null on failure. + This is the equivalent of the Python expression: + o1 /= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceRemainder(PyObject *o1, PyObject *o2); + + /* + Returns the remainder of dividing o1 by o2, possibly in-place, or + null on failure. This is the equivalent of the Python expression: + o1 %= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlacePower(PyObject *o1, PyObject *o2, + PyObject *o3); + + /* + Returns the result of raising o1 to the power of o2, possibly + in-place, or null on failure. This is the equivalent of the Python + expression: o1 **= o2, or pow(o1, o2, o3) if o3 is present. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceLshift(PyObject *o1, PyObject *o2); + + /* + Returns the result of left shifting o1 by o2, possibly in-place, or + null on failure. This is the equivalent of the Python expression: + o1 <<= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceRshift(PyObject *o1, PyObject *o2); + + /* + Returns the result of right shifting o1 by o2, possibly in-place or + null on failure. This is the equivalent of the Python expression: + o1 >>= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceAnd(PyObject *o1, PyObject *o2); + + /* + Returns the result of bitwise and of o1 and o2, possibly in-place, + or null on failure. This is the equivalent of the Python + expression: o1 &= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceXor(PyObject *o1, PyObject *o2); + + /* + Returns the bitwise exclusive or of o1 by o2, possibly in-place, or + null on failure. This is the equivalent of the Python expression: + o1 ^= o2. + + */ + + PyAPI_FUNC(PyObject *) PyNumber_InPlaceOr(PyObject *o1, PyObject *o2); + + /* + Returns the result of bitwise or of o1 and o2, possibly in-place, + or null on failure. This is the equivalent of the Python + expression: o1 |= o2. + + */ + + + PyAPI_FUNC(PyObject *) PyNumber_ToBase(PyObject *n, int base); + + /* + Returns the integer n converted to a string with a base, with a base + marker of 0b, 0o or 0x prefixed if applicable. + If n is not an int object, it is converted with PyNumber_Index first. + */ + + +/* Sequence protocol:*/ + + PyAPI_FUNC(int) PySequence_Check(PyObject *o); + + /* + Return 1 if the object provides sequence protocol, and zero + otherwise. + + This function always succeeds. + + */ + + PyAPI_FUNC(Py_ssize_t) PySequence_Size(PyObject *o); + + /* + Return the size of sequence object o, or -1 on failure. + + */ + + /* For DLL compatibility */ +#undef PySequence_Length + PyAPI_FUNC(Py_ssize_t) PySequence_Length(PyObject *o); +#define PySequence_Length PySequence_Size + + + PyAPI_FUNC(PyObject *) PySequence_Concat(PyObject *o1, PyObject *o2); + + /* + Return the concatenation of o1 and o2 on success, and NULL on + failure. This is the equivalent of the Python + expression: o1+o2. + + */ + + PyAPI_FUNC(PyObject *) PySequence_Repeat(PyObject *o, Py_ssize_t count); + + /* + Return the result of repeating sequence object o count times, + or NULL on failure. This is the equivalent of the Python + expression: o1*count. + + */ + + PyAPI_FUNC(PyObject *) PySequence_GetItem(PyObject *o, Py_ssize_t i); + + /* + Return the ith element of o, or NULL on failure. This is the + equivalent of the Python expression: o[i]. + */ + + PyAPI_FUNC(PyObject *) PySequence_GetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2); + + /* + Return the slice of sequence object o between i1 and i2, or + NULL on failure. This is the equivalent of the Python + expression: o[i1:i2]. + + */ + + PyAPI_FUNC(int) PySequence_SetItem(PyObject *o, Py_ssize_t i, PyObject *v); + + /* + Assign object v to the ith element of o. Returns + -1 on failure. This is the equivalent of the Python + statement: o[i]=v. + + */ + + PyAPI_FUNC(int) PySequence_DelItem(PyObject *o, Py_ssize_t i); + + /* + Delete the ith element of object v. Returns + -1 on failure. This is the equivalent of the Python + statement: del o[i]. + */ + + PyAPI_FUNC(int) PySequence_SetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2, + PyObject *v); + + /* + Assign the sequence object, v, to the slice in sequence + object, o, from i1 to i2. Returns -1 on failure. This is the + equivalent of the Python statement: o[i1:i2]=v. + */ + + PyAPI_FUNC(int) PySequence_DelSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2); + + /* + Delete the slice in sequence object, o, from i1 to i2. + Returns -1 on failure. This is the equivalent of the Python + statement: del o[i1:i2]. + */ + + PyAPI_FUNC(PyObject *) PySequence_Tuple(PyObject *o); + + /* + Returns the sequence, o, as a tuple on success, and NULL on failure. + This is equivalent to the Python expression: tuple(o) + */ + + + PyAPI_FUNC(PyObject *) PySequence_List(PyObject *o); + /* + Returns the sequence, o, as a list on success, and NULL on failure. + This is equivalent to the Python expression: list(o) + */ + + PyAPI_FUNC(PyObject *) PySequence_Fast(PyObject *o, const char* m); + /* + Return the sequence, o, as a list, unless it's already a + tuple or list. Use PySequence_Fast_GET_ITEM to access the + members of this list, and PySequence_Fast_GET_SIZE to get its length. + + Returns NULL on failure. If the object does not support iteration, + raises a TypeError exception with m as the message text. + */ + +#define PySequence_Fast_GET_SIZE(o) \ + (PyList_Check(o) ? PyList_GET_SIZE(o) : PyTuple_GET_SIZE(o)) + /* + Return the size of o, assuming that o was returned by + PySequence_Fast and is not NULL. + */ + +#define PySequence_Fast_GET_ITEM(o, i)\ + (PyList_Check(o) ? PyList_GET_ITEM(o, i) : PyTuple_GET_ITEM(o, i)) + /* + Return the ith element of o, assuming that o was returned by + PySequence_Fast, and that i is within bounds. + */ + +#define PySequence_ITEM(o, i)\ + ( Py_TYPE(o)->tp_as_sequence->sq_item(o, i) ) + /* Assume tp_as_sequence and sq_item exist and that i does not + need to be corrected for a negative index + */ + +#define PySequence_Fast_ITEMS(sf) \ + (PyList_Check(sf) ? ((PyListObject *)(sf))->ob_item \ + : ((PyTupleObject *)(sf))->ob_item) + /* Return a pointer to the underlying item array for + an object retured by PySequence_Fast */ + + PyAPI_FUNC(Py_ssize_t) PySequence_Count(PyObject *o, PyObject *value); + + /* + Return the number of occurrences on value on o, that is, + return the number of keys for which o[key]==value. On + failure, return -1. This is equivalent to the Python + expression: o.count(value). + */ + + PyAPI_FUNC(int) PySequence_Contains(PyObject *seq, PyObject *ob); + /* + Return -1 if error; 1 if ob in seq; 0 if ob not in seq. + Use __contains__ if possible, else _PySequence_IterSearch(). + */ + +#define PY_ITERSEARCH_COUNT 1 +#define PY_ITERSEARCH_INDEX 2 +#define PY_ITERSEARCH_CONTAINS 3 + PyAPI_FUNC(Py_ssize_t) _PySequence_IterSearch(PyObject *seq, + PyObject *obj, int operation); + /* + Iterate over seq. Result depends on the operation: + PY_ITERSEARCH_COUNT: return # of times obj appears in seq; -1 if + error. + PY_ITERSEARCH_INDEX: return 0-based index of first occurrence of + obj in seq; set ValueError and return -1 if none found; + also return -1 on error. + PY_ITERSEARCH_CONTAINS: return 1 if obj in seq, else 0; -1 on + error. + */ + +/* For DLL-level backwards compatibility */ +#undef PySequence_In + PyAPI_FUNC(int) PySequence_In(PyObject *o, PyObject *value); + +/* For source-level backwards compatibility */ +#define PySequence_In PySequence_Contains + + /* + Determine if o contains value. If an item in o is equal to + X, return 1, otherwise return 0. On error, return -1. This + is equivalent to the Python expression: value in o. + */ + + PyAPI_FUNC(Py_ssize_t) PySequence_Index(PyObject *o, PyObject *value); + + /* + Return the first index for which o[i]=value. On error, + return -1. This is equivalent to the Python + expression: o.index(value). + */ + +/* In-place versions of some of the above Sequence functions. */ + + PyAPI_FUNC(PyObject *) PySequence_InPlaceConcat(PyObject *o1, PyObject *o2); + + /* + Append o2 to o1, in-place when possible. Return the resulting + object, which could be o1, or NULL on failure. This is the + equivalent of the Python expression: o1 += o2. + + */ + + PyAPI_FUNC(PyObject *) PySequence_InPlaceRepeat(PyObject *o, Py_ssize_t count); + + /* + Repeat o1 by count, in-place when possible. Return the resulting + object, which could be o1, or NULL on failure. This is the + equivalent of the Python expression: o1 *= count. + + */ + +/* Mapping protocol:*/ + + PyAPI_FUNC(int) PyMapping_Check(PyObject *o); + + /* + Return 1 if the object provides mapping protocol, and zero + otherwise. + + This function always succeeds. + */ + + PyAPI_FUNC(Py_ssize_t) PyMapping_Size(PyObject *o); + + /* + Returns the number of keys in object o on success, and -1 on + failure. For objects that do not provide sequence protocol, + this is equivalent to the Python expression: len(o). + */ + + /* For DLL compatibility */ +#undef PyMapping_Length + PyAPI_FUNC(Py_ssize_t) PyMapping_Length(PyObject *o); +#define PyMapping_Length PyMapping_Size + + + /* implemented as a macro: + + int PyMapping_DelItemString(PyObject *o, char *key); + + Remove the mapping for object, key, from the object *o. + Returns -1 on failure. This is equivalent to + the Python statement: del o[key]. + */ +#define PyMapping_DelItemString(O,K) PyObject_DelItemString((O),(K)) + + /* implemented as a macro: + + int PyMapping_DelItem(PyObject *o, PyObject *key); + + Remove the mapping for object, key, from the object *o. + Returns -1 on failure. This is equivalent to + the Python statement: del o[key]. + */ +#define PyMapping_DelItem(O,K) PyObject_DelItem((O),(K)) + + PyAPI_FUNC(int) PyMapping_HasKeyString(PyObject *o, char *key); + + /* + On success, return 1 if the mapping object has the key, key, + and 0 otherwise. This is equivalent to the Python expression: + o.has_key(key). + + This function always succeeds. + */ + + PyAPI_FUNC(int) PyMapping_HasKey(PyObject *o, PyObject *key); + + /* + Return 1 if the mapping object has the key, key, + and 0 otherwise. This is equivalent to the Python expression: + o.has_key(key). + + This function always succeeds. + + */ + + /* Implemented as macro: + + PyObject *PyMapping_Keys(PyObject *o); + + On success, return a list of the keys in object o. On + failure, return NULL. This is equivalent to the Python + expression: o.keys(). + */ +#define PyMapping_Keys(O) PyObject_CallMethod(O,"keys",NULL) + + /* Implemented as macro: + + PyObject *PyMapping_Values(PyObject *o); + + On success, return a list of the values in object o. On + failure, return NULL. This is equivalent to the Python + expression: o.values(). + */ +#define PyMapping_Values(O) PyObject_CallMethod(O,"values",NULL) + + /* Implemented as macro: + + PyObject *PyMapping_Items(PyObject *o); + + On success, return a list of the items in object o, where + each item is a tuple containing a key-value pair. On + failure, return NULL. This is equivalent to the Python + expression: o.items(). + + */ +#define PyMapping_Items(O) PyObject_CallMethod(O,"items",NULL) + + PyAPI_FUNC(PyObject *) PyMapping_GetItemString(PyObject *o, char *key); + + /* + Return element of o corresponding to the object, key, or NULL + on failure. This is the equivalent of the Python expression: + o[key]. + */ + + PyAPI_FUNC(int) PyMapping_SetItemString(PyObject *o, char *key, + PyObject *value); + + /* + Map the object, key, to the value, v. Returns + -1 on failure. This is the equivalent of the Python + statement: o[key]=v. + */ + + +PyAPI_FUNC(int) PyObject_IsInstance(PyObject *object, PyObject *typeorclass); + /* isinstance(object, typeorclass) */ + +PyAPI_FUNC(int) PyObject_IsSubclass(PyObject *object, PyObject *typeorclass); + /* issubclass(object, typeorclass) */ + + +PyAPI_FUNC(int) _PyObject_RealIsInstance(PyObject *inst, PyObject *cls); + +PyAPI_FUNC(int) _PyObject_RealIsSubclass(PyObject *derived, PyObject *cls); + + +/* For internal use by buffer API functions */ +PyAPI_FUNC(void) _Py_add_one_to_index_F(int nd, Py_ssize_t *index, + const Py_ssize_t *shape); +PyAPI_FUNC(void) _Py_add_one_to_index_C(int nd, Py_ssize_t *index, + const Py_ssize_t *shape); + + +#ifdef __cplusplus +} +#endif +#endif /* Py_ABSTRACTOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/asdl.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,45 @@ +#ifndef Py_ASDL_H +#define Py_ASDL_H + +typedef PyObject * identifier; +typedef PyObject * string; +typedef PyObject * object; + +#ifndef __cplusplus +typedef enum {false, true} bool; +#endif + +/* It would be nice if the code generated by asdl_c.py was completely + independent of Python, but it is a goal the requires too much work + at this stage. So, for example, I'll represent identifiers as + interned Python strings. +*/ + +/* XXX A sequence should be typed so that its use can be typechecked. */ + +typedef struct { + int size; + void *elements[1]; +} asdl_seq; + +typedef struct { + int size; + int elements[1]; +} asdl_int_seq; + +asdl_seq *asdl_seq_new(int size, PyArena *arena); +asdl_int_seq *asdl_int_seq_new(int size, PyArena *arena); + +#define asdl_seq_GET(S, I) (S)->elements[(I)] +#define asdl_seq_LEN(S) ((S) == NULL ? 0 : (S)->size) +#ifdef Py_DEBUG +#define asdl_seq_SET(S, I, V) { \ + int _asdl_i = (I); \ + assert((S) && _asdl_i < (S)->size); \ + (S)->elements[_asdl_i] = (V); \ +} +#else +#define asdl_seq_SET(S, I, V) (S)->elements[I] = (V) +#endif + +#endif /* !Py_ASDL_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/ast.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,13 @@ +#ifndef Py_AST_H +#define Py_AST_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(mod_ty) PyAST_FromNode(const node *, PyCompilerFlags *flags, + const char *, PyArena *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_AST_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/bitset.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,32 @@ + +#ifndef Py_BITSET_H +#define Py_BITSET_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Bitset interface */ + +#define BYTE char + +typedef BYTE *bitset; + +bitset newbitset(int nbits); +void delbitset(bitset bs); +#define testbit(ss, ibit) (((ss)[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0) +int addbit(bitset bs, int ibit); /* Returns 0 if already set */ +int samebitset(bitset bs1, bitset bs2, int nbits); +void mergebitset(bitset bs1, bitset bs2, int nbits); + +#define BITSPERBYTE (8*sizeof(BYTE)) +#define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) + +#define BIT2BYTE(ibit) ((ibit) / BITSPERBYTE) +#define BIT2SHIFT(ibit) ((ibit) % BITSPERBYTE) +#define BIT2MASK(ibit) (1 << BIT2SHIFT(ibit)) +#define BYTE2BIT(ibyte) ((ibyte) * BITSPERBYTE) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BITSET_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/boolobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,36 @@ +/* Boolean object interface */ + +#ifndef Py_BOOLOBJECT_H +#define Py_BOOLOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +typedef PyIntObject PyBoolObject; + +PyAPI_DATA(PyTypeObject) PyBool_Type; + +#define PyBool_Check(x) (Py_TYPE(x) == &PyBool_Type) + +/* Py_False and Py_True are the only two bools in existence. +Don't forget to apply Py_INCREF() when returning either!!! */ + +/* Don't use these directly */ +PyAPI_DATA(PyIntObject) _Py_ZeroStruct, _Py_TrueStruct; + +/* Use these macros */ +#define Py_False ((PyObject *) &_Py_ZeroStruct) +#define Py_True ((PyObject *) &_Py_TrueStruct) + +/* Macros for returning Py_True or Py_False, respectively */ +#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True +#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False + +/* Function to return a bool from a C long */ +PyAPI_FUNC(PyObject *) PyBool_FromLong(long); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BOOLOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/bufferobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,33 @@ + +/* Buffer object interface */ + +/* Note: the object's structure is private */ + +#ifndef Py_BUFFEROBJECT_H +#define Py_BUFFEROBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +PyAPI_DATA(PyTypeObject) PyBuffer_Type; + +#define PyBuffer_Check(op) (Py_TYPE(op) == &PyBuffer_Type) + +#define Py_END_OF_BUFFER (-1) + +PyAPI_FUNC(PyObject *) PyBuffer_FromObject(PyObject *base, + Py_ssize_t offset, Py_ssize_t size); +PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteObject(PyObject *base, + Py_ssize_t offset, + Py_ssize_t size); + +PyAPI_FUNC(PyObject *) PyBuffer_FromMemory(void *ptr, Py_ssize_t size); +PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size); + +PyAPI_FUNC(PyObject *) PyBuffer_New(Py_ssize_t size); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BUFFEROBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/bytearrayobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,57 @@ +/* ByteArray object interface */ + +#ifndef Py_BYTEARRAYOBJECT_H +#define Py_BYTEARRAYOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdarg.h> + +/* Type PyByteArrayObject represents a mutable array of bytes. + * The Python API is that of a sequence; + * the bytes are mapped to ints in [0, 256). + * Bytes are not characters; they may be used to encode characters. + * The only way to go between bytes and str/unicode is via encoding + * and decoding. + * For the convenience of C programmers, the bytes type is considered + * to contain a char pointer, not an unsigned char pointer. + */ + +/* Object layout */ +typedef struct { + PyObject_VAR_HEAD + /* XXX(nnorwitz): should ob_exports be Py_ssize_t? */ + int ob_exports; /* how many buffer exports */ + Py_ssize_t ob_alloc; /* How many bytes allocated */ + char *ob_bytes; +} PyByteArrayObject; + +/* Type object */ +PyAPI_DATA(PyTypeObject) PyByteArray_Type; +PyAPI_DATA(PyTypeObject) PyByteArrayIter_Type; + +/* Type check macros */ +#define PyByteArray_Check(self) PyObject_TypeCheck(self, &PyByteArray_Type) +#define PyByteArray_CheckExact(self) (Py_TYPE(self) == &PyByteArray_Type) + +/* Direct API functions */ +PyAPI_FUNC(PyObject *) PyByteArray_FromObject(PyObject *); +PyAPI_FUNC(PyObject *) PyByteArray_Concat(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyByteArray_FromStringAndSize(const char *, Py_ssize_t); +PyAPI_FUNC(Py_ssize_t) PyByteArray_Size(PyObject *); +PyAPI_FUNC(char *) PyByteArray_AsString(PyObject *); +PyAPI_FUNC(int) PyByteArray_Resize(PyObject *, Py_ssize_t); + +/* Macros, trading safety for speed */ +#define PyByteArray_AS_STRING(self) \ + (assert(PyByteArray_Check(self)), \ + Py_SIZE(self) ? ((PyByteArrayObject *)(self))->ob_bytes : _PyByteArray_empty_string) +#define PyByteArray_GET_SIZE(self) (assert(PyByteArray_Check(self)),Py_SIZE(self)) + +PyAPI_DATA(char) _PyByteArray_empty_string[]; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BYTEARRAYOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/bytes_methods.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,75 @@ +#ifndef Py_BYTES_CTYPE_H +#define Py_BYTES_CTYPE_H + +/* + * The internal implementation behind PyString (bytes) and PyBytes (buffer) + * methods of the given names, they operate on ASCII byte strings. + */ +extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_istitle(const char *cptr, Py_ssize_t len); + +/* These store their len sized answer in the given preallocated *result arg. */ +extern void _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len); +extern void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len); +extern void _Py_bytes_title(char *result, char *s, Py_ssize_t len); +extern void _Py_bytes_capitalize(char *result, char *s, Py_ssize_t len); +extern void _Py_bytes_swapcase(char *result, char *s, Py_ssize_t len); + +/* Shared __doc__ strings. */ +extern const char _Py_isspace__doc__[]; +extern const char _Py_isalpha__doc__[]; +extern const char _Py_isalnum__doc__[]; +extern const char _Py_isdigit__doc__[]; +extern const char _Py_islower__doc__[]; +extern const char _Py_isupper__doc__[]; +extern const char _Py_istitle__doc__[]; +extern const char _Py_lower__doc__[]; +extern const char _Py_upper__doc__[]; +extern const char _Py_title__doc__[]; +extern const char _Py_capitalize__doc__[]; +extern const char _Py_swapcase__doc__[]; + +/* These are left in for backward compatibility and will be removed + in 2.8/3.2 */ +#define ISLOWER(c) Py_ISLOWER(c) +#define ISUPPER(c) Py_ISUPPER(c) +#define ISALPHA(c) Py_ISALPHA(c) +#define ISDIGIT(c) Py_ISDIGIT(c) +#define ISXDIGIT(c) Py_ISXDIGIT(c) +#define ISALNUM(c) Py_ISALNUM(c) +#define ISSPACE(c) Py_ISSPACE(c) + +#undef islower +#define islower(c) undefined_islower(c) +#undef isupper +#define isupper(c) undefined_isupper(c) +#undef isalpha +#define isalpha(c) undefined_isalpha(c) +#undef isdigit +#define isdigit(c) undefined_isdigit(c) +#undef isxdigit +#define isxdigit(c) undefined_isxdigit(c) +#undef isalnum +#define isalnum(c) undefined_isalnum(c) +#undef isspace +#define isspace(c) undefined_isspace(c) + +/* These are left in for backward compatibility and will be removed + in 2.8/3.2 */ +#define TOLOWER(c) Py_TOLOWER(c) +#define TOUPPER(c) Py_TOUPPER(c) + +#undef tolower +#define tolower(c) undefined_tolower(c) +#undef toupper +#define toupper(c) undefined_toupper(c) + +/* this is needed because some docs are shared from the .o, not static */ +#define PyDoc_STRVAR_shared(name,str) const char name[] = PyDoc_STR(str) + +#endif /* !Py_BYTES_CTYPE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/bytesobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,27 @@ +#define PyBytesObject PyStringObject +#define PyBytes_Type PyString_Type + +#define PyBytes_Check PyString_Check +#define PyBytes_CheckExact PyString_CheckExact +#define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED +#define PyBytes_AS_STRING PyString_AS_STRING +#define PyBytes_GET_SIZE PyString_GET_SIZE +#define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS + +#define PyBytes_FromStringAndSize PyString_FromStringAndSize +#define PyBytes_FromString PyString_FromString +#define PyBytes_FromFormatV PyString_FromFormatV +#define PyBytes_FromFormat PyString_FromFormat +#define PyBytes_Size PyString_Size +#define PyBytes_AsString PyString_AsString +#define PyBytes_Repr PyString_Repr +#define PyBytes_Concat PyString_Concat +#define PyBytes_ConcatAndDel PyString_ConcatAndDel +#define _PyBytes_Resize _PyString_Resize +#define _PyBytes_Eq _PyString_Eq +#define PyBytes_Format PyString_Format +#define _PyBytes_FormatLong _PyString_FormatLong +#define PyBytes_DecodeEscape PyString_DecodeEscape +#define _PyBytes_Join _PyString_Join +#define PyBytes_AsStringAndSize PyString_AsStringAndSize +#define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/cStringIO.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,73 @@ +#ifndef Py_CSTRINGIO_H +#define Py_CSTRINGIO_H +#ifdef __cplusplus +extern "C" { +#endif +/* + + This header provides access to cStringIO objects from C. + Functions are provided for calling cStringIO objects and + macros are provided for testing whether you have cStringIO + objects. + + Before calling any of the functions or macros, you must initialize + the routines with: + + PycString_IMPORT + + This would typically be done in your init function. + +*/ + +#define PycStringIO_CAPSULE_NAME "cStringIO.cStringIO_CAPI" + +#define PycString_IMPORT \ + PycStringIO = ((struct PycStringIO_CAPI*)PyCapsule_Import(\ + PycStringIO_CAPSULE_NAME, 0)) + +/* Basic functions to manipulate cStringIO objects from C */ + +static struct PycStringIO_CAPI { + + /* Read a string from an input object. If the last argument + is -1, the remainder will be read. + */ + int(*cread)(PyObject *, char **, Py_ssize_t); + + /* Read a line from an input object. Returns the length of the read + line as an int and a pointer inside the object buffer as char** (so + the caller doesn't have to provide its own buffer as destination). + */ + int(*creadline)(PyObject *, char **); + + /* Write a string to an output object*/ + int(*cwrite)(PyObject *, const char *, Py_ssize_t); + + /* Get the output object as a Python string (returns new reference). */ + PyObject *(*cgetvalue)(PyObject *); + + /* Create a new output object */ + PyObject *(*NewOutput)(int); + + /* Create an input object from a Python string + (copies the Python string reference). + */ + PyObject *(*NewInput)(PyObject *); + + /* The Python types for cStringIO input and output objects. + Note that you can do input on an output object. + */ + PyTypeObject *InputType, *OutputType; + +} *PycStringIO; + +/* These can be used to test if you have one */ +#define PycStringIO_InputCheck(O) \ + (Py_TYPE(O)==PycStringIO->InputType) +#define PycStringIO_OutputCheck(O) \ + (Py_TYPE(O)==PycStringIO->OutputType) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CSTRINGIO_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/cellobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,28 @@ +/* Cell object interface */ + +#ifndef Py_CELLOBJECT_H +#define Py_CELLOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + PyObject *ob_ref; /* Content of the cell or NULL when empty */ +} PyCellObject; + +PyAPI_DATA(PyTypeObject) PyCell_Type; + +#define PyCell_Check(op) (Py_TYPE(op) == &PyCell_Type) + +PyAPI_FUNC(PyObject *) PyCell_New(PyObject *); +PyAPI_FUNC(PyObject *) PyCell_Get(PyObject *); +PyAPI_FUNC(int) PyCell_Set(PyObject *, PyObject *); + +#define PyCell_GET(op) (((PyCellObject *)(op))->ob_ref) +#define PyCell_SET(op, v) (((PyCellObject *)(op))->ob_ref = v) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TUPLEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/ceval.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,153 @@ +#ifndef Py_CEVAL_H +#define Py_CEVAL_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Interface to random parts in ceval.c */ + +PyAPI_FUNC(PyObject *) PyEval_CallObjectWithKeywords( + PyObject *, PyObject *, PyObject *); + +/* Inline this */ +#define PyEval_CallObject(func,arg) \ + PyEval_CallObjectWithKeywords(func, arg, (PyObject *)NULL) + +PyAPI_FUNC(PyObject *) PyEval_CallFunction(PyObject *obj, + const char *format, ...); +PyAPI_FUNC(PyObject *) PyEval_CallMethod(PyObject *obj, + const char *methodname, + const char *format, ...); + +PyAPI_FUNC(void) PyEval_SetProfile(Py_tracefunc, PyObject *); +PyAPI_FUNC(void) PyEval_SetTrace(Py_tracefunc, PyObject *); + +struct _frame; /* Avoid including frameobject.h */ + +PyAPI_FUNC(PyObject *) PyEval_GetBuiltins(void); +PyAPI_FUNC(PyObject *) PyEval_GetGlobals(void); +PyAPI_FUNC(PyObject *) PyEval_GetLocals(void); +PyAPI_FUNC(struct _frame *) PyEval_GetFrame(void); +PyAPI_FUNC(int) PyEval_GetRestricted(void); + +/* Look at the current frame's (if any) code's co_flags, and turn on + the corresponding compiler flags in cf->cf_flags. Return 1 if any + flag was set, else return 0. */ +PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf); + +PyAPI_FUNC(int) Py_FlushLine(void); + +PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg); +PyAPI_FUNC(int) Py_MakePendingCalls(void); + +/* Protection against deeply nested recursive calls */ +PyAPI_FUNC(void) Py_SetRecursionLimit(int); +PyAPI_FUNC(int) Py_GetRecursionLimit(void); + +#define Py_EnterRecursiveCall(where) \ + (_Py_MakeRecCheck(PyThreadState_GET()->recursion_depth) && \ + _Py_CheckRecursiveCall(where)) +#define Py_LeaveRecursiveCall() \ + (--PyThreadState_GET()->recursion_depth) +PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where); +PyAPI_DATA(int) _Py_CheckRecursionLimit; +#ifdef USE_STACKCHECK +# define _Py_MakeRecCheck(x) (++(x) > --_Py_CheckRecursionLimit) +#else +# define _Py_MakeRecCheck(x) (++(x) > _Py_CheckRecursionLimit) +#endif + +PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *); +PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *); + +PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *); +PyAPI_FUNC(PyObject *) PyEval_EvalFrame(struct _frame *); +PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(struct _frame *f, int exc); + +/* this used to be handled on a per-thread basis - now just two globals */ +PyAPI_DATA(volatile int) _Py_Ticker; +PyAPI_DATA(int) _Py_CheckInterval; + +/* Interface for threads. + + A module that plans to do a blocking system call (or something else + that lasts a long time and doesn't touch Python data) can allow other + threads to run as follows: + + ...preparations here... + Py_BEGIN_ALLOW_THREADS + ...blocking system call here... + Py_END_ALLOW_THREADS + ...interpret result here... + + The Py_BEGIN_ALLOW_THREADS/Py_END_ALLOW_THREADS pair expands to a + {}-surrounded block. + To leave the block in the middle (e.g., with return), you must insert + a line containing Py_BLOCK_THREADS before the return, e.g. + + if (...premature_exit...) { + Py_BLOCK_THREADS + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + An alternative is: + + Py_BLOCK_THREADS + if (...premature_exit...) { + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + Py_UNBLOCK_THREADS + + For convenience, that the value of 'errno' is restored across + Py_END_ALLOW_THREADS and Py_BLOCK_THREADS. + + WARNING: NEVER NEST CALLS TO Py_BEGIN_ALLOW_THREADS AND + Py_END_ALLOW_THREADS!!! + + The function PyEval_InitThreads() should be called only from + initthread() in "threadmodule.c". + + Note that not yet all candidates have been converted to use this + mechanism! +*/ + +PyAPI_FUNC(PyThreadState *) PyEval_SaveThread(void); +PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *); + +#ifdef WITH_THREAD + +PyAPI_FUNC(int) PyEval_ThreadsInitialized(void); +PyAPI_FUNC(void) PyEval_InitThreads(void); +PyAPI_FUNC(void) PyEval_AcquireLock(void); +PyAPI_FUNC(void) PyEval_ReleaseLock(void); +PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate); +PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); +PyAPI_FUNC(void) PyEval_ReInitThreads(void); + +#define Py_BEGIN_ALLOW_THREADS { \ + PyThreadState *_save; \ + _save = PyEval_SaveThread(); +#define Py_BLOCK_THREADS PyEval_RestoreThread(_save); +#define Py_UNBLOCK_THREADS _save = PyEval_SaveThread(); +#define Py_END_ALLOW_THREADS PyEval_RestoreThread(_save); \ + } + +#else /* !WITH_THREAD */ + +#define Py_BEGIN_ALLOW_THREADS { +#define Py_BLOCK_THREADS +#define Py_UNBLOCK_THREADS +#define Py_END_ALLOW_THREADS } + +#endif /* !WITH_THREAD */ + +PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CEVAL_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/classobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,83 @@ + +/* Class object interface */ + +/* Revealing some structures (not for general use) */ + +#ifndef Py_CLASSOBJECT_H +#define Py_CLASSOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + PyObject *cl_bases; /* A tuple of class objects */ + PyObject *cl_dict; /* A dictionary */ + PyObject *cl_name; /* A string */ + /* The following three are functions or NULL */ + PyObject *cl_getattr; + PyObject *cl_setattr; + PyObject *cl_delattr; + PyObject *cl_weakreflist; /* List of weak references */ +} PyClassObject; + +typedef struct { + PyObject_HEAD + PyClassObject *in_class; /* The class object */ + PyObject *in_dict; /* A dictionary */ + PyObject *in_weakreflist; /* List of weak references */ +} PyInstanceObject; + +typedef struct { + PyObject_HEAD + PyObject *im_func; /* The callable object implementing the method */ + PyObject *im_self; /* The instance it is bound to, or NULL */ + PyObject *im_class; /* The class that asked for the method */ + PyObject *im_weakreflist; /* List of weak references */ +} PyMethodObject; + +PyAPI_DATA(PyTypeObject) PyClass_Type, PyInstance_Type, PyMethod_Type; + +#define PyClass_Check(op) ((op)->ob_type == &PyClass_Type) +#define PyInstance_Check(op) ((op)->ob_type == &PyInstance_Type) +#define PyMethod_Check(op) ((op)->ob_type == &PyMethod_Type) + +PyAPI_FUNC(PyObject *) PyClass_New(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyInstance_New(PyObject *, PyObject *, + PyObject *); +PyAPI_FUNC(PyObject *) PyInstance_NewRaw(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyMethod_New(PyObject *, PyObject *, PyObject *); + +PyAPI_FUNC(PyObject *) PyMethod_Function(PyObject *); +PyAPI_FUNC(PyObject *) PyMethod_Self(PyObject *); +PyAPI_FUNC(PyObject *) PyMethod_Class(PyObject *); + +/* Look up attribute with name (a string) on instance object pinst, using + * only the instance and base class dicts. If a descriptor is found in + * a class dict, the descriptor is returned without calling it. + * Returns NULL if nothing found, else a borrowed reference to the + * value associated with name in the dict in which name was found. + * The point of this routine is that it never calls arbitrary Python + * code, so is always "safe": all it does is dict lookups. The function + * can't fail, never sets an exception, and NULL is not an error (it just + * means "not found"). + */ +PyAPI_FUNC(PyObject *) _PyInstance_Lookup(PyObject *pinst, PyObject *name); + +/* Macros for direct access to these values. Type checks are *not* + done, so use with care. */ +#define PyMethod_GET_FUNCTION(meth) \ + (((PyMethodObject *)meth) -> im_func) +#define PyMethod_GET_SELF(meth) \ + (((PyMethodObject *)meth) -> im_self) +#define PyMethod_GET_CLASS(meth) \ + (((PyMethodObject *)meth) -> im_class) + +PyAPI_FUNC(int) PyClass_IsSubclass(PyObject *, PyObject *); + +PyAPI_FUNC(int) PyMethod_ClearFreeList(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CLASSOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/cobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,89 @@ +/* + CObjects are marked Pending Deprecation as of Python 2.7. + The full schedule for 2.x is as follows: + - CObjects are marked Pending Deprecation in Python 2.7. + - CObjects will be marked Deprecated in Python 2.8 + (if there is one). + - CObjects will be removed in Python 2.9 (if there is one). + + Additionally, for the Python 3.x series: + - CObjects were marked Deprecated in Python 3.1. + - CObjects will be removed in Python 3.2. + + You should switch all use of CObjects to capsules. Capsules + have a safer and more consistent API. For more information, + see Include/pycapsule.h, or read the "Capsules" topic in + the "Python/C API Reference Manual". + + Python 2.7 no longer uses CObjects itself; all objects which + were formerly CObjects are now capsules. Note that this change + does not by itself break binary compatibility with extensions + built for previous versions of Python--PyCObject_AsVoidPtr() + has been changed to also understand capsules. + +*/ + +/* original file header comment follows: */ + +/* C objects to be exported from one extension module to another. + + C objects are used for communication between extension modules. + They provide a way for an extension module to export a C interface + to other extension modules, so that extension modules can use the + Python import mechanism to link to one another. + +*/ + +#ifndef Py_COBJECT_H +#define Py_COBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyCObject_Type; + +#define PyCObject_Check(op) (Py_TYPE(op) == &PyCObject_Type) + +/* Create a PyCObject from a pointer to a C object and an optional + destructor function. If the second argument is non-null, then it + will be called with the first argument if and when the PyCObject is + destroyed. + +*/ +PyAPI_FUNC(PyObject *) PyCObject_FromVoidPtr( + void *cobj, void (*destruct)(void*)); + + +/* Create a PyCObject from a pointer to a C object, a description object, + and an optional destructor function. If the third argument is non-null, + then it will be called with the first and second arguments if and when + the PyCObject is destroyed. +*/ +PyAPI_FUNC(PyObject *) PyCObject_FromVoidPtrAndDesc( + void *cobj, void *desc, void (*destruct)(void*,void*)); + +/* Retrieve a pointer to a C object from a PyCObject. */ +PyAPI_FUNC(void *) PyCObject_AsVoidPtr(PyObject *); + +/* Retrieve a pointer to a description object from a PyCObject. */ +PyAPI_FUNC(void *) PyCObject_GetDesc(PyObject *); + +/* Import a pointer to a C object from a module using a PyCObject. */ +PyAPI_FUNC(void *) PyCObject_Import(char *module_name, char *cobject_name); + +/* Modify a C object. Fails (==0) if object has a destructor. */ +PyAPI_FUNC(int) PyCObject_SetVoidPtr(PyObject *self, void *cobj); + + +typedef struct { + PyObject_HEAD + void *cobject; + void *desc; + void (*destructor)(void *); +} PyCObject; + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_COBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/code.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,116 @@ +/* Definitions for bytecode */ + +#ifndef Py_CODE_H +#define Py_CODE_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Bytecode object */ +typedef struct { + PyObject_HEAD + int co_argcount; /* #arguments, except *args */ + int co_nlocals; /* #local variables */ + int co_stacksize; /* #entries needed for evaluation stack */ + int co_flags; /* CO_..., see below */ + PyObject *co_code; /* instruction opcodes */ + PyObject *co_consts; /* list (constants used) */ + PyObject *co_names; /* list of strings (names used) */ + PyObject *co_varnames; /* tuple of strings (local variable names) */ + PyObject *co_freevars; /* tuple of strings (free variable names) */ + PyObject *co_cellvars; /* tuple of strings (cell variable names) */ + /* The rest doesn't count for hash/cmp */ + PyObject *co_filename; /* string (where it was loaded from) */ + PyObject *co_name; /* string (name, for reference) */ + int co_firstlineno; /* first source line number */ + PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See + Objects/lnotab_notes.txt for details. */ + void *co_zombieframe; /* for optimization only (see frameobject.c) */ + PyObject *co_weakreflist; /* to support weakrefs to code objects */ +} PyCodeObject; + +/* Masks for co_flags above */ +#define CO_OPTIMIZED 0x0001 +#define CO_NEWLOCALS 0x0002 +#define CO_VARARGS 0x0004 +#define CO_VARKEYWORDS 0x0008 +#define CO_NESTED 0x0010 +#define CO_GENERATOR 0x0020 +/* The CO_NOFREE flag is set if there are no free or cell variables. + This information is redundant, but it allows a single flag test + to determine whether there is any extra work to be done when the + call frame it setup. +*/ +#define CO_NOFREE 0x0040 + +#if 0 +/* This is no longer used. Stopped defining in 2.5, do not re-use. */ +#define CO_GENERATOR_ALLOWED 0x1000 +#endif +#define CO_FUTURE_DIVISION 0x2000 +#define CO_FUTURE_ABSOLUTE_IMPORT 0x4000 /* do absolute imports by default */ +#define CO_FUTURE_WITH_STATEMENT 0x8000 +#define CO_FUTURE_PRINT_FUNCTION 0x10000 +#define CO_FUTURE_UNICODE_LITERALS 0x20000 + +/* This should be defined if a future statement modifies the syntax. + For example, when a keyword is added. +*/ +#if 1 +#define PY_PARSER_REQUIRES_FUTURE_KEYWORD +#endif + +#define CO_MAXBLOCKS 20 /* Max static block nesting within a function */ + +PyAPI_DATA(PyTypeObject) PyCode_Type; + +#define PyCode_Check(op) (Py_TYPE(op) == &PyCode_Type) +#define PyCode_GetNumFree(op) (PyTuple_GET_SIZE((op)->co_freevars)) + +/* Public interface */ +PyAPI_FUNC(PyCodeObject *) PyCode_New( + int, int, int, int, PyObject *, PyObject *, PyObject *, PyObject *, + PyObject *, PyObject *, PyObject *, PyObject *, int, PyObject *); + /* same as struct above */ + +/* Creates a new empty code object with the specified source location. */ +PyAPI_FUNC(PyCodeObject *) +PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno); + +/* Return the line number associated with the specified bytecode index + in this code object. If you just need the line number of a frame, + use PyFrame_GetLineNumber() instead. */ +PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); + +/* for internal use only */ +#define _PyCode_GETCODEPTR(co, pp) \ + ((*Py_TYPE((co)->co_code)->tp_as_buffer->bf_getreadbuffer) \ + ((co)->co_code, 0, (void **)(pp))) + +typedef struct _addr_pair { + int ap_lower; + int ap_upper; +} PyAddrPair; + +/* Update *bounds to describe the first and one-past-the-last instructions in the + same line as lasti. Return the number of that line. +*/ +PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co, + int lasti, PyAddrPair *bounds); + +/* Create a comparable key used to compare constants taking in account the + * object type. It is used to make sure types are not coerced (e.g., float and + * complex) _and_ to distinguish 0.0 from -0.0 e.g. on IEEE platforms + * + * Return (type(obj), obj, ...): a tuple with variable size (at least 2 items) + * depending on the type and the value. The type is the first item to not + * compare bytes and str which can raise a BytesWarning exception. */ +PyAPI_FUNC(PyObject*) _PyCode_ConstantKey(PyObject *obj); + +PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts, + PyObject *names, PyObject *lineno_obj); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CODE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/codecs.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,212 @@ +#ifndef Py_CODECREGISTRY_H +#define Py_CODECREGISTRY_H +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------------------------------------------------------------ + + Python Codec Registry and support functions + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +Copyright (c) Corporation for National Research Initiatives. + + ------------------------------------------------------------------------ */ + +/* Register a new codec search function. + + As side effect, this tries to load the encodings package, if not + yet done, to make sure that it is always first in the list of + search functions. + + The search_function's refcount is incremented by this function. */ + +PyAPI_FUNC(int) PyCodec_Register( + PyObject *search_function + ); + +/* Codec register lookup API. + + Looks up the given encoding and returns a CodecInfo object with + function attributes which implement the different aspects of + processing the encoding. + + The encoding string is looked up converted to all lower-case + characters. This makes encodings looked up through this mechanism + effectively case-insensitive. + + If no codec is found, a KeyError is set and NULL returned. + + As side effect, this tries to load the encodings package, if not + yet done. This is part of the lazy load strategy for the encodings + package. + + */ + +PyAPI_FUNC(PyObject *) _PyCodec_Lookup( + const char *encoding + ); + +/* Generic codec based encoding API. + + object is passed through the encoder function found for the given + encoding using the error handling method defined by errors. errors + may be NULL to use the default method defined for the codec. + + Raises a LookupError in case no encoder can be found. + + */ + +PyAPI_FUNC(PyObject *) PyCodec_Encode( + PyObject *object, + const char *encoding, + const char *errors + ); + +/* Generic codec based decoding API. + + object is passed through the decoder function found for the given + encoding using the error handling method defined by errors. errors + may be NULL to use the default method defined for the codec. + + Raises a LookupError in case no encoder can be found. + + */ + +PyAPI_FUNC(PyObject *) PyCodec_Decode( + PyObject *object, + const char *encoding, + const char *errors + ); + +/* Text codec specific encoding and decoding API. + + Checks the encoding against a list of codecs which do not + implement a unicode<->bytes encoding before attempting the + operation. + + Please note that these APIs are internal and should not + be used in Python C extensions. + + XXX (ncoghlan): should we make these, or something like them, public + in Python 3.5+? + + */ +PyAPI_FUNC(PyObject *) _PyCodec_LookupTextEncoding( + const char *encoding, + const char *alternate_command + ); + +PyAPI_FUNC(PyObject *) _PyCodec_EncodeText( + PyObject *object, + const char *encoding, + const char *errors + ); + +PyAPI_FUNC(PyObject *) _PyCodec_DecodeText( + PyObject *object, + const char *encoding, + const char *errors + ); + +/* These two aren't actually text encoding specific, but _io.TextIOWrapper + * is the only current API consumer. + */ +PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalDecoder( + PyObject *codec_info, + const char *errors + ); + +PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalEncoder( + PyObject *codec_info, + const char *errors + ); + + + +/* --- Codec Lookup APIs -------------------------------------------------- + + All APIs return a codec object with incremented refcount and are + based on _PyCodec_Lookup(). The same comments w/r to the encoding + name also apply to these APIs. + +*/ + +/* Get an encoder function for the given encoding. */ + +PyAPI_FUNC(PyObject *) PyCodec_Encoder( + const char *encoding + ); + +/* Get a decoder function for the given encoding. */ + +PyAPI_FUNC(PyObject *) PyCodec_Decoder( + const char *encoding + ); + +/* Get a IncrementalEncoder object for the given encoding. */ + +PyAPI_FUNC(PyObject *) PyCodec_IncrementalEncoder( + const char *encoding, + const char *errors + ); + +/* Get a IncrementalDecoder object function for the given encoding. */ + +PyAPI_FUNC(PyObject *) PyCodec_IncrementalDecoder( + const char *encoding, + const char *errors + ); + +/* Get a StreamReader factory function for the given encoding. */ + +PyAPI_FUNC(PyObject *) PyCodec_StreamReader( + const char *encoding, + PyObject *stream, + const char *errors + ); + +/* Get a StreamWriter factory function for the given encoding. */ + +PyAPI_FUNC(PyObject *) PyCodec_StreamWriter( + const char *encoding, + PyObject *stream, + const char *errors + ); + +/* Unicode encoding error handling callback registry API */ + +/* Register the error handling callback function error under the given + name. This function will be called by the codec when it encounters + unencodable characters/undecodable bytes and doesn't know the + callback name, when name is specified as the error parameter + in the call to the encode/decode function. + Return 0 on success, -1 on error */ +PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error); + +/* Lookup the error handling callback function registered under the given + name. As a special case NULL can be passed, in which case + the error handling callback for "strict" will be returned. */ +PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name); + +/* raise exc as an exception */ +PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc); + +/* ignore the unicode error, skipping the faulty input */ +PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc); + +/* replace the unicode encode error with ? or U+FFFD */ +PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc); + +/* replace the unicode encode error with XML character references */ +PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc); + +/* replace the unicode encode error with backslash escapes (\x, \u and \U) */ +PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CODECREGISTRY_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/compile.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,40 @@ + +#ifndef Py_COMPILE_H +#define Py_COMPILE_H + +#include "code.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Public interface */ +struct _node; /* Declare the existence of this type */ +PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *); + +/* Future feature support */ + +typedef struct { + int ff_features; /* flags set by future statements */ + int ff_lineno; /* line number of last future statement */ +} PyFutureFeatures; + +#define FUTURE_NESTED_SCOPES "nested_scopes" +#define FUTURE_GENERATORS "generators" +#define FUTURE_DIVISION "division" +#define FUTURE_ABSOLUTE_IMPORT "absolute_import" +#define FUTURE_WITH_STATEMENT "with_statement" +#define FUTURE_PRINT_FUNCTION "print_function" +#define FUTURE_UNICODE_LITERALS "unicode_literals" + + +struct _mod; /* Declare the existence of this type */ +PyAPI_FUNC(PyCodeObject *) PyAST_Compile(struct _mod *, const char *, + PyCompilerFlags *, PyArena *); +PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_COMPILE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/complexobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,66 @@ +/* Complex number structure */ + +#ifndef Py_COMPLEXOBJECT_H +#define Py_COMPLEXOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + double real; + double imag; +} Py_complex; + +/* Operations on complex numbers from complexmodule.c */ + +#define c_sum _Py_c_sum +#define c_diff _Py_c_diff +#define c_neg _Py_c_neg +#define c_prod _Py_c_prod +#define c_quot _Py_c_quot +#define c_pow _Py_c_pow +#define c_abs _Py_c_abs + +PyAPI_FUNC(Py_complex) c_sum(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) c_diff(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) c_neg(Py_complex); +PyAPI_FUNC(Py_complex) c_prod(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) c_quot(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) c_pow(Py_complex, Py_complex); +PyAPI_FUNC(double) c_abs(Py_complex); + + +/* Complex object interface */ + +/* +PyComplexObject represents a complex number with double-precision +real and imaginary parts. +*/ + +typedef struct { + PyObject_HEAD + Py_complex cval; +} PyComplexObject; + +PyAPI_DATA(PyTypeObject) PyComplex_Type; + +#define PyComplex_Check(op) PyObject_TypeCheck(op, &PyComplex_Type) +#define PyComplex_CheckExact(op) (Py_TYPE(op) == &PyComplex_Type) + +PyAPI_FUNC(PyObject *) PyComplex_FromCComplex(Py_complex); +PyAPI_FUNC(PyObject *) PyComplex_FromDoubles(double real, double imag); + +PyAPI_FUNC(double) PyComplex_RealAsDouble(PyObject *op); +PyAPI_FUNC(double) PyComplex_ImagAsDouble(PyObject *op); +PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op); + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_COMPLEXOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/datetime.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,239 @@ +/* datetime.h + */ + +#ifndef DATETIME_H +#define DATETIME_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Fields are packed into successive bytes, each viewed as unsigned and + * big-endian, unless otherwise noted: + * + * byte offset + * 0 year 2 bytes, 1-9999 + * 2 month 1 byte, 1-12 + * 3 day 1 byte, 1-31 + * 4 hour 1 byte, 0-23 + * 5 minute 1 byte, 0-59 + * 6 second 1 byte, 0-59 + * 7 usecond 3 bytes, 0-999999 + * 10 + */ + +/* # of bytes for year, month, and day. */ +#define _PyDateTime_DATE_DATASIZE 4 + +/* # of bytes for hour, minute, second, and usecond. */ +#define _PyDateTime_TIME_DATASIZE 6 + +/* # of bytes for year, month, day, hour, minute, second, and usecond. */ +#define _PyDateTime_DATETIME_DATASIZE 10 + + +typedef struct +{ + PyObject_HEAD + long hashcode; /* -1 when unknown */ + int days; /* -MAX_DELTA_DAYS <= days <= MAX_DELTA_DAYS */ + int seconds; /* 0 <= seconds < 24*3600 is invariant */ + int microseconds; /* 0 <= microseconds < 1000000 is invariant */ +} PyDateTime_Delta; + +typedef struct +{ + PyObject_HEAD /* a pure abstract base class */ +} PyDateTime_TZInfo; + + +/* The datetime and time types have hashcodes, and an optional tzinfo member, + * present if and only if hastzinfo is true. + */ +#define _PyTZINFO_HEAD \ + PyObject_HEAD \ + long hashcode; \ + char hastzinfo; /* boolean flag */ + +/* No _PyDateTime_BaseTZInfo is allocated; it's just to have something + * convenient to cast to, when getting at the hastzinfo member of objects + * starting with _PyTZINFO_HEAD. + */ +typedef struct +{ + _PyTZINFO_HEAD +} _PyDateTime_BaseTZInfo; + +/* All time objects are of PyDateTime_TimeType, but that can be allocated + * in two ways, with or without a tzinfo member. Without is the same as + * tzinfo == None, but consumes less memory. _PyDateTime_BaseTime is an + * internal struct used to allocate the right amount of space for the + * "without" case. + */ +#define _PyDateTime_TIMEHEAD \ + _PyTZINFO_HEAD \ + unsigned char data[_PyDateTime_TIME_DATASIZE]; + +typedef struct +{ + _PyDateTime_TIMEHEAD +} _PyDateTime_BaseTime; /* hastzinfo false */ + +typedef struct +{ + _PyDateTime_TIMEHEAD + PyObject *tzinfo; +} PyDateTime_Time; /* hastzinfo true */ + + +/* All datetime objects are of PyDateTime_DateTimeType, but that can be + * allocated in two ways too, just like for time objects above. In addition, + * the plain date type is a base class for datetime, so it must also have + * a hastzinfo member (although it's unused there). + */ +typedef struct +{ + _PyTZINFO_HEAD + unsigned char data[_PyDateTime_DATE_DATASIZE]; +} PyDateTime_Date; + +#define _PyDateTime_DATETIMEHEAD \ + _PyTZINFO_HEAD \ + unsigned char data[_PyDateTime_DATETIME_DATASIZE]; + +typedef struct +{ + _PyDateTime_DATETIMEHEAD +} _PyDateTime_BaseDateTime; /* hastzinfo false */ + +typedef struct +{ + _PyDateTime_DATETIMEHEAD + PyObject *tzinfo; +} PyDateTime_DateTime; /* hastzinfo true */ + + +/* Apply for date and datetime instances. */ +#define PyDateTime_GET_YEAR(o) ((((PyDateTime_Date*)o)->data[0] << 8) | \ + ((PyDateTime_Date*)o)->data[1]) +#define PyDateTime_GET_MONTH(o) (((PyDateTime_Date*)o)->data[2]) +#define PyDateTime_GET_DAY(o) (((PyDateTime_Date*)o)->data[3]) + +#define PyDateTime_DATE_GET_HOUR(o) (((PyDateTime_DateTime*)o)->data[4]) +#define PyDateTime_DATE_GET_MINUTE(o) (((PyDateTime_DateTime*)o)->data[5]) +#define PyDateTime_DATE_GET_SECOND(o) (((PyDateTime_DateTime*)o)->data[6]) +#define PyDateTime_DATE_GET_MICROSECOND(o) \ + ((((PyDateTime_DateTime*)o)->data[7] << 16) | \ + (((PyDateTime_DateTime*)o)->data[8] << 8) | \ + ((PyDateTime_DateTime*)o)->data[9]) + +/* Apply for time instances. */ +#define PyDateTime_TIME_GET_HOUR(o) (((PyDateTime_Time*)o)->data[0]) +#define PyDateTime_TIME_GET_MINUTE(o) (((PyDateTime_Time*)o)->data[1]) +#define PyDateTime_TIME_GET_SECOND(o) (((PyDateTime_Time*)o)->data[2]) +#define PyDateTime_TIME_GET_MICROSECOND(o) \ + ((((PyDateTime_Time*)o)->data[3] << 16) | \ + (((PyDateTime_Time*)o)->data[4] << 8) | \ + ((PyDateTime_Time*)o)->data[5]) + + +/* Define structure for C API. */ +typedef struct { + /* type objects */ + PyTypeObject *DateType; + PyTypeObject *DateTimeType; + PyTypeObject *TimeType; + PyTypeObject *DeltaType; + PyTypeObject *TZInfoType; + + /* constructors */ + PyObject *(*Date_FromDate)(int, int, int, PyTypeObject*); + PyObject *(*DateTime_FromDateAndTime)(int, int, int, int, int, int, int, + PyObject*, PyTypeObject*); + PyObject *(*Time_FromTime)(int, int, int, int, PyObject*, PyTypeObject*); + PyObject *(*Delta_FromDelta)(int, int, int, int, PyTypeObject*); + + /* constructors for the DB API */ + PyObject *(*DateTime_FromTimestamp)(PyObject*, PyObject*, PyObject*); + PyObject *(*Date_FromTimestamp)(PyObject*, PyObject*); + +} PyDateTime_CAPI; + +#define PyDateTime_CAPSULE_NAME "datetime.datetime_CAPI" + + +/* "magic" constant used to partially protect against developer mistakes. */ +#define DATETIME_API_MAGIC 0x414548d5 + +#ifdef Py_BUILD_CORE + +/* Macros for type checking when building the Python core. */ +#define PyDate_Check(op) PyObject_TypeCheck(op, &PyDateTime_DateType) +#define PyDate_CheckExact(op) (Py_TYPE(op) == &PyDateTime_DateType) + +#define PyDateTime_Check(op) PyObject_TypeCheck(op, &PyDateTime_DateTimeType) +#define PyDateTime_CheckExact(op) (Py_TYPE(op) == &PyDateTime_DateTimeType) + +#define PyTime_Check(op) PyObject_TypeCheck(op, &PyDateTime_TimeType) +#define PyTime_CheckExact(op) (Py_TYPE(op) == &PyDateTime_TimeType) + +#define PyDelta_Check(op) PyObject_TypeCheck(op, &PyDateTime_DeltaType) +#define PyDelta_CheckExact(op) (Py_TYPE(op) == &PyDateTime_DeltaType) + +#define PyTZInfo_Check(op) PyObject_TypeCheck(op, &PyDateTime_TZInfoType) +#define PyTZInfo_CheckExact(op) (Py_TYPE(op) == &PyDateTime_TZInfoType) + +#else + +/* Define global variable for the C API and a macro for setting it. */ +static PyDateTime_CAPI *PyDateTimeAPI = NULL; + +#define PyDateTime_IMPORT \ + PyDateTimeAPI = (PyDateTime_CAPI *)PyCapsule_Import(PyDateTime_CAPSULE_NAME, 0) + +/* Macros for type checking when not building the Python core. */ +#define PyDate_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->DateType) +#define PyDate_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->DateType) + +#define PyDateTime_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->DateTimeType) +#define PyDateTime_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->DateTimeType) + +#define PyTime_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->TimeType) +#define PyTime_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->TimeType) + +#define PyDelta_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->DeltaType) +#define PyDelta_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->DeltaType) + +#define PyTZInfo_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->TZInfoType) +#define PyTZInfo_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->TZInfoType) + +/* Macros for accessing constructors in a simplified fashion. */ +#define PyDate_FromDate(year, month, day) \ + PyDateTimeAPI->Date_FromDate(year, month, day, PyDateTimeAPI->DateType) + +#define PyDateTime_FromDateAndTime(year, month, day, hour, min, sec, usec) \ + PyDateTimeAPI->DateTime_FromDateAndTime(year, month, day, hour, \ + min, sec, usec, Py_None, PyDateTimeAPI->DateTimeType) + +#define PyTime_FromTime(hour, minute, second, usecond) \ + PyDateTimeAPI->Time_FromTime(hour, minute, second, usecond, \ + Py_None, PyDateTimeAPI->TimeType) + +#define PyDelta_FromDSU(days, seconds, useconds) \ + PyDateTimeAPI->Delta_FromDelta(days, seconds, useconds, 1, \ + PyDateTimeAPI->DeltaType) + +/* Macros supporting the DB API. */ +#define PyDateTime_FromTimestamp(args) \ + PyDateTimeAPI->DateTime_FromTimestamp( \ + (PyObject*) (PyDateTimeAPI->DateTimeType), args, NULL) + +#define PyDate_FromTimestamp(args) \ + PyDateTimeAPI->Date_FromTimestamp( \ + (PyObject*) (PyDateTimeAPI->DateType), args) + +#endif /* Py_BUILD_CORE */ + +#ifdef __cplusplus +} +#endif +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/descrobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,94 @@ +/* Descriptors */ +#ifndef Py_DESCROBJECT_H +#define Py_DESCROBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef PyObject *(*getter)(PyObject *, void *); +typedef int (*setter)(PyObject *, PyObject *, void *); + +typedef struct PyGetSetDef { + char *name; + getter get; + setter set; + char *doc; + void *closure; +} PyGetSetDef; + +typedef PyObject *(*wrapperfunc)(PyObject *self, PyObject *args, + void *wrapped); + +typedef PyObject *(*wrapperfunc_kwds)(PyObject *self, PyObject *args, + void *wrapped, PyObject *kwds); + +struct wrapperbase { + char *name; + int offset; + void *function; + wrapperfunc wrapper; + char *doc; + int flags; + PyObject *name_strobj; +}; + +/* Flags for above struct */ +#define PyWrapperFlag_KEYWORDS 1 /* wrapper function takes keyword args */ + +/* Various kinds of descriptor objects */ + +#define PyDescr_COMMON \ + PyObject_HEAD \ + PyTypeObject *d_type; \ + PyObject *d_name + +typedef struct { + PyDescr_COMMON; +} PyDescrObject; + +typedef struct { + PyDescr_COMMON; + PyMethodDef *d_method; +} PyMethodDescrObject; + +typedef struct { + PyDescr_COMMON; + struct PyMemberDef *d_member; +} PyMemberDescrObject; + +typedef struct { + PyDescr_COMMON; + PyGetSetDef *d_getset; +} PyGetSetDescrObject; + +typedef struct { + PyDescr_COMMON; + struct wrapperbase *d_base; + void *d_wrapped; /* This can be any function pointer */ +} PyWrapperDescrObject; + +PyAPI_DATA(PyTypeObject) PyWrapperDescr_Type; +PyAPI_DATA(PyTypeObject) PyDictProxy_Type; +PyAPI_DATA(PyTypeObject) PyGetSetDescr_Type; +PyAPI_DATA(PyTypeObject) PyMemberDescr_Type; + +PyAPI_FUNC(PyObject *) PyDescr_NewMethod(PyTypeObject *, PyMethodDef *); +PyAPI_FUNC(PyObject *) PyDescr_NewClassMethod(PyTypeObject *, PyMethodDef *); +PyAPI_FUNC(PyObject *) PyDescr_NewMember(PyTypeObject *, + struct PyMemberDef *); +PyAPI_FUNC(PyObject *) PyDescr_NewGetSet(PyTypeObject *, + struct PyGetSetDef *); +PyAPI_FUNC(PyObject *) PyDescr_NewWrapper(PyTypeObject *, + struct wrapperbase *, void *); +#define PyDescr_IsData(d) (Py_TYPE(d)->tp_descr_set != NULL) + +PyAPI_FUNC(PyObject *) PyDictProxy_New(PyObject *); +PyAPI_FUNC(PyObject *) PyWrapper_New(PyObject *, PyObject *); + + +PyAPI_DATA(PyTypeObject) PyProperty_Type; +#ifdef __cplusplus +} +#endif +#endif /* !Py_DESCROBJECT_H */ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/dictobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,157 @@ +#ifndef Py_DICTOBJECT_H +#define Py_DICTOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Dictionary object type -- mapping from hashable object to object */ + +/* The distribution includes a separate file, Objects/dictnotes.txt, + describing explorations into dictionary design and optimization. + It covers typical dictionary use patterns, the parameters for + tuning dictionaries, and several ideas for possible optimizations. +*/ + +/* +There are three kinds of slots in the table: + +1. Unused. me_key == me_value == NULL + Does not hold an active (key, value) pair now and never did. Unused can + transition to Active upon key insertion. This is the only case in which + me_key is NULL, and is each slot's initial state. + +2. Active. me_key != NULL and me_key != dummy and me_value != NULL + Holds an active (key, value) pair. Active can transition to Dummy upon + key deletion. This is the only case in which me_value != NULL. + +3. Dummy. me_key == dummy and me_value == NULL + Previously held an active (key, value) pair, but that was deleted and an + active pair has not yet overwritten the slot. Dummy can transition to + Active upon key insertion. Dummy slots cannot be made Unused again + (cannot have me_key set to NULL), else the probe sequence in case of + collision would have no way to know they were once active. + +Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to +hold a search finger. The me_hash field of Unused or Dummy slots has no +meaning otherwise. +*/ + +/* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are + * allocated directly in the dict object (in the ma_smalltable member). + * It must be a power of 2, and at least 4. 8 allows dicts with no more + * than 5 active entries to live in ma_smalltable (and so avoid an + * additional malloc); instrumentation suggested this suffices for the + * majority of dicts (consisting mostly of usually-small instance dicts and + * usually-small dicts created to pass keyword arguments). + */ +#define PyDict_MINSIZE 8 + +typedef struct { + /* Cached hash code of me_key. Note that hash codes are C longs. + * We have to use Py_ssize_t instead because dict_popitem() abuses + * me_hash to hold a search finger. + */ + Py_ssize_t me_hash; + PyObject *me_key; + PyObject *me_value; +} PyDictEntry; + +/* +To ensure the lookup algorithm terminates, there must be at least one Unused +slot (NULL key) in the table. +The value ma_fill is the number of non-NULL keys (sum of Active and Dummy); +ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL +values == the number of Active items). +To avoid slowing down lookups on a near-full table, we resize the table when +it's two-thirds full. +*/ +typedef struct _dictobject PyDictObject; +struct _dictobject { + PyObject_HEAD + Py_ssize_t ma_fill; /* # Active + # Dummy */ + Py_ssize_t ma_used; /* # Active */ + + /* The table contains ma_mask + 1 slots, and that's a power of 2. + * We store the mask instead of the size because the mask is more + * frequently needed. + */ + Py_ssize_t ma_mask; + + /* ma_table points to ma_smalltable for small tables, else to + * additional malloc'ed memory. ma_table is never NULL! This rule + * saves repeated runtime null-tests in the workhorse getitem and + * setitem calls. + */ + PyDictEntry *ma_table; + PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, long hash); + PyDictEntry ma_smalltable[PyDict_MINSIZE]; +}; + +PyAPI_DATA(PyTypeObject) PyDict_Type; +PyAPI_DATA(PyTypeObject) PyDictIterKey_Type; +PyAPI_DATA(PyTypeObject) PyDictIterValue_Type; +PyAPI_DATA(PyTypeObject) PyDictIterItem_Type; +PyAPI_DATA(PyTypeObject) PyDictKeys_Type; +PyAPI_DATA(PyTypeObject) PyDictItems_Type; +PyAPI_DATA(PyTypeObject) PyDictValues_Type; + +#define PyDict_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_DICT_SUBCLASS) +#define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type) +#define PyDictKeys_Check(op) (Py_TYPE(op) == &PyDictKeys_Type) +#define PyDictItems_Check(op) (Py_TYPE(op) == &PyDictItems_Type) +#define PyDictValues_Check(op) (Py_TYPE(op) == &PyDictValues_Type) +/* This excludes Values, since they are not sets. */ +# define PyDictViewSet_Check(op) \ + (PyDictKeys_Check(op) || PyDictItems_Check(op)) + +PyAPI_FUNC(PyObject *) PyDict_New(void); +PyAPI_FUNC(PyObject *) PyDict_GetItem(PyObject *mp, PyObject *key); +PyAPI_FUNC(PyObject *) _PyDict_GetItemWithError(PyObject *mp, PyObject *key); +PyAPI_FUNC(int) PyDict_SetItem(PyObject *mp, PyObject *key, PyObject *item); +PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); +PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); +PyAPI_FUNC(int) PyDict_Next( + PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value); +PyAPI_FUNC(int) _PyDict_Next( + PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, long *hash); +PyAPI_FUNC(PyObject *) PyDict_Keys(PyObject *mp); +PyAPI_FUNC(PyObject *) PyDict_Values(PyObject *mp); +PyAPI_FUNC(PyObject *) PyDict_Items(PyObject *mp); +PyAPI_FUNC(Py_ssize_t) PyDict_Size(PyObject *mp); +PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp); +PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); +PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash); +PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); +PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp); + +/* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ +PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); + +/* PyDict_Merge updates/merges from a mapping object (an object that + supports PyMapping_Keys() and PyObject_GetItem()). If override is true, + the last occurrence of a key wins, else the first. The Python + dict.update(other) is equivalent to PyDict_Merge(dict, other, 1). +*/ +PyAPI_FUNC(int) PyDict_Merge(PyObject *mp, + PyObject *other, + int override); + +/* PyDict_MergeFromSeq2 updates/merges from an iterable object producing + iterable objects of length 2. If override is true, the last occurrence + of a key wins, else the first. The Python dict constructor dict(seq2) + is equivalent to dict={}; PyDict_MergeFromSeq(dict, seq2, 1). +*/ +PyAPI_FUNC(int) PyDict_MergeFromSeq2(PyObject *d, + PyObject *seq2, + int override); + +PyAPI_FUNC(PyObject *) PyDict_GetItemString(PyObject *dp, const char *key); +PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item); +PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_DICTOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/dtoa.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,15 @@ +#ifndef PY_NO_SHORT_FLOAT_REPR +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(double) _Py_dg_strtod(const char *str, char **ptr); +PyAPI_FUNC(char *) _Py_dg_dtoa(double d, int mode, int ndigits, + int *decpt, int *sign, char **rve); +PyAPI_FUNC(void) _Py_dg_freedtoa(char *s); + + +#ifdef __cplusplus +} +#endif +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/enumobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,17 @@ +#ifndef Py_ENUMOBJECT_H +#define Py_ENUMOBJECT_H + +/* Enumerate Object */ + +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyEnum_Type; +PyAPI_DATA(PyTypeObject) PyReversed_Type; + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_ENUMOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/errcode.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,36 @@ +#ifndef Py_ERRCODE_H +#define Py_ERRCODE_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Error codes passed around between file input, tokenizer, parser and + interpreter. This is necessary so we can turn them into Python + exceptions at a higher level. Note that some errors have a + slightly different meaning when passed from the tokenizer to the + parser than when passed from the parser to the interpreter; e.g. + the parser only returns E_EOF when it hits EOF immediately, and it + never returns E_OK. */ + +#define E_OK 10 /* No error */ +#define E_EOF 11 /* End Of File */ +#define E_INTR 12 /* Interrupted */ +#define E_TOKEN 13 /* Bad token */ +#define E_SYNTAX 14 /* Syntax error */ +#define E_NOMEM 15 /* Ran out of memory */ +#define E_DONE 16 /* Parsing complete */ +#define E_ERROR 17 /* Execution error */ +#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */ +#define E_OVERFLOW 19 /* Node had too many children */ +#define E_TOODEEP 20 /* Too many indentation levels */ +#define E_DEDENT 21 /* No matching outer block for dedent */ +#define E_DECODE 22 /* Error in decoding into Unicode */ +#define E_EOFS 23 /* EOF in triple-quoted string */ +#define E_EOLS 24 /* EOL in single-quoted string */ +#define E_LINECONT 25 /* Unexpected characters after a line continuation */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ERRCODE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/eval.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,25 @@ + +/* Interface to execute compiled code */ + +#ifndef Py_EVAL_H +#define Py_EVAL_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(PyObject *) PyEval_EvalCode(PyCodeObject *, PyObject *, PyObject *); + +PyAPI_FUNC(PyObject *) PyEval_EvalCodeEx(PyCodeObject *co, + PyObject *globals, + PyObject *locals, + PyObject **args, int argc, + PyObject **kwds, int kwdc, + PyObject **defs, int defc, + PyObject *closure); + +PyAPI_FUNC(PyObject *) _PyEval_CallTracing(PyObject *func, PyObject *args); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_EVAL_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/fileobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,97 @@ + +/* File object interface */ + +#ifndef Py_FILEOBJECT_H +#define Py_FILEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + FILE *f_fp; + PyObject *f_name; + PyObject *f_mode; + int (*f_close)(FILE *); + int f_softspace; /* Flag used by 'print' command */ + int f_binary; /* Flag which indicates whether the file is + open in binary (1) or text (0) mode */ + char* f_buf; /* Allocated readahead buffer */ + char* f_bufend; /* Points after last occupied position */ + char* f_bufptr; /* Current buffer position */ + char *f_setbuf; /* Buffer for setbuf(3) and setvbuf(3) */ + int f_univ_newline; /* Handle any newline convention */ + int f_newlinetypes; /* Types of newlines seen */ + int f_skipnextlf; /* Skip next \n */ + PyObject *f_encoding; + PyObject *f_errors; + PyObject *weakreflist; /* List of weak references */ + int unlocked_count; /* Num. currently running sections of code + using f_fp with the GIL released. */ + int readable; + int writable; +} PyFileObject; + +PyAPI_DATA(PyTypeObject) PyFile_Type; + +#define PyFile_Check(op) PyObject_TypeCheck(op, &PyFile_Type) +#define PyFile_CheckExact(op) (Py_TYPE(op) == &PyFile_Type) + +PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *); +PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int); +PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *); +PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors); +PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, + int (*)(FILE *)); +PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *); +PyAPI_FUNC(void) PyFile_IncUseCount(PyFileObject *); +PyAPI_FUNC(void) PyFile_DecUseCount(PyFileObject *); +PyAPI_FUNC(PyObject *) PyFile_Name(PyObject *); +PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); +PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); +PyAPI_FUNC(int) PyFile_SoftSpace(PyObject *, int); +PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); +PyAPI_FUNC(int) PyObject_AsFileDescriptor(PyObject *); + +/* The default encoding used by the platform file system APIs + If non-NULL, this is different than the default encoding for strings +*/ +PyAPI_DATA(const char *) Py_FileSystemDefaultEncoding; + +/* Routines to replace fread() and fgets() which accept any of \r, \n + or \r\n as line terminators. +*/ +#define PY_STDIOTEXTMODE "b" +char *Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *); +size_t Py_UniversalNewlineFread(char *, size_t, FILE *, PyObject *); + +/* A routine to do sanity checking on the file mode string. returns + non-zero on if an exception occurred +*/ +int _PyFile_SanitizeMode(char *mode); + +#if defined _MSC_VER && _MSC_VER >= 1400 +/* A routine to check if a file descriptor is valid on Windows. Returns 0 + * and sets errno to EBADF if it isn't. This is to avoid Assertions + * from various functions in the Windows CRT beginning with + * Visual Studio 2005 + */ +int _PyVerify_fd(int fd); +#elif defined _MSC_VER && _MSC_VER >= 1200 +/* fdopen doesn't set errno EBADF and crashes for large fd on debug build */ +#define _PyVerify_fd(fd) (_get_osfhandle(fd) >= 0) +#else +#define _PyVerify_fd(A) (1) /* dummy */ +#endif + +/* A routine to check if a file descriptor can be select()-ed. */ +#ifdef HAVE_SELECT + #define _PyIsSelectable_fd(FD) (((FD) >= 0) && ((FD) < FD_SETSIZE)) +#else + #define _PyIsSelectable_fd(FD) (1) +#endif /* HAVE_SELECT */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FILEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/floatobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,140 @@ + +/* Float object interface */ + +/* +PyFloatObject represents a (double precision) floating point number. +*/ + +#ifndef Py_FLOATOBJECT_H +#define Py_FLOATOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + double ob_fval; +} PyFloatObject; + +PyAPI_DATA(PyTypeObject) PyFloat_Type; + +#define PyFloat_Check(op) PyObject_TypeCheck(op, &PyFloat_Type) +#define PyFloat_CheckExact(op) (Py_TYPE(op) == &PyFloat_Type) + +/* The str() precision PyFloat_STR_PRECISION is chosen so that in most cases, + the rounding noise created by various operations is suppressed, while + giving plenty of precision for practical use. */ + +#define PyFloat_STR_PRECISION 12 + +#ifdef Py_NAN +#define Py_RETURN_NAN return PyFloat_FromDouble(Py_NAN) +#endif + +#define Py_RETURN_INF(sign) do \ + if (copysign(1., sign) == 1.) { \ + return PyFloat_FromDouble(Py_HUGE_VAL); \ + } else { \ + return PyFloat_FromDouble(-Py_HUGE_VAL); \ + } while(0) + +PyAPI_FUNC(double) PyFloat_GetMax(void); +PyAPI_FUNC(double) PyFloat_GetMin(void); +PyAPI_FUNC(PyObject *) PyFloat_GetInfo(void); + +/* Return Python float from string PyObject. Second argument ignored on + input, and, if non-NULL, NULL is stored into *junk (this tried to serve a + purpose once but can't be made to work as intended). */ +PyAPI_FUNC(PyObject *) PyFloat_FromString(PyObject*, char** junk); + +/* Return Python float from C double. */ +PyAPI_FUNC(PyObject *) PyFloat_FromDouble(double); + +/* Extract C double from Python float. The macro version trades safety for + speed. */ +PyAPI_FUNC(double) PyFloat_AsDouble(PyObject *); +#define PyFloat_AS_DOUBLE(op) (((PyFloatObject *)(op))->ob_fval) + +/* Write repr(v) into the char buffer argument, followed by null byte. The + buffer must be "big enough"; >= 100 is very safe. + PyFloat_AsReprString(buf, x) strives to print enough digits so that + PyFloat_FromString(buf) then reproduces x exactly. */ +PyAPI_FUNC(void) PyFloat_AsReprString(char*, PyFloatObject *v); + +/* Write str(v) into the char buffer argument, followed by null byte. The + buffer must be "big enough"; >= 100 is very safe. Note that it's + unusual to be able to get back the float you started with from + PyFloat_AsString's result -- use PyFloat_AsReprString() if you want to + preserve precision across conversions. */ +PyAPI_FUNC(void) PyFloat_AsString(char*, PyFloatObject *v); + +/* _PyFloat_{Pack,Unpack}{4,8} + * + * The struct and pickle (at least) modules need an efficient platform- + * independent way to store floating-point values as byte strings. + * The Pack routines produce a string from a C double, and the Unpack + * routines produce a C double from such a string. The suffix (4 or 8) + * specifies the number of bytes in the string. + * + * On platforms that appear to use (see _PyFloat_Init()) IEEE-754 formats + * these functions work by copying bits. On other platforms, the formats the + * 4- byte format is identical to the IEEE-754 single precision format, and + * the 8-byte format to the IEEE-754 double precision format, although the + * packing of INFs and NaNs (if such things exist on the platform) isn't + * handled correctly, and attempting to unpack a string containing an IEEE + * INF or NaN will raise an exception. + * + * On non-IEEE platforms with more precision, or larger dynamic range, than + * 754 supports, not all values can be packed; on non-IEEE platforms with less + * precision, or smaller dynamic range, not all values can be unpacked. What + * happens in such cases is partly accidental (alas). + */ + +/* The pack routines write 4 or 8 bytes, starting at p. le is a bool + * argument, true if you want the string in little-endian format (exponent + * last, at p+3 or p+7), false if you want big-endian format (exponent + * first, at p). + * Return value: 0 if all is OK, -1 if error (and an exception is + * set, most likely OverflowError). + * There are two problems on non-IEEE platforms: + * 1): What this does is undefined if x is a NaN or infinity. + * 2): -0.0 and +0.0 produce the same string. + */ +PyAPI_FUNC(int) _PyFloat_Pack4(double x, unsigned char *p, int le); +PyAPI_FUNC(int) _PyFloat_Pack8(double x, unsigned char *p, int le); + +/* Used to get the important decimal digits of a double */ +PyAPI_FUNC(int) _PyFloat_Digits(char *buf, double v, int *signum); +PyAPI_FUNC(void) _PyFloat_DigitsInit(void); + +/* The unpack routines read 4 or 8 bytes, starting at p. le is a bool + * argument, true if the string is in little-endian format (exponent + * last, at p+3 or p+7), false if big-endian (exponent first, at p). + * Return value: The unpacked double. On error, this is -1.0 and + * PyErr_Occurred() is true (and an exception is set, most likely + * OverflowError). Note that on a non-IEEE platform this will refuse + * to unpack a string that represents a NaN or infinity. + */ +PyAPI_FUNC(double) _PyFloat_Unpack4(const unsigned char *p, int le); +PyAPI_FUNC(double) _PyFloat_Unpack8(const unsigned char *p, int le); + +/* free list api */ +PyAPI_FUNC(int) PyFloat_ClearFreeList(void); + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + +/* Round a C double x to the closest multiple of 10**-ndigits. Returns a + Python float on success, or NULL (with an appropriate exception set) on + failure. Used in builtin_round in bltinmodule.c. */ +PyAPI_FUNC(PyObject *) _Py_double_round(double x, int ndigits); + + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FLOATOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/frameobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,89 @@ + +/* Frame object interface */ + +#ifndef Py_FRAMEOBJECT_H +#define Py_FRAMEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int b_type; /* what kind of block this is */ + int b_handler; /* where to jump to find handler */ + int b_level; /* value stack level to pop to */ +} PyTryBlock; + +typedef struct _frame { + PyObject_VAR_HEAD + struct _frame *f_back; /* previous frame, or NULL */ + PyCodeObject *f_code; /* code segment */ + PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ + PyObject *f_globals; /* global symbol table (PyDictObject) */ + PyObject *f_locals; /* local symbol table (any mapping) */ + PyObject **f_valuestack; /* points after the last local */ + /* Next free slot in f_valuestack. Frame creation sets to f_valuestack. + Frame evaluation usually NULLs it, but a frame that yields sets it + to the current stack top. */ + PyObject **f_stacktop; + PyObject *f_trace; /* Trace function */ + + /* If an exception is raised in this frame, the next three are used to + * record the exception info (if any) originally in the thread state. See + * comments before set_exc_info() -- it's not obvious. + * Invariant: if _type is NULL, then so are _value and _traceback. + * Desired invariant: all three are NULL, or all three are non-NULL. That + * one isn't currently true, but "should be". + */ + PyObject *f_exc_type, *f_exc_value, *f_exc_traceback; + + PyThreadState *f_tstate; + int f_lasti; /* Last instruction if called */ + /* Call PyFrame_GetLineNumber() instead of reading this field + directly. As of 2.3 f_lineno is only valid when tracing is + active (i.e. when f_trace is set). At other times we use + PyCode_Addr2Line to calculate the line from the current + bytecode index. */ + int f_lineno; /* Current line number */ + int f_iblock; /* index in f_blockstack */ + PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ + PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ +} PyFrameObject; + + +/* Standard object interface */ + +PyAPI_DATA(PyTypeObject) PyFrame_Type; + +#define PyFrame_Check(op) ((op)->ob_type == &PyFrame_Type) +#define PyFrame_IsRestricted(f) \ + ((f)->f_builtins != (f)->f_tstate->interp->builtins) + +PyAPI_FUNC(PyFrameObject *) PyFrame_New(PyThreadState *, PyCodeObject *, + PyObject *, PyObject *); + + +/* The rest of the interface is specific for frame objects */ + +/* Block management functions */ + +PyAPI_FUNC(void) PyFrame_BlockSetup(PyFrameObject *, int, int, int); +PyAPI_FUNC(PyTryBlock *) PyFrame_BlockPop(PyFrameObject *); + +/* Extend the value stack */ + +PyAPI_FUNC(PyObject **) PyFrame_ExtendStack(PyFrameObject *, int, int); + +/* Conversions between "fast locals" and locals in dictionary */ + +PyAPI_FUNC(void) PyFrame_LocalsToFast(PyFrameObject *, int); +PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *); + +PyAPI_FUNC(int) PyFrame_ClearFreeList(void); + +/* Return the line of code the frame is currently executing. */ +PyAPI_FUNC(int) PyFrame_GetLineNumber(PyFrameObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FRAMEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/funcobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,76 @@ + +/* Function object interface */ + +#ifndef Py_FUNCOBJECT_H +#define Py_FUNCOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Function objects and code objects should not be confused with each other: + * + * Function objects are created by the execution of the 'def' statement. + * They reference a code object in their func_code attribute, which is a + * purely syntactic object, i.e. nothing more than a compiled version of some + * source code lines. There is one code object per source code "fragment", + * but each code object can be referenced by zero or many function objects + * depending only on how many times the 'def' statement in the source was + * executed so far. + */ + +typedef struct { + PyObject_HEAD + PyObject *func_code; /* A code object */ + PyObject *func_globals; /* A dictionary (other mappings won't do) */ + PyObject *func_defaults; /* NULL or a tuple */ + PyObject *func_closure; /* NULL or a tuple of cell objects */ + PyObject *func_doc; /* The __doc__ attribute, can be anything */ + PyObject *func_name; /* The __name__ attribute, a string object */ + PyObject *func_dict; /* The __dict__ attribute, a dict or NULL */ + PyObject *func_weakreflist; /* List of weak references */ + PyObject *func_module; /* The __module__ attribute, can be anything */ + + /* Invariant: + * func_closure contains the bindings for func_code->co_freevars, so + * PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code) + * (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0). + */ +} PyFunctionObject; + +PyAPI_DATA(PyTypeObject) PyFunction_Type; + +#define PyFunction_Check(op) (Py_TYPE(op) == &PyFunction_Type) + +PyAPI_FUNC(PyObject *) PyFunction_New(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetCode(PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetGlobals(PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetModule(PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetDefaults(PyObject *); +PyAPI_FUNC(int) PyFunction_SetDefaults(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetClosure(PyObject *); +PyAPI_FUNC(int) PyFunction_SetClosure(PyObject *, PyObject *); + +/* Macros for direct access to these values. Type checks are *not* + done, so use with care. */ +#define PyFunction_GET_CODE(func) \ + (((PyFunctionObject *)func) -> func_code) +#define PyFunction_GET_GLOBALS(func) \ + (((PyFunctionObject *)func) -> func_globals) +#define PyFunction_GET_MODULE(func) \ + (((PyFunctionObject *)func) -> func_module) +#define PyFunction_GET_DEFAULTS(func) \ + (((PyFunctionObject *)func) -> func_defaults) +#define PyFunction_GET_CLOSURE(func) \ + (((PyFunctionObject *)func) -> func_closure) + +/* The classmethod and staticmethod types lives here, too */ +PyAPI_DATA(PyTypeObject) PyClassMethod_Type; +PyAPI_DATA(PyTypeObject) PyStaticMethod_Type; + +PyAPI_FUNC(PyObject *) PyClassMethod_New(PyObject *); +PyAPI_FUNC(PyObject *) PyStaticMethod_New(PyObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FUNCOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/genobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,40 @@ + +/* Generator object interface */ + +#ifndef Py_GENOBJECT_H +#define Py_GENOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +struct _frame; /* Avoid including frameobject.h */ + +typedef struct { + PyObject_HEAD + /* The gi_ prefix is intended to remind of generator-iterator. */ + + /* Note: gi_frame can be NULL if the generator is "finished" */ + struct _frame *gi_frame; + + /* True if generator is being executed. */ + int gi_running; + + /* The code object backing the generator */ + PyObject *gi_code; + + /* List of weak reference. */ + PyObject *gi_weakreflist; +} PyGenObject; + +PyAPI_DATA(PyTypeObject) PyGen_Type; + +#define PyGen_Check(op) PyObject_TypeCheck(op, &PyGen_Type) +#define PyGen_CheckExact(op) (Py_TYPE(op) == &PyGen_Type) + +PyAPI_FUNC(PyObject *) PyGen_New(struct _frame *); +PyAPI_FUNC(int) PyGen_NeedsFinalizing(PyGenObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_GENOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/graminit.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,87 @@ +/* Generated by Parser/pgen */ + +#define single_input 256 +#define file_input 257 +#define eval_input 258 +#define decorator 259 +#define decorators 260 +#define decorated 261 +#define funcdef 262 +#define parameters 263 +#define varargslist 264 +#define fpdef 265 +#define fplist 266 +#define stmt 267 +#define simple_stmt 268 +#define small_stmt 269 +#define expr_stmt 270 +#define augassign 271 +#define print_stmt 272 +#define del_stmt 273 +#define pass_stmt 274 +#define flow_stmt 275 +#define break_stmt 276 +#define continue_stmt 277 +#define return_stmt 278 +#define yield_stmt 279 +#define raise_stmt 280 +#define import_stmt 281 +#define import_name 282 +#define import_from 283 +#define import_as_name 284 +#define dotted_as_name 285 +#define import_as_names 286 +#define dotted_as_names 287 +#define dotted_name 288 +#define global_stmt 289 +#define exec_stmt 290 +#define assert_stmt 291 +#define compound_stmt 292 +#define if_stmt 293 +#define while_stmt 294 +#define for_stmt 295 +#define try_stmt 296 +#define with_stmt 297 +#define with_item 298 +#define except_clause 299 +#define suite 300 +#define testlist_safe 301 +#define old_test 302 +#define old_lambdef 303 +#define test 304 +#define or_test 305 +#define and_test 306 +#define not_test 307 +#define comparison 308 +#define comp_op 309 +#define expr 310 +#define xor_expr 311 +#define and_expr 312 +#define shift_expr 313 +#define arith_expr 314 +#define term 315 +#define factor 316 +#define power 317 +#define atom 318 +#define listmaker 319 +#define testlist_comp 320 +#define lambdef 321 +#define trailer 322 +#define subscriptlist 323 +#define subscript 324 +#define sliceop 325 +#define exprlist 326 +#define testlist 327 +#define dictorsetmaker 328 +#define classdef 329 +#define arglist 330 +#define argument 331 +#define list_iter 332 +#define list_for 333 +#define list_if 334 +#define comp_iter 335 +#define comp_for 336 +#define comp_if 337 +#define testlist1 338 +#define encoding_decl 339 +#define yield_expr 340
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/grammar.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,93 @@ + +/* Grammar interface */ + +#ifndef Py_GRAMMAR_H +#define Py_GRAMMAR_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "bitset.h" /* Sigh... */ + +/* A label of an arc */ + +typedef struct { + int lb_type; + char *lb_str; +} label; + +#define EMPTY 0 /* Label number 0 is by definition the empty label */ + +/* A list of labels */ + +typedef struct { + int ll_nlabels; + label *ll_label; +} labellist; + +/* An arc from one state to another */ + +typedef struct { + short a_lbl; /* Label of this arc */ + short a_arrow; /* State where this arc goes to */ +} arc; + +/* A state in a DFA */ + +typedef struct { + int s_narcs; + arc *s_arc; /* Array of arcs */ + + /* Optional accelerators */ + int s_lower; /* Lowest label index */ + int s_upper; /* Highest label index */ + int *s_accel; /* Accelerator */ + int s_accept; /* Nonzero for accepting state */ +} state; + +/* A DFA */ + +typedef struct { + int d_type; /* Non-terminal this represents */ + char *d_name; /* For printing */ + int d_initial; /* Initial state */ + int d_nstates; + state *d_state; /* Array of states */ + bitset d_first; +} dfa; + +/* A grammar */ + +typedef struct { + int g_ndfas; + dfa *g_dfa; /* Array of DFAs */ + labellist g_ll; + int g_start; /* Start symbol of the grammar */ + int g_accel; /* Set if accelerators present */ +} grammar; + +/* FUNCTIONS */ + +grammar *newgrammar(int start); +dfa *adddfa(grammar *g, int type, char *name); +int addstate(dfa *d); +void addarc(dfa *d, int from, int to, int lbl); +dfa *PyGrammar_FindDFA(grammar *g, int type); + +int addlabel(labellist *ll, int type, char *str); +int findlabel(labellist *ll, int type, char *str); +char *PyGrammar_LabelRepr(label *lb); +void translatelabels(grammar *g); + +void addfirstsets(grammar *g); + +void PyGrammar_AddAccelerators(grammar *g); +void PyGrammar_RemoveAccelerators(grammar *); + +void printgrammar(grammar *g, FILE *fp); +void printnonterminals(grammar *g, FILE *fp); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_GRAMMAR_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/import.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,71 @@ + +/* Module definition and import interface */ + +#ifndef Py_IMPORT_H +#define Py_IMPORT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(long) PyImport_GetMagicNumber(void); +PyAPI_FUNC(PyObject *) PyImport_ExecCodeModule(char *name, PyObject *co); +PyAPI_FUNC(PyObject *) PyImport_ExecCodeModuleEx( + char *name, PyObject *co, char *pathname); +PyAPI_FUNC(PyObject *) PyImport_GetModuleDict(void); +PyAPI_FUNC(PyObject *) PyImport_AddModule(const char *name); +PyAPI_FUNC(PyObject *) PyImport_ImportModule(const char *name); +PyAPI_FUNC(PyObject *) PyImport_ImportModuleNoBlock(const char *); +PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevel(char *name, + PyObject *globals, PyObject *locals, PyObject *fromlist, int level); + +#define PyImport_ImportModuleEx(n, g, l, f) \ + PyImport_ImportModuleLevel(n, g, l, f, -1) + +PyAPI_FUNC(PyObject *) PyImport_GetImporter(PyObject *path); +PyAPI_FUNC(PyObject *) PyImport_Import(PyObject *name); +PyAPI_FUNC(PyObject *) PyImport_ReloadModule(PyObject *m); +PyAPI_FUNC(void) PyImport_Cleanup(void); +PyAPI_FUNC(int) PyImport_ImportFrozenModule(char *); + +#ifdef WITH_THREAD +PyAPI_FUNC(void) _PyImport_AcquireLock(void); +PyAPI_FUNC(int) _PyImport_ReleaseLock(void); +#else +#define _PyImport_AcquireLock() +#define _PyImport_ReleaseLock() 1 +#endif + +PyAPI_FUNC(struct filedescr *) _PyImport_FindModule( + const char *, PyObject *, char *, size_t, FILE **, PyObject **); +PyAPI_FUNC(int) _PyImport_IsScript(struct filedescr *); +PyAPI_FUNC(void) _PyImport_ReInitLock(void); + +PyAPI_FUNC(PyObject *) _PyImport_FindExtension(char *, char *); +PyAPI_FUNC(PyObject *) _PyImport_FixupExtension(char *, char *); + +struct _inittab { + char *name; + void (*initfunc)(void); +}; + +PyAPI_DATA(PyTypeObject) PyNullImporter_Type; +PyAPI_DATA(struct _inittab *) PyImport_Inittab; + +PyAPI_FUNC(int) PyImport_AppendInittab(const char *name, void (*initfunc)(void)); +PyAPI_FUNC(int) PyImport_ExtendInittab(struct _inittab *newtab); + +struct _frozen { + char *name; + unsigned char *code; + int size; +}; + +/* Embedding apps may change this pointer to point to their favorite + collection of frozen modules: */ + +PyAPI_DATA(struct _frozen *) PyImport_FrozenModules; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_IMPORT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/intobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,81 @@ + +/* Integer object interface */ + +/* +PyIntObject represents a (long) integer. This is an immutable object; +an integer cannot change its value after creation. + +There are functions to create new integer objects, to test an object +for integer-ness, and to get the integer value. The latter functions +returns -1 and sets errno to EBADF if the object is not an PyIntObject. +None of the functions should be applied to nil objects. + +The type PyIntObject is (unfortunately) exposed here so we can declare +_Py_TrueStruct and _Py_ZeroStruct in boolobject.h; don't use this. +*/ + +#ifndef Py_INTOBJECT_H +#define Py_INTOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + long ob_ival; +} PyIntObject; + +PyAPI_DATA(PyTypeObject) PyInt_Type; + +#define PyInt_Check(op) \ + PyType_FastSubclass((op)->ob_type, Py_TPFLAGS_INT_SUBCLASS) +#define PyInt_CheckExact(op) ((op)->ob_type == &PyInt_Type) + +PyAPI_FUNC(PyObject *) PyInt_FromString(char*, char**, int); +#ifdef Py_USING_UNICODE +PyAPI_FUNC(PyObject *) PyInt_FromUnicode(Py_UNICODE*, Py_ssize_t, int); +#endif +PyAPI_FUNC(PyObject *) PyInt_FromLong(long); +PyAPI_FUNC(PyObject *) PyInt_FromSize_t(size_t); +PyAPI_FUNC(PyObject *) PyInt_FromSsize_t(Py_ssize_t); +PyAPI_FUNC(long) PyInt_AsLong(PyObject *); +PyAPI_FUNC(Py_ssize_t) PyInt_AsSsize_t(PyObject *); +PyAPI_FUNC(int) _PyInt_AsInt(PyObject *); +PyAPI_FUNC(unsigned long) PyInt_AsUnsignedLongMask(PyObject *); +#ifdef HAVE_LONG_LONG +PyAPI_FUNC(unsigned PY_LONG_LONG) PyInt_AsUnsignedLongLongMask(PyObject *); +#endif + +PyAPI_FUNC(long) PyInt_GetMax(void); + +/* Macro, trading safety for speed */ +#define PyInt_AS_LONG(op) (((PyIntObject *)(op))->ob_ival) + +/* These aren't really part of the Int object, but they're handy; the protos + * are necessary for systems that need the magic of PyAPI_FUNC and that want + * to have stropmodule as a dynamically loaded module instead of building it + * into the main Python shared library/DLL. Guido thinks I'm weird for + * building it this way. :-) [cjh] + */ +PyAPI_FUNC(unsigned long) PyOS_strtoul(char *, char **, int); +PyAPI_FUNC(long) PyOS_strtol(char *, char **, int); + +/* free list api */ +PyAPI_FUNC(int) PyInt_ClearFreeList(void); + +/* Convert an integer to the given base. Returns a string. + If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. + If newstyle is zero, then use the pre-2.6 behavior of octal having + a leading "0" */ +PyAPI_FUNC(PyObject*) _PyInt_Format(PyIntObject* v, int base, int newstyle); + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyInt_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/intrcheck.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,15 @@ + +#ifndef Py_INTRCHECK_H +#define Py_INTRCHECK_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(int) PyOS_InterruptOccurred(void); +PyAPI_FUNC(void) PyOS_InitInterrupts(void); +PyAPI_FUNC(void) PyOS_AfterFork(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTRCHECK_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/iterobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,23 @@ +#ifndef Py_ITEROBJECT_H +#define Py_ITEROBJECT_H +/* Iterators (the basic kind, over a sequence) */ +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PySeqIter_Type; + +#define PySeqIter_Check(op) (Py_TYPE(op) == &PySeqIter_Type) + +PyAPI_FUNC(PyObject *) PySeqIter_New(PyObject *); + +PyAPI_DATA(PyTypeObject) PyCallIter_Type; + +#define PyCallIter_Check(op) (Py_TYPE(op) == &PyCallIter_Type) + +PyAPI_FUNC(PyObject *) PyCallIter_New(PyObject *, PyObject *); +#ifdef __cplusplus +} +#endif +#endif /* !Py_ITEROBJECT_H */ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/listobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,68 @@ + +/* List object interface */ + +/* +Another generally useful object type is a list of object pointers. +This is a mutable type: the list items can be changed, and items can be +added or removed. Out-of-range indices or non-list objects are ignored. + +*** WARNING *** PyList_SetItem does not increment the new item's reference +count, but does decrement the reference count of the item it replaces, +if not nil. It does *decrement* the reference count if it is *not* +inserted in the list. Similarly, PyList_GetItem does not increment the +returned item's reference count. +*/ + +#ifndef Py_LISTOBJECT_H +#define Py_LISTOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_VAR_HEAD + /* Vector of pointers to list elements. list[0] is ob_item[0], etc. */ + PyObject **ob_item; + + /* ob_item contains space for 'allocated' elements. The number + * currently in use is ob_size. + * Invariants: + * 0 <= ob_size <= allocated + * len(list) == ob_size + * ob_item == NULL implies ob_size == allocated == 0 + * list.sort() temporarily sets allocated to -1 to detect mutations. + * + * Items must normally not be NULL, except during construction when + * the list is not yet visible outside the function that builds it. + */ + Py_ssize_t allocated; +} PyListObject; + +PyAPI_DATA(PyTypeObject) PyList_Type; + +#define PyList_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LIST_SUBCLASS) +#define PyList_CheckExact(op) (Py_TYPE(op) == &PyList_Type) + +PyAPI_FUNC(PyObject *) PyList_New(Py_ssize_t size); +PyAPI_FUNC(Py_ssize_t) PyList_Size(PyObject *); +PyAPI_FUNC(PyObject *) PyList_GetItem(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyList_SetItem(PyObject *, Py_ssize_t, PyObject *); +PyAPI_FUNC(int) PyList_Insert(PyObject *, Py_ssize_t, PyObject *); +PyAPI_FUNC(int) PyList_Append(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyList_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); +PyAPI_FUNC(int) PyList_SetSlice(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); +PyAPI_FUNC(int) PyList_Sort(PyObject *); +PyAPI_FUNC(int) PyList_Reverse(PyObject *); +PyAPI_FUNC(PyObject *) PyList_AsTuple(PyObject *); +PyAPI_FUNC(PyObject *) _PyList_Extend(PyListObject *, PyObject *); + +/* Macro, trading safety for speed */ +#define PyList_GET_ITEM(op, i) (((PyListObject *)(op))->ob_item[i]) +#define PyList_SET_ITEM(op, i, v) (((PyListObject *)(op))->ob_item[i] = (v)) +#define PyList_GET_SIZE(op) Py_SIZE(op) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LISTOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/longintrepr.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,103 @@ +#ifndef Py_LONGINTREPR_H +#define Py_LONGINTREPR_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* This is published for the benefit of "friend" marshal.c only. */ + +/* Parameters of the long integer representation. There are two different + sets of parameters: one set for 30-bit digits, stored in an unsigned 32-bit + integer type, and one set for 15-bit digits with each digit stored in an + unsigned short. The value of PYLONG_BITS_IN_DIGIT, defined either at + configure time or in pyport.h, is used to decide which digit size to use. + + Type 'digit' should be able to hold 2*PyLong_BASE-1, and type 'twodigits' + should be an unsigned integer type able to hold all integers up to + PyLong_BASE*PyLong_BASE-1. x_sub assumes that 'digit' is an unsigned type, + and that overflow is handled by taking the result modulo 2**N for some N > + PyLong_SHIFT. The majority of the code doesn't care about the precise + value of PyLong_SHIFT, but there are some notable exceptions: + + - long_pow() requires that PyLong_SHIFT be divisible by 5 + + - PyLong_{As,From}ByteArray require that PyLong_SHIFT be at least 8 + + - long_hash() requires that PyLong_SHIFT is *strictly* less than the number + of bits in an unsigned long, as do the PyLong <-> long (or unsigned long) + conversion functions + + - the long <-> size_t/Py_ssize_t conversion functions expect that + PyLong_SHIFT is strictly less than the number of bits in a size_t + + - the marshal code currently expects that PyLong_SHIFT is a multiple of 15 + + The values 15 and 30 should fit all of the above requirements, on any + platform. +*/ + +#if PYLONG_BITS_IN_DIGIT == 30 +#if !(defined HAVE_UINT64_T && defined HAVE_UINT32_T && \ + defined HAVE_INT64_T && defined HAVE_INT32_T) +#error "30-bit long digits requested, but the necessary types are not available on this platform" +#endif +typedef PY_UINT32_T digit; +typedef PY_INT32_T sdigit; /* signed variant of digit */ +typedef PY_UINT64_T twodigits; +typedef PY_INT64_T stwodigits; /* signed variant of twodigits */ +#define PyLong_SHIFT 30 +#define _PyLong_DECIMAL_SHIFT 9 /* max(e such that 10**e fits in a digit) */ +#define _PyLong_DECIMAL_BASE ((digit)1000000000) /* 10 ** DECIMAL_SHIFT */ +#elif PYLONG_BITS_IN_DIGIT == 15 +typedef unsigned short digit; +typedef short sdigit; /* signed variant of digit */ +typedef unsigned long twodigits; +typedef long stwodigits; /* signed variant of twodigits */ +#define PyLong_SHIFT 15 +#define _PyLong_DECIMAL_SHIFT 4 /* max(e such that 10**e fits in a digit) */ +#define _PyLong_DECIMAL_BASE ((digit)10000) /* 10 ** DECIMAL_SHIFT */ +#else +#error "PYLONG_BITS_IN_DIGIT should be 15 or 30" +#endif +#define PyLong_BASE ((digit)1 << PyLong_SHIFT) +#define PyLong_MASK ((digit)(PyLong_BASE - 1)) + +/* b/w compatibility with Python 2.5 */ +#define SHIFT PyLong_SHIFT +#define BASE PyLong_BASE +#define MASK PyLong_MASK + +#if PyLong_SHIFT % 5 != 0 +#error "longobject.c requires that PyLong_SHIFT be divisible by 5" +#endif + +/* Long integer representation. + The absolute value of a number is equal to + SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) + Negative numbers are represented with ob_size < 0; + zero is represented by ob_size == 0. + In a normalized number, ob_digit[abs(ob_size)-1] (the most significant + digit) is never zero. Also, in all cases, for all valid i, + 0 <= ob_digit[i] <= MASK. + The allocation function takes care of allocating extra memory + so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available. + + CAUTION: Generic code manipulating subtypes of PyVarObject has to + aware that longs abuse ob_size's sign bit. +*/ + +struct _longobject { + PyObject_VAR_HEAD + digit ob_digit[1]; +}; + +PyAPI_FUNC(PyLongObject *) _PyLong_New(Py_ssize_t); + +/* Return a copy of src. */ +PyAPI_FUNC(PyObject *) _PyLong_Copy(PyLongObject *src); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LONGINTREPR_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/longobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,135 @@ +#ifndef Py_LONGOBJECT_H +#define Py_LONGOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Long (arbitrary precision) integer object interface */ + +typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */ + +PyAPI_DATA(PyTypeObject) PyLong_Type; + +#define PyLong_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LONG_SUBCLASS) +#define PyLong_CheckExact(op) (Py_TYPE(op) == &PyLong_Type) + +PyAPI_FUNC(PyObject *) PyLong_FromLong(long); +PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLong(unsigned long); +PyAPI_FUNC(PyObject *) PyLong_FromDouble(double); +PyAPI_FUNC(PyObject *) PyLong_FromSize_t(size_t); +PyAPI_FUNC(PyObject *) PyLong_FromSsize_t(Py_ssize_t); +PyAPI_FUNC(long) PyLong_AsLong(PyObject *); +PyAPI_FUNC(long) PyLong_AsLongAndOverflow(PyObject *, int *); +PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *); +PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *); +PyAPI_FUNC(Py_ssize_t) PyLong_AsSsize_t(PyObject *); +PyAPI_FUNC(int) _PyLong_AsInt(PyObject *); +PyAPI_FUNC(PyObject *) PyLong_GetInfo(void); + +/* For use by intobject.c only */ +#define _PyLong_AsSsize_t PyLong_AsSsize_t +#define _PyLong_FromSize_t PyLong_FromSize_t +#define _PyLong_FromSsize_t PyLong_FromSsize_t +PyAPI_DATA(int) _PyLong_DigitValue[256]; + +/* _PyLong_Frexp returns a double x and an exponent e such that the + true value is approximately equal to x * 2**e. e is >= 0. x is + 0.0 if and only if the input is 0 (in which case, e and x are both + zeroes); otherwise, 0.5 <= abs(x) < 1.0. On overflow, which is + possible if the number of bits doesn't fit into a Py_ssize_t, sets + OverflowError and returns -1.0 for x, 0 for e. */ +PyAPI_FUNC(double) _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e); + +PyAPI_FUNC(double) PyLong_AsDouble(PyObject *); +PyAPI_FUNC(PyObject *) PyLong_FromVoidPtr(void *); +PyAPI_FUNC(void *) PyLong_AsVoidPtr(PyObject *); + +#ifdef HAVE_LONG_LONG +PyAPI_FUNC(PyObject *) PyLong_FromLongLong(PY_LONG_LONG); +PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG); +PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLong(PyObject *); +PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLong(PyObject *); +PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLongMask(PyObject *); +PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLongAndOverflow(PyObject *, int *); +#endif /* HAVE_LONG_LONG */ + +PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int); +#ifdef Py_USING_UNICODE +PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int); +#endif + +/* _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. + v must not be NULL, and must be a normalized long. + There are no error cases. +*/ +PyAPI_FUNC(int) _PyLong_Sign(PyObject *v); + + +/* _PyLong_NumBits. Return the number of bits needed to represent the + absolute value of a long. For example, this returns 1 for 1 and -1, 2 + for 2 and -2, and 2 for 3 and -3. It returns 0 for 0. + v must not be NULL, and must be a normalized long. + (size_t)-1 is returned and OverflowError set if the true result doesn't + fit in a size_t. +*/ +PyAPI_FUNC(size_t) _PyLong_NumBits(PyObject *v); + +/* _PyLong_FromByteArray: View the n unsigned bytes as a binary integer in + base 256, and return a Python long with the same numeric value. + If n is 0, the integer is 0. Else: + If little_endian is 1/true, bytes[n-1] is the MSB and bytes[0] the LSB; + else (little_endian is 0/false) bytes[0] is the MSB and bytes[n-1] the + LSB. + If is_signed is 0/false, view the bytes as a non-negative integer. + If is_signed is 1/true, view the bytes as a 2's-complement integer, + non-negative if bit 0x80 of the MSB is clear, negative if set. + Error returns: + + Return NULL with the appropriate exception set if there's not + enough memory to create the Python long. +*/ +PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( + const unsigned char* bytes, size_t n, + int little_endian, int is_signed); + +/* _PyLong_AsByteArray: Convert the least-significant 8*n bits of long + v to a base-256 integer, stored in array bytes. Normally return 0, + return -1 on error. + If little_endian is 1/true, store the MSB at bytes[n-1] and the LSB at + bytes[0]; else (little_endian is 0/false) store the MSB at bytes[0] and + the LSB at bytes[n-1]. + If is_signed is 0/false, it's an error if v < 0; else (v >= 0) n bytes + are filled and there's nothing special about bit 0x80 of the MSB. + If is_signed is 1/true, bytes is filled with the 2's-complement + representation of v's value. Bit 0x80 of the MSB is the sign bit. + Error returns (-1): + + is_signed is 0 and v < 0. TypeError is set in this case, and bytes + isn't altered. + + n isn't big enough to hold the full mathematical value of v. For + example, if is_signed is 0 and there are more digits in the v than + fit in n; or if is_signed is 1, v < 0, and n is just 1 bit shy of + being large enough to hold a sign bit. OverflowError is set in this + case, but bytes holds the least-signficant n bytes of the true value. +*/ +PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, + unsigned char* bytes, size_t n, + int little_endian, int is_signed); + +/* _PyLong_Format: Convert the long to a string object with given base, + appending a base prefix of 0[box] if base is 2, 8 or 16. + Add a trailing "L" if addL is non-zero. + If newstyle is zero, then use the pre-2.6 behavior of octal having + a leading "0", instead of the prefix "0o" */ +PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base, int addL, int newstyle); + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LONGOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/marshal.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,25 @@ + +/* Interface for marshal.c */ + +#ifndef Py_MARSHAL_H +#define Py_MARSHAL_H +#ifdef __cplusplus +extern "C" { +#endif + +#define Py_MARSHAL_VERSION 2 + +PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int); +PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int); +PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int); + +PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *); +PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *); +PyAPI_FUNC(PyObject *) PyMarshal_ReadObjectFromFile(FILE *); +PyAPI_FUNC(PyObject *) PyMarshal_ReadLastObjectFromFile(FILE *); +PyAPI_FUNC(PyObject *) PyMarshal_ReadObjectFromString(char *, Py_ssize_t); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MARSHAL_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/memoryobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,74 @@ +/* Memory view object. In Python this is available as "memoryview". */ + +#ifndef Py_MEMORYOBJECT_H +#define Py_MEMORYOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyMemoryView_Type; + +#define PyMemoryView_Check(op) (Py_TYPE(op) == &PyMemoryView_Type) + +/* Get a pointer to the underlying Py_buffer of a memoryview object. */ +#define PyMemoryView_GET_BUFFER(op) (&((PyMemoryViewObject *)(op))->view) +/* Get a pointer to the PyObject from which originates a memoryview object. */ +#define PyMemoryView_GET_BASE(op) (((PyMemoryViewObject *)(op))->view.obj) + + +PyAPI_FUNC(PyObject *) PyMemoryView_GetContiguous(PyObject *base, + int buffertype, + char fort); + + /* Return a contiguous chunk of memory representing the buffer + from an object in a memory view object. If a copy is made then the + base object for the memory view will be a *new* bytes object. + + Otherwise, the base-object will be the object itself and no + data-copying will be done. + + The buffertype argument can be PyBUF_READ, PyBUF_WRITE, + PyBUF_SHADOW to determine whether the returned buffer + should be READONLY, WRITABLE, or set to update the + original buffer if a copy must be made. If buffertype is + PyBUF_WRITE and the buffer is not contiguous an error will + be raised. In this circumstance, the user can use + PyBUF_SHADOW to ensure that a writable temporary + contiguous buffer is returned. The contents of this + contiguous buffer will be copied back into the original + object after the memoryview object is deleted as long as + the original object is writable and allows setting an + exclusive write lock. If this is not allowed by the + original object, then a BufferError is raised. + + If the object is multi-dimensional and if fortran is 'F', + the first dimension of the underlying array will vary the + fastest in the buffer. If fortran is 'C', then the last + dimension will vary the fastest (C-style contiguous). If + fortran is 'A', then it does not matter and you will get + whatever the object decides is more efficient. + + A new reference is returned that must be DECREF'd when finished. + */ + +PyAPI_FUNC(PyObject *) PyMemoryView_FromObject(PyObject *base); + +PyAPI_FUNC(PyObject *) PyMemoryView_FromBuffer(Py_buffer *info); + /* create new if bufptr is NULL + will be a new bytesobject in base */ + + +/* The struct is declared here so that macros can work, but it shouldn't + be considered public. Don't access those fields directly, use the macros + and functions instead! */ +typedef struct { + PyObject_HEAD + PyObject *base; + Py_buffer view; +} PyMemoryViewObject; + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MEMORYOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/metagrammar.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,18 @@ +#ifndef Py_METAGRAMMAR_H +#define Py_METAGRAMMAR_H +#ifdef __cplusplus +extern "C" { +#endif + + +#define MSTART 256 +#define RULE 257 +#define RHS 258 +#define ALT 259 +#define ITEM 260 +#define ATOM 261 + +#ifdef __cplusplus +} +#endif +#endif /* !Py_METAGRAMMAR_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/methodobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,93 @@ + +/* Method object interface */ + +#ifndef Py_METHODOBJECT_H +#define Py_METHODOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* This is about the type 'builtin_function_or_method', + not Python methods in user-defined classes. See classobject.h + for the latter. */ + +PyAPI_DATA(PyTypeObject) PyCFunction_Type; + +#define PyCFunction_Check(op) (Py_TYPE(op) == &PyCFunction_Type) + +typedef PyObject *(*PyCFunction)(PyObject *, PyObject *); +typedef PyObject *(*PyCFunctionWithKeywords)(PyObject *, PyObject *, + PyObject *); +typedef PyObject *(*PyNoArgsFunction)(PyObject *); + +PyAPI_FUNC(PyCFunction) PyCFunction_GetFunction(PyObject *); +PyAPI_FUNC(PyObject *) PyCFunction_GetSelf(PyObject *); +PyAPI_FUNC(int) PyCFunction_GetFlags(PyObject *); + +/* Macros for direct access to these values. Type checks are *not* + done, so use with care. */ +#define PyCFunction_GET_FUNCTION(func) \ + (((PyCFunctionObject *)func) -> m_ml -> ml_meth) +#define PyCFunction_GET_SELF(func) \ + (((PyCFunctionObject *)func) -> m_self) +#define PyCFunction_GET_FLAGS(func) \ + (((PyCFunctionObject *)func) -> m_ml -> ml_flags) +PyAPI_FUNC(PyObject *) PyCFunction_Call(PyObject *, PyObject *, PyObject *); + +struct PyMethodDef { + const char *ml_name; /* The name of the built-in function/method */ + PyCFunction ml_meth; /* The C function that implements it */ + int ml_flags; /* Combination of METH_xxx flags, which mostly + describe the args expected by the C func */ + const char *ml_doc; /* The __doc__ attribute, or NULL */ +}; +typedef struct PyMethodDef PyMethodDef; + +PyAPI_FUNC(PyObject *) Py_FindMethod(PyMethodDef[], PyObject *, const char *); + +#define PyCFunction_New(ML, SELF) PyCFunction_NewEx((ML), (SELF), NULL) +PyAPI_FUNC(PyObject *) PyCFunction_NewEx(PyMethodDef *, PyObject *, + PyObject *); + +/* Flag passed to newmethodobject */ +#define METH_OLDARGS 0x0000 +#define METH_VARARGS 0x0001 +#define METH_KEYWORDS 0x0002 +/* METH_NOARGS and METH_O must not be combined with the flags above. */ +#define METH_NOARGS 0x0004 +#define METH_O 0x0008 + +/* METH_CLASS and METH_STATIC are a little different; these control + the construction of methods for a class. These cannot be used for + functions in modules. */ +#define METH_CLASS 0x0010 +#define METH_STATIC 0x0020 + +/* METH_COEXIST allows a method to be entered eventhough a slot has + already filled the entry. When defined, the flag allows a separate + method, "__contains__" for example, to coexist with a defined + slot like sq_contains. */ + +#define METH_COEXIST 0x0040 + +typedef struct PyMethodChain { + PyMethodDef *methods; /* Methods of this type */ + struct PyMethodChain *link; /* NULL or base type */ +} PyMethodChain; + +PyAPI_FUNC(PyObject *) Py_FindMethodInChain(PyMethodChain *, PyObject *, + const char *); + +typedef struct { + PyObject_HEAD + PyMethodDef *m_ml; /* Description of the C function to call */ + PyObject *m_self; /* Passed as 'self' arg to the C func, can be NULL */ + PyObject *m_module; /* The __module__ attribute, can be anything */ +} PyCFunctionObject; + +PyAPI_FUNC(int) PyCFunction_ClearFreeList(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_METHODOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/modsupport.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,134 @@ + +#ifndef Py_MODSUPPORT_H +#define Py_MODSUPPORT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Module support interface */ + +#include <stdarg.h> + +/* If PY_SSIZE_T_CLEAN is defined, each functions treats #-specifier + to mean Py_ssize_t */ +#ifdef PY_SSIZE_T_CLEAN +#define PyArg_Parse _PyArg_Parse_SizeT +#define PyArg_ParseTuple _PyArg_ParseTuple_SizeT +#define PyArg_ParseTupleAndKeywords _PyArg_ParseTupleAndKeywords_SizeT +#define PyArg_VaParse _PyArg_VaParse_SizeT +#define PyArg_VaParseTupleAndKeywords _PyArg_VaParseTupleAndKeywords_SizeT +#define Py_BuildValue _Py_BuildValue_SizeT +#define Py_VaBuildValue _Py_VaBuildValue_SizeT +#else +PyAPI_FUNC(PyObject *) _Py_VaBuildValue_SizeT(const char *, va_list); +#endif + +PyAPI_FUNC(int) PyArg_Parse(PyObject *, const char *, ...); +PyAPI_FUNC(int) PyArg_ParseTuple(PyObject *, const char *, ...) Py_FORMAT_PARSETUPLE(PyArg_ParseTuple, 2, 3); +PyAPI_FUNC(int) PyArg_ParseTupleAndKeywords(PyObject *, PyObject *, + const char *, char **, ...); +PyAPI_FUNC(int) PyArg_UnpackTuple(PyObject *, const char *, Py_ssize_t, Py_ssize_t, ...); +PyAPI_FUNC(PyObject *) Py_BuildValue(const char *, ...); +PyAPI_FUNC(PyObject *) _Py_BuildValue_SizeT(const char *, ...); +PyAPI_FUNC(int) _PyArg_NoKeywords(const char *funcname, PyObject *kw); + +PyAPI_FUNC(int) PyArg_VaParse(PyObject *, const char *, va_list); +PyAPI_FUNC(int) PyArg_VaParseTupleAndKeywords(PyObject *, PyObject *, + const char *, char **, va_list); +PyAPI_FUNC(PyObject *) Py_VaBuildValue(const char *, va_list); + +PyAPI_FUNC(int) PyModule_AddObject(PyObject *, const char *, PyObject *); +PyAPI_FUNC(int) PyModule_AddIntConstant(PyObject *, const char *, long); +PyAPI_FUNC(int) PyModule_AddStringConstant(PyObject *, const char *, const char *); +#define PyModule_AddIntMacro(m, c) PyModule_AddIntConstant(m, #c, c) +#define PyModule_AddStringMacro(m, c) PyModule_AddStringConstant(m, #c, c) + +#define PYTHON_API_VERSION 1013 +#define PYTHON_API_STRING "1013" +/* The API version is maintained (independently from the Python version) + so we can detect mismatches between the interpreter and dynamically + loaded modules. These are diagnosed by an error message but + the module is still loaded (because the mismatch can only be tested + after loading the module). The error message is intended to + explain the core dump a few seconds later. + + The symbol PYTHON_API_STRING defines the same value as a string + literal. *** PLEASE MAKE SURE THE DEFINITIONS MATCH. *** + + Please add a line or two to the top of this log for each API + version change: + + 22-Feb-2006 MvL 1013 PEP 353 - long indices for sequence lengths + + 19-Aug-2002 GvR 1012 Changes to string object struct for + interning changes, saving 3 bytes. + + 17-Jul-2001 GvR 1011 Descr-branch, just to be on the safe side + + 25-Jan-2001 FLD 1010 Parameters added to PyCode_New() and + PyFrame_New(); Python 2.1a2 + + 14-Mar-2000 GvR 1009 Unicode API added + + 3-Jan-1999 GvR 1007 Decided to change back! (Don't reuse 1008!) + + 3-Dec-1998 GvR 1008 Python 1.5.2b1 + + 18-Jan-1997 GvR 1007 string interning and other speedups + + 11-Oct-1996 GvR renamed Py_Ellipses to Py_Ellipsis :-( + + 30-Jul-1996 GvR Slice and ellipses syntax added + + 23-Jul-1996 GvR For 1.4 -- better safe than sorry this time :-) + + 7-Nov-1995 GvR Keyword arguments (should've been done at 1.3 :-( ) + + 10-Jan-1995 GvR Renamed globals to new naming scheme + + 9-Jan-1995 GvR Initial version (incompatible with older API) +*/ + +#ifdef MS_WINDOWS +/* Special defines for Windows versions used to live here. Things + have changed, and the "Version" is now in a global string variable. + Reason for this is that this for easier branding of a "custom DLL" + without actually needing a recompile. */ +#endif /* MS_WINDOWS */ + +#if SIZEOF_SIZE_T != SIZEOF_INT +/* On a 64-bit system, rename the Py_InitModule4 so that 2.4 + modules cannot get loaded into a 2.5 interpreter */ +#define Py_InitModule4 Py_InitModule4_64 +#endif + +#ifdef Py_TRACE_REFS + /* When we are tracing reference counts, rename Py_InitModule4 so + modules compiled with incompatible settings will generate a + link-time error. */ + #if SIZEOF_SIZE_T != SIZEOF_INT + #undef Py_InitModule4 + #define Py_InitModule4 Py_InitModule4TraceRefs_64 + #else + #define Py_InitModule4 Py_InitModule4TraceRefs + #endif +#endif + +PyAPI_FUNC(PyObject *) Py_InitModule4(const char *name, PyMethodDef *methods, + const char *doc, PyObject *self, + int apiver); + +#define Py_InitModule(name, methods) \ + Py_InitModule4(name, methods, (char *)NULL, (PyObject *)NULL, \ + PYTHON_API_VERSION) + +#define Py_InitModule3(name, methods, doc) \ + Py_InitModule4(name, methods, doc, (PyObject *)NULL, \ + PYTHON_API_VERSION) + +PyAPI_DATA(char *) _Py_PackageContext; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MODSUPPORT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/moduleobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,24 @@ + +/* Module object interface */ + +#ifndef Py_MODULEOBJECT_H +#define Py_MODULEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyModule_Type; + +#define PyModule_Check(op) PyObject_TypeCheck(op, &PyModule_Type) +#define PyModule_CheckExact(op) (Py_TYPE(op) == &PyModule_Type) + +PyAPI_FUNC(PyObject *) PyModule_New(const char *); +PyAPI_FUNC(PyObject *) PyModule_GetDict(PyObject *); +PyAPI_FUNC(char *) PyModule_GetName(PyObject *); +PyAPI_FUNC(char *) PyModule_GetFilename(PyObject *); +PyAPI_FUNC(void) _PyModule_Clear(PyObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MODULEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/node.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,41 @@ + +/* Parse tree node interface */ + +#ifndef Py_NODE_H +#define Py_NODE_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _node { + short n_type; + char *n_str; + int n_lineno; + int n_col_offset; + int n_nchildren; + struct _node *n_child; +} node; + +PyAPI_FUNC(node *) PyNode_New(int type); +PyAPI_FUNC(int) PyNode_AddChild(node *n, int type, + char *str, int lineno, int col_offset); +PyAPI_FUNC(void) PyNode_Free(node *n); +PyAPI_FUNC(Py_ssize_t) _PyNode_SizeOf(node *n); + +/* Node access functions */ +#define NCH(n) ((n)->n_nchildren) + +#define CHILD(n, i) (&(n)->n_child[i]) +#define RCHILD(n, i) (CHILD(n, NCH(n) + i)) +#define TYPE(n) ((n)->n_type) +#define STR(n) ((n)->n_str) + +/* Assert that the type of a node is what we expect */ +#define REQ(n, type) assert(TYPE(n) == (type)) + +PyAPI_FUNC(void) PyNode_ListTree(node *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_NODE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/object.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,1046 @@ +#ifndef Py_OBJECT_H +#define Py_OBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Object and type object interface */ + +/* +Objects are structures allocated on the heap. Special rules apply to +the use of objects to ensure they are properly garbage-collected. +Objects are never allocated statically or on the stack; they must be +accessed through special macros and functions only. (Type objects are +exceptions to the first rule; the standard types are represented by +statically initialized type objects, although work on type/class unification +for Python 2.2 made it possible to have heap-allocated type objects too). + +An object has a 'reference count' that is increased or decreased when a +pointer to the object is copied or deleted; when the reference count +reaches zero there are no references to the object left and it can be +removed from the heap. + +An object has a 'type' that determines what it represents and what kind +of data it contains. An object's type is fixed when it is created. +Types themselves are represented as objects; an object contains a +pointer to the corresponding type object. The type itself has a type +pointer pointing to the object representing the type 'type', which +contains a pointer to itself!). + +Objects do not float around in memory; once allocated an object keeps +the same size and address. Objects that must hold variable-size data +can contain pointers to variable-size parts of the object. Not all +objects of the same type have the same size; but the size cannot change +after allocation. (These restrictions are made so a reference to an +object can be simply a pointer -- moving an object would require +updating all the pointers, and changing an object's size would require +moving it if there was another object right next to it.) + +Objects are always accessed through pointers of the type 'PyObject *'. +The type 'PyObject' is a structure that only contains the reference count +and the type pointer. The actual memory allocated for an object +contains other data that can only be accessed after casting the pointer +to a pointer to a longer structure type. This longer type must start +with the reference count and type fields; the macro PyObject_HEAD should be +used for this (to accommodate for future changes). The implementation +of a particular object type can cast the object pointer to the proper +type and back. + +A standard interface exists for objects that contain an array of items +whose size is determined when the object is allocated. +*/ + +/* Py_DEBUG implies Py_TRACE_REFS. */ +#if defined(Py_DEBUG) && !defined(Py_TRACE_REFS) +#define Py_TRACE_REFS +#endif + +/* Py_TRACE_REFS implies Py_REF_DEBUG. */ +#if defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) +#define Py_REF_DEBUG +#endif + +#ifdef Py_TRACE_REFS +/* Define pointers to support a doubly-linked list of all live heap objects. */ +#define _PyObject_HEAD_EXTRA \ + struct _object *_ob_next; \ + struct _object *_ob_prev; + +#define _PyObject_EXTRA_INIT 0, 0, + +#else +#define _PyObject_HEAD_EXTRA +#define _PyObject_EXTRA_INIT +#endif + +/* PyObject_HEAD defines the initial segment of every PyObject. */ +#define PyObject_HEAD \ + _PyObject_HEAD_EXTRA \ + Py_ssize_t ob_refcnt; \ + struct _typeobject *ob_type; + +#define PyObject_HEAD_INIT(type) \ + _PyObject_EXTRA_INIT \ + 1, type, + +#define PyVarObject_HEAD_INIT(type, size) \ + PyObject_HEAD_INIT(type) size, + +/* PyObject_VAR_HEAD defines the initial segment of all variable-size + * container objects. These end with a declaration of an array with 1 + * element, but enough space is malloc'ed so that the array actually + * has room for ob_size elements. Note that ob_size is an element count, + * not necessarily a byte count. + */ +#define PyObject_VAR_HEAD \ + PyObject_HEAD \ + Py_ssize_t ob_size; /* Number of items in variable part */ +#define Py_INVALID_SIZE (Py_ssize_t)-1 + +/* Nothing is actually declared to be a PyObject, but every pointer to + * a Python object can be cast to a PyObject*. This is inheritance built + * by hand. Similarly every pointer to a variable-size Python object can, + * in addition, be cast to PyVarObject*. + */ +typedef struct _object { + PyObject_HEAD +} PyObject; + +typedef struct { + PyObject_VAR_HEAD +} PyVarObject; + +#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) + +/* +Type objects contain a string containing the type name (to help somewhat +in debugging), the allocation parameters (see PyObject_New() and +PyObject_NewVar()), +and methods for accessing objects of the type. Methods are optional, a +nil pointer meaning that particular kind of access is not available for +this type. The Py_DECREF() macro uses the tp_dealloc method without +checking for a nil pointer; it should always be implemented except if +the implementation can guarantee that the reference count will never +reach zero (e.g., for statically allocated type objects). + +NB: the methods for certain type groups are now contained in separate +method blocks. +*/ + +typedef PyObject * (*unaryfunc)(PyObject *); +typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); +typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +typedef int (*inquiry)(PyObject *); +typedef Py_ssize_t (*lenfunc)(PyObject *); +typedef int (*coercion)(PyObject **, PyObject **); +typedef PyObject *(*intargfunc)(PyObject *, int) Py_DEPRECATED(2.5); +typedef PyObject *(*intintargfunc)(PyObject *, int, int) Py_DEPRECATED(2.5); +typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t); +typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t); +typedef int(*intobjargproc)(PyObject *, int, PyObject *); +typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *); +typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *); +typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); +typedef int(*objobjargproc)(PyObject *, PyObject *, PyObject *); + + + +/* int-based buffer interface */ +typedef int (*getreadbufferproc)(PyObject *, int, void **); +typedef int (*getwritebufferproc)(PyObject *, int, void **); +typedef int (*getsegcountproc)(PyObject *, int *); +typedef int (*getcharbufferproc)(PyObject *, int, char **); +/* ssize_t-based buffer interface */ +typedef Py_ssize_t (*readbufferproc)(PyObject *, Py_ssize_t, void **); +typedef Py_ssize_t (*writebufferproc)(PyObject *, Py_ssize_t, void **); +typedef Py_ssize_t (*segcountproc)(PyObject *, Py_ssize_t *); +typedef Py_ssize_t (*charbufferproc)(PyObject *, Py_ssize_t, char **); + + +/* Py3k buffer interface */ +typedef struct bufferinfo { + void *buf; + PyObject *obj; /* owned reference */ + Py_ssize_t len; + Py_ssize_t itemsize; /* This is Py_ssize_t so it can be + pointed to by strides in simple case.*/ + int readonly; + int ndim; + char *format; + Py_ssize_t *shape; + Py_ssize_t *strides; + Py_ssize_t *suboffsets; + Py_ssize_t smalltable[2]; /* static store for shape and strides of + mono-dimensional buffers. */ + void *internal; +} Py_buffer; + +typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); +typedef void (*releasebufferproc)(PyObject *, Py_buffer *); + + /* Flags for getting buffers */ +#define PyBUF_SIMPLE 0 +#define PyBUF_WRITABLE 0x0001 +/* we used to include an E, backwards compatible alias */ +#define PyBUF_WRITEABLE PyBUF_WRITABLE +#define PyBUF_FORMAT 0x0004 +#define PyBUF_ND 0x0008 +#define PyBUF_STRIDES (0x0010 | PyBUF_ND) +#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) +#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) +#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) +#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) + +#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) +#define PyBUF_CONTIG_RO (PyBUF_ND) + +#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) +#define PyBUF_STRIDED_RO (PyBUF_STRIDES) + +#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) + +#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) + + +#define PyBUF_READ 0x100 +#define PyBUF_WRITE 0x200 +#define PyBUF_SHADOW 0x400 +/* end Py3k buffer interface */ + +typedef int (*objobjproc)(PyObject *, PyObject *); +typedef int (*visitproc)(PyObject *, void *); +typedef int (*traverseproc)(PyObject *, visitproc, void *); + +typedef struct { + /* For numbers without flag bit Py_TPFLAGS_CHECKTYPES set, all + arguments are guaranteed to be of the object's type (modulo + coercion hacks -- i.e. if the type's coercion function + returns other types, then these are allowed as well). Numbers that + have the Py_TPFLAGS_CHECKTYPES flag bit set should check *both* + arguments for proper type and implement the necessary conversions + in the slot functions themselves. */ + + binaryfunc nb_add; + binaryfunc nb_subtract; + binaryfunc nb_multiply; + binaryfunc nb_divide; + binaryfunc nb_remainder; + binaryfunc nb_divmod; + ternaryfunc nb_power; + unaryfunc nb_negative; + unaryfunc nb_positive; + unaryfunc nb_absolute; + inquiry nb_nonzero; + unaryfunc nb_invert; + binaryfunc nb_lshift; + binaryfunc nb_rshift; + binaryfunc nb_and; + binaryfunc nb_xor; + binaryfunc nb_or; + coercion nb_coerce; + unaryfunc nb_int; + unaryfunc nb_long; + unaryfunc nb_float; + unaryfunc nb_oct; + unaryfunc nb_hex; + /* Added in release 2.0 */ + binaryfunc nb_inplace_add; + binaryfunc nb_inplace_subtract; + binaryfunc nb_inplace_multiply; + binaryfunc nb_inplace_divide; + binaryfunc nb_inplace_remainder; + ternaryfunc nb_inplace_power; + binaryfunc nb_inplace_lshift; + binaryfunc nb_inplace_rshift; + binaryfunc nb_inplace_and; + binaryfunc nb_inplace_xor; + binaryfunc nb_inplace_or; + + /* Added in release 2.2 */ + /* The following require the Py_TPFLAGS_HAVE_CLASS flag */ + binaryfunc nb_floor_divide; + binaryfunc nb_true_divide; + binaryfunc nb_inplace_floor_divide; + binaryfunc nb_inplace_true_divide; + + /* Added in release 2.5 */ + unaryfunc nb_index; +} PyNumberMethods; + +typedef struct { + lenfunc sq_length; + binaryfunc sq_concat; + ssizeargfunc sq_repeat; + ssizeargfunc sq_item; + ssizessizeargfunc sq_slice; + ssizeobjargproc sq_ass_item; + ssizessizeobjargproc sq_ass_slice; + objobjproc sq_contains; + /* Added in release 2.0 */ + binaryfunc sq_inplace_concat; + ssizeargfunc sq_inplace_repeat; +} PySequenceMethods; + +typedef struct { + lenfunc mp_length; + binaryfunc mp_subscript; + objobjargproc mp_ass_subscript; +} PyMappingMethods; + +typedef struct { + readbufferproc bf_getreadbuffer; + writebufferproc bf_getwritebuffer; + segcountproc bf_getsegcount; + charbufferproc bf_getcharbuffer; + getbufferproc bf_getbuffer; + releasebufferproc bf_releasebuffer; +} PyBufferProcs; + + +typedef void (*freefunc)(void *); +typedef void (*destructor)(PyObject *); +typedef int (*printfunc)(PyObject *, FILE *, int); +typedef PyObject *(*getattrfunc)(PyObject *, char *); +typedef PyObject *(*getattrofunc)(PyObject *, PyObject *); +typedef int (*setattrfunc)(PyObject *, char *, PyObject *); +typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *); +typedef int (*cmpfunc)(PyObject *, PyObject *); +typedef PyObject *(*reprfunc)(PyObject *); +typedef long (*hashfunc)(PyObject *); +typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); +typedef PyObject *(*getiterfunc) (PyObject *); +typedef PyObject *(*iternextfunc) (PyObject *); +typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *); +typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *); +typedef int (*initproc)(PyObject *, PyObject *, PyObject *); +typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *); +typedef PyObject *(*allocfunc)(struct _typeobject *, Py_ssize_t); + +typedef struct _typeobject { + PyObject_VAR_HEAD + const char *tp_name; /* For printing, in format "<module>.<name>" */ + Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ + + /* Methods to implement standard operations */ + + destructor tp_dealloc; + printfunc tp_print; + getattrfunc tp_getattr; + setattrfunc tp_setattr; + cmpfunc tp_compare; + reprfunc tp_repr; + + /* Method suites for standard classes */ + + PyNumberMethods *tp_as_number; + PySequenceMethods *tp_as_sequence; + PyMappingMethods *tp_as_mapping; + + /* More standard operations (here for binary compatibility) */ + + hashfunc tp_hash; + ternaryfunc tp_call; + reprfunc tp_str; + getattrofunc tp_getattro; + setattrofunc tp_setattro; + + /* Functions to access object as input/output buffer */ + PyBufferProcs *tp_as_buffer; + + /* Flags to define presence of optional/expanded features */ + long tp_flags; + + const char *tp_doc; /* Documentation string */ + + /* Assigned meaning in release 2.0 */ + /* call function for all accessible objects */ + traverseproc tp_traverse; + + /* delete references to contained objects */ + inquiry tp_clear; + + /* Assigned meaning in release 2.1 */ + /* rich comparisons */ + richcmpfunc tp_richcompare; + + /* weak reference enabler */ + Py_ssize_t tp_weaklistoffset; + + /* Added in release 2.2 */ + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + + /* Attribute descriptor and subclassing stuff */ + struct PyMethodDef *tp_methods; + struct PyMemberDef *tp_members; + struct PyGetSetDef *tp_getset; + struct _typeobject *tp_base; + PyObject *tp_dict; + descrgetfunc tp_descr_get; + descrsetfunc tp_descr_set; + Py_ssize_t tp_dictoffset; + initproc tp_init; + allocfunc tp_alloc; + newfunc tp_new; + freefunc tp_free; /* Low-level free-memory routine */ + inquiry tp_is_gc; /* For PyObject_IS_GC */ + PyObject *tp_bases; + PyObject *tp_mro; /* method resolution order */ + PyObject *tp_cache; + PyObject *tp_subclasses; + PyObject *tp_weaklist; + destructor tp_del; + + /* Type attribute cache version tag. Added in version 2.6 */ + unsigned int tp_version_tag; + +#ifdef COUNT_ALLOCS + /* these must be last and never explicitly initialized */ + Py_ssize_t tp_allocs; + Py_ssize_t tp_frees; + Py_ssize_t tp_maxalloc; + struct _typeobject *tp_prev; + struct _typeobject *tp_next; +#endif +} PyTypeObject; + + +/* The *real* layout of a type object when allocated on the heap */ +typedef struct _heaptypeobject { + /* Note: there's a dependency on the order of these members + in slotptr() in typeobject.c . */ + PyTypeObject ht_type; + PyNumberMethods as_number; + PyMappingMethods as_mapping; + PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, + so that the mapping wins when both + the mapping and the sequence define + a given operator (e.g. __getitem__). + see add_operators() in typeobject.c . */ + PyBufferProcs as_buffer; + PyObject *ht_name, *ht_slots; + /* here are optional user slots, followed by the members. */ +} PyHeapTypeObject; + +/* access macro to the members which are floating "behind" the object */ +#define PyHeapType_GET_MEMBERS(etype) \ + ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize)) + + +/* Generic type check */ +PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *); +#define PyObject_TypeCheck(ob, tp) \ + (Py_TYPE(ob) == (tp) || PyType_IsSubtype(Py_TYPE(ob), (tp))) + +PyAPI_DATA(PyTypeObject) PyType_Type; /* built-in 'type' */ +PyAPI_DATA(PyTypeObject) PyBaseObject_Type; /* built-in 'object' */ +PyAPI_DATA(PyTypeObject) PySuper_Type; /* built-in 'super' */ + +#define PyType_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TYPE_SUBCLASS) +#define PyType_CheckExact(op) (Py_TYPE(op) == &PyType_Type) + +PyAPI_FUNC(int) PyType_Ready(PyTypeObject *); +PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t); +PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *, + PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, char *, PyObject **); +PyAPI_FUNC(unsigned int) PyType_ClearCache(void); +PyAPI_FUNC(void) PyType_Modified(PyTypeObject *); + +/* Generic operations on objects */ +PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); +PyAPI_FUNC(void) _PyObject_Dump(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_Str(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); +#define PyObject_Bytes PyObject_Str +#ifdef Py_USING_UNICODE +PyAPI_FUNC(PyObject *) PyObject_Unicode(PyObject *); +#endif +PyAPI_FUNC(int) PyObject_Compare(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); +PyAPI_FUNC(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); +PyAPI_FUNC(PyObject *) PyObject_GetAttrString(PyObject *, const char *); +PyAPI_FUNC(int) PyObject_SetAttrString(PyObject *, const char *, PyObject *); +PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *); +PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *); +PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_SelfIter(PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *, + PyObject *, PyObject *); +PyAPI_FUNC(long) PyObject_Hash(PyObject *); +PyAPI_FUNC(long) PyObject_HashNotImplemented(PyObject *); +PyAPI_FUNC(int) PyObject_IsTrue(PyObject *); +PyAPI_FUNC(int) PyObject_Not(PyObject *); +PyAPI_FUNC(int) PyCallable_Check(PyObject *); +PyAPI_FUNC(int) PyNumber_Coerce(PyObject **, PyObject **); +PyAPI_FUNC(int) PyNumber_CoerceEx(PyObject **, PyObject **); + +PyAPI_FUNC(void) PyObject_ClearWeakRefs(PyObject *); + +/* A slot function whose address we need to compare */ +extern int _PyObject_SlotCompare(PyObject *, PyObject *); +/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes + dict as the last parameter. */ +PyAPI_FUNC(PyObject *) +_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(int) +_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, + PyObject *, PyObject *); + + +/* PyObject_Dir(obj) acts like Python __builtin__.dir(obj), returning a + list of strings. PyObject_Dir(NULL) is like __builtin__.dir(), + returning the names of the current locals. In this case, if there are + no current locals, NULL is returned, and PyErr_Occurred() is false. +*/ +PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *); + + +/* Helpers for printing recursive container types */ +PyAPI_FUNC(int) Py_ReprEnter(PyObject *); +PyAPI_FUNC(void) Py_ReprLeave(PyObject *); + +/* Helpers for hash functions */ +PyAPI_FUNC(long) _Py_HashDouble(double); +PyAPI_FUNC(long) _Py_HashPointer(void*); + +typedef struct { + long prefix; + long suffix; +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + +#ifdef Py_DEBUG +PyAPI_DATA(int) _Py_HashSecret_Initialized; +#endif + +/* Helper for passing objects to printf and the like. + Leaks refcounts. Don't use it! +*/ +#define PyObject_REPR(obj) PyString_AS_STRING(PyObject_Repr(obj)) + +/* Flag bits for printing: */ +#define Py_PRINT_RAW 1 /* No string quotes etc. */ + +/* +`Type flags (tp_flags) + +These flags are used to extend the type structure in a backwards-compatible +fashion. Extensions can use the flags to indicate (and test) when a given +type structure contains a new feature. The Python core will use these when +introducing new functionality between major revisions (to avoid mid-version +changes in the PYTHON_API_VERSION). + +Arbitration of the flag bit positions will need to be coordinated among +all extension writers who publically release their extensions (this will +be fewer than you might expect!).. + +Python 1.5.2 introduced the bf_getcharbuffer slot into PyBufferProcs. + +Type definitions should use Py_TPFLAGS_DEFAULT for their tp_flags value. + +Code can use PyType_HasFeature(type_ob, flag_value) to test whether the +given type object has a specified feature. + +NOTE: when building the core, Py_TPFLAGS_DEFAULT includes +Py_TPFLAGS_HAVE_VERSION_TAG; outside the core, it doesn't. This is so +that extensions that modify tp_dict of their own types directly don't +break, since this was allowed in 2.5. In 3.0 they will have to +manually remove this flag though! +*/ + +/* PyBufferProcs contains bf_getcharbuffer */ +#define Py_TPFLAGS_HAVE_GETCHARBUFFER (1L<<0) + +/* PySequenceMethods contains sq_contains */ +#define Py_TPFLAGS_HAVE_SEQUENCE_IN (1L<<1) + +/* This is here for backwards compatibility. Extensions that use the old GC + * API will still compile but the objects will not be tracked by the GC. */ +#define Py_TPFLAGS_GC 0 /* used to be (1L<<2) */ + +/* PySequenceMethods and PyNumberMethods contain in-place operators */ +#define Py_TPFLAGS_HAVE_INPLACEOPS (1L<<3) + +/* PyNumberMethods do their own coercion */ +#define Py_TPFLAGS_CHECKTYPES (1L<<4) + +/* tp_richcompare is defined */ +#define Py_TPFLAGS_HAVE_RICHCOMPARE (1L<<5) + +/* Objects which are weakly referencable if their tp_weaklistoffset is >0 */ +#define Py_TPFLAGS_HAVE_WEAKREFS (1L<<6) + +/* tp_iter is defined */ +#define Py_TPFLAGS_HAVE_ITER (1L<<7) + +/* New members introduced by Python 2.2 exist */ +#define Py_TPFLAGS_HAVE_CLASS (1L<<8) + +/* Set if the type object is dynamically allocated */ +#define Py_TPFLAGS_HEAPTYPE (1L<<9) + +/* Set if the type allows subclassing */ +#define Py_TPFLAGS_BASETYPE (1L<<10) + +/* Set if the type is 'ready' -- fully initialized */ +#define Py_TPFLAGS_READY (1L<<12) + +/* Set while the type is being 'readied', to prevent recursive ready calls */ +#define Py_TPFLAGS_READYING (1L<<13) + +/* Objects support garbage collection (see objimp.h) */ +#define Py_TPFLAGS_HAVE_GC (1L<<14) + +/* These two bits are preserved for Stackless Python, next after this is 17 */ +#ifdef STACKLESS +#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION (3L<<15) +#else +#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION 0 +#endif + +/* Objects support nb_index in PyNumberMethods */ +#define Py_TPFLAGS_HAVE_INDEX (1L<<17) + +/* Objects support type attribute cache */ +#define Py_TPFLAGS_HAVE_VERSION_TAG (1L<<18) +#define Py_TPFLAGS_VALID_VERSION_TAG (1L<<19) + +/* Type is abstract and cannot be instantiated */ +#define Py_TPFLAGS_IS_ABSTRACT (1L<<20) + +/* Has the new buffer protocol */ +#define Py_TPFLAGS_HAVE_NEWBUFFER (1L<<21) + +/* These flags are used to determine if a type is a subclass. */ +#define Py_TPFLAGS_INT_SUBCLASS (1L<<23) +#define Py_TPFLAGS_LONG_SUBCLASS (1L<<24) +#define Py_TPFLAGS_LIST_SUBCLASS (1L<<25) +#define Py_TPFLAGS_TUPLE_SUBCLASS (1L<<26) +#define Py_TPFLAGS_STRING_SUBCLASS (1L<<27) +#define Py_TPFLAGS_UNICODE_SUBCLASS (1L<<28) +#define Py_TPFLAGS_DICT_SUBCLASS (1L<<29) +#define Py_TPFLAGS_BASE_EXC_SUBCLASS (1L<<30) +#define Py_TPFLAGS_TYPE_SUBCLASS (1L<<31) + +#define Py_TPFLAGS_DEFAULT_EXTERNAL ( \ + Py_TPFLAGS_HAVE_GETCHARBUFFER | \ + Py_TPFLAGS_HAVE_SEQUENCE_IN | \ + Py_TPFLAGS_HAVE_INPLACEOPS | \ + Py_TPFLAGS_HAVE_RICHCOMPARE | \ + Py_TPFLAGS_HAVE_WEAKREFS | \ + Py_TPFLAGS_HAVE_ITER | \ + Py_TPFLAGS_HAVE_CLASS | \ + Py_TPFLAGS_HAVE_STACKLESS_EXTENSION | \ + Py_TPFLAGS_HAVE_INDEX | \ + 0) +#define Py_TPFLAGS_DEFAULT_CORE (Py_TPFLAGS_DEFAULT_EXTERNAL | \ + Py_TPFLAGS_HAVE_VERSION_TAG) + +#ifdef Py_BUILD_CORE +#define Py_TPFLAGS_DEFAULT Py_TPFLAGS_DEFAULT_CORE +#else +#define Py_TPFLAGS_DEFAULT Py_TPFLAGS_DEFAULT_EXTERNAL +#endif + +#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0) +#define PyType_FastSubclass(t,f) PyType_HasFeature(t,f) + + +/* +The macros Py_INCREF(op) and Py_DECREF(op) are used to increment or decrement +reference counts. Py_DECREF calls the object's deallocator function when +the refcount falls to 0; for +objects that don't contain references to other objects or heap memory +this can be the standard function free(). Both macros can be used +wherever a void expression is allowed. The argument must not be a +NULL pointer. If it may be NULL, use Py_XINCREF/Py_XDECREF instead. +The macro _Py_NewReference(op) initialize reference counts to 1, and +in special builds (Py_REF_DEBUG, Py_TRACE_REFS) performs additional +bookkeeping appropriate to the special build. + +We assume that the reference count field can never overflow; this can +be proven when the size of the field is the same as the pointer size, so +we ignore the possibility. Provided a C int is at least 32 bits (which +is implicitly assumed in many parts of this code), that's enough for +about 2**31 references to an object. + +XXX The following became out of date in Python 2.2, but I'm not sure +XXX what the full truth is now. Certainly, heap-allocated type objects +XXX can and should be deallocated. +Type objects should never be deallocated; the type pointer in an object +is not considered to be a reference to the type object, to save +complications in the deallocation function. (This is actually a +decision that's up to the implementer of each new type so if you want, +you can count such references to the type object.) + +*** WARNING*** The Py_DECREF macro must have a side-effect-free argument +since it may evaluate its argument multiple times. (The alternative +would be to mace it a proper function or assign it to a global temporary +variable first, both of which are slower; and in a multi-threaded +environment the global variable trick is not safe.) +*/ + +/* First define a pile of simple helper macros, one set per special + * build symbol. These either expand to the obvious things, or to + * nothing at all when the special mode isn't in effect. The main + * macros can later be defined just once then, yet expand to different + * things depending on which special build options are and aren't in effect. + * Trust me <wink>: while painful, this is 20x easier to understand than, + * e.g, defining _Py_NewReference five different times in a maze of nested + * #ifdefs (we used to do that -- it was impenetrable). + */ +#ifdef Py_REF_DEBUG +PyAPI_DATA(Py_ssize_t) _Py_RefTotal; +PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname, + int lineno, PyObject *op); +PyAPI_FUNC(PyObject *) _PyDict_Dummy(void); +PyAPI_FUNC(PyObject *) _PySet_Dummy(void); +PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); +#define _Py_INC_REFTOTAL _Py_RefTotal++ +#define _Py_DEC_REFTOTAL _Py_RefTotal-- +#define _Py_REF_DEBUG_COMMA , +#define _Py_CHECK_REFCNT(OP) \ +{ if (((PyObject*)OP)->ob_refcnt < 0) \ + _Py_NegativeRefcount(__FILE__, __LINE__, \ + (PyObject *)(OP)); \ +} +#else +#define _Py_INC_REFTOTAL +#define _Py_DEC_REFTOTAL +#define _Py_REF_DEBUG_COMMA +#define _Py_CHECK_REFCNT(OP) /* a semicolon */; +#endif /* Py_REF_DEBUG */ + +#ifdef COUNT_ALLOCS +PyAPI_FUNC(void) inc_count(PyTypeObject *); +PyAPI_FUNC(void) dec_count(PyTypeObject *); +#define _Py_INC_TPALLOCS(OP) inc_count(Py_TYPE(OP)) +#define _Py_INC_TPFREES(OP) dec_count(Py_TYPE(OP)) +#define _Py_DEC_TPFREES(OP) Py_TYPE(OP)->tp_frees-- +#define _Py_COUNT_ALLOCS_COMMA , +#else +#define _Py_INC_TPALLOCS(OP) +#define _Py_INC_TPFREES(OP) +#define _Py_DEC_TPFREES(OP) +#define _Py_COUNT_ALLOCS_COMMA +#endif /* COUNT_ALLOCS */ + +#ifdef Py_TRACE_REFS +/* Py_TRACE_REFS is such major surgery that we call external routines. */ +PyAPI_FUNC(void) _Py_NewReference(PyObject *); +PyAPI_FUNC(void) _Py_ForgetReference(PyObject *); +PyAPI_FUNC(void) _Py_Dealloc(PyObject *); +PyAPI_FUNC(void) _Py_PrintReferences(FILE *); +PyAPI_FUNC(void) _Py_PrintReferenceAddresses(FILE *); +PyAPI_FUNC(void) _Py_AddToAllObjects(PyObject *, int force); + +#else +/* Without Py_TRACE_REFS, there's little enough to do that we expand code + * inline. + */ +#define _Py_NewReference(op) ( \ + _Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA \ + _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \ + Py_REFCNT(op) = 1) + +#define _Py_ForgetReference(op) _Py_INC_TPFREES(op) + +#define _Py_Dealloc(op) ( \ + _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA \ + (*Py_TYPE(op)->tp_dealloc)((PyObject *)(op))) +#endif /* !Py_TRACE_REFS */ + +#define Py_INCREF(op) ( \ + _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \ + ((PyObject*)(op))->ob_refcnt++) + +#define Py_DECREF(op) \ + do { \ + if (_Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA \ + --((PyObject*)(op))->ob_refcnt != 0) \ + _Py_CHECK_REFCNT(op) \ + else \ + _Py_Dealloc((PyObject *)(op)); \ + } while (0) + +/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear + * and tp_dealloc implementatons. + * + * Note that "the obvious" code can be deadly: + * + * Py_XDECREF(op); + * op = NULL; + * + * Typically, `op` is something like self->containee, and `self` is done + * using its `containee` member. In the code sequence above, suppose + * `containee` is non-NULL with a refcount of 1. Its refcount falls to + * 0 on the first line, which can trigger an arbitrary amount of code, + * possibly including finalizers (like __del__ methods or weakref callbacks) + * coded in Python, which in turn can release the GIL and allow other threads + * to run, etc. Such code may even invoke methods of `self` again, or cause + * cyclic gc to trigger, but-- oops! --self->containee still points to the + * object being torn down, and it may be in an insane state while being torn + * down. This has in fact been a rich historic source of miserable (rare & + * hard-to-diagnose) segfaulting (and other) bugs. + * + * The safe way is: + * + * Py_CLEAR(op); + * + * That arranges to set `op` to NULL _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. + * + * There are cases where it's safe to use the naive code, but they're brittle. + * For example, if `op` points to a Python integer, you know that destroying + * one of those can't cause problems -- but in part that relies on that + * Python integers aren't currently weakly referencable. Best practice is + * to use Py_CLEAR() even if you can't think of a reason for why you need to. + */ +#define Py_CLEAR(op) \ + do { \ + if (op) { \ + PyObject *_py_tmp = (PyObject *)(op); \ + (op) = NULL; \ + Py_DECREF(_py_tmp); \ + } \ + } while (0) + +/* Macros to use in case the object pointer may be NULL: */ +#define Py_XINCREF(op) do { if ((op) == NULL) ; else Py_INCREF(op); } while (0) +#define Py_XDECREF(op) do { if ((op) == NULL) ; else Py_DECREF(op); } while (0) + +/* Safely decref `op` and set `op` to `op2`. + * + * As in case of Py_CLEAR "the obvious" code can be deadly: + * + * Py_DECREF(op); + * op = op2; + * + * The safe way is: + * + * Py_SETREF(op, op2); + * + * That arranges to set `op` to `op2` _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. + * + * Py_XSETREF is a variant of Py_SETREF that uses Py_XDECREF instead of + * Py_DECREF. + */ + +#define Py_SETREF(op, op2) \ + do { \ + PyObject *_py_tmp = (PyObject *)(op); \ + (op) = (op2); \ + Py_DECREF(_py_tmp); \ + } while (0) + +#define Py_XSETREF(op, op2) \ + do { \ + PyObject *_py_tmp = (PyObject *)(op); \ + (op) = (op2); \ + Py_XDECREF(_py_tmp); \ + } while (0) + +/* +These are provided as conveniences to Python runtime embedders, so that +they can have object code that is not dependent on Python compilation flags. +*/ +PyAPI_FUNC(void) Py_IncRef(PyObject *); +PyAPI_FUNC(void) Py_DecRef(PyObject *); + +/* +_Py_NoneStruct is an object of undefined type which can be used in contexts +where NULL (nil) is not suitable (since NULL often means 'error'). + +Don't forget to apply Py_INCREF() when returning this value!!! +*/ +PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ +#define Py_None (&_Py_NoneStruct) + +/* Macro for returning Py_None from a function */ +#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None + +/* +Py_NotImplemented is a singleton used to signal that an operation is +not implemented for a given type combination. +*/ +PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ +#define Py_NotImplemented (&_Py_NotImplementedStruct) + +/* Rich comparison opcodes */ +#define Py_LT 0 +#define Py_LE 1 +#define Py_EQ 2 +#define Py_NE 3 +#define Py_GT 4 +#define Py_GE 5 + +/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE. + * Defined in object.c. + */ +PyAPI_DATA(int) _Py_SwappedOp[]; + +/* +Define staticforward and statichere for source compatibility with old +C extensions. + +The staticforward define was needed to support certain broken C +compilers (notably SCO ODT 3.0, perhaps early AIX as well) botched the +static keyword when it was used with a forward declaration of a static +initialized structure. Standard C allows the forward declaration with +static, and we've decided to stop catering to broken C compilers. +(In fact, we expect that the compilers are all fixed eight years later.) +*/ + +#define staticforward static +#define statichere static + + +/* +More conventions +================ + +Argument Checking +----------------- + +Functions that take objects as arguments normally don't check for nil +arguments, but they do check the type of the argument, and return an +error if the function doesn't apply to the type. + +Failure Modes +------------- + +Functions may fail for a variety of reasons, including running out of +memory. This is communicated to the caller in two ways: an error string +is set (see errors.h), and the function result differs: functions that +normally return a pointer return NULL for failure, functions returning +an integer return -1 (which could be a legal return value too!), and +other functions return 0 for success and -1 for failure. +Callers should always check for errors before using the result. If +an error was set, the caller must either explicitly clear it, or pass +the error on to its caller. + +Reference Counts +---------------- + +It takes a while to get used to the proper usage of reference counts. + +Functions that create an object set the reference count to 1; such new +objects must be stored somewhere or destroyed again with Py_DECREF(). +Some functions that 'store' objects, such as PyTuple_SetItem() and +PyList_SetItem(), +don't increment the reference count of the object, since the most +frequent use is to store a fresh object. Functions that 'retrieve' +objects, such as PyTuple_GetItem() and PyDict_GetItemString(), also +don't increment +the reference count, since most frequently the object is only looked at +quickly. Thus, to retrieve an object and store it again, the caller +must call Py_INCREF() explicitly. + +NOTE: functions that 'consume' a reference count, like +PyList_SetItem(), consume the reference even if the object wasn't +successfully stored, to simplify error handling. + +It seems attractive to make other functions that take an object as +argument consume a reference count; however, this may quickly get +confusing (even the current practice is already confusing). Consider +it carefully, it may save lots of calls to Py_INCREF() and Py_DECREF() at +times. +*/ + + +/* Trashcan mechanism, thanks to Christian Tismer. + +When deallocating a container object, it's possible to trigger an unbounded +chain of deallocations, as each Py_DECREF in turn drops the refcount on "the +next" object in the chain to 0. This can easily lead to stack faults, and +especially in threads (which typically have less stack space to work with). + +A container object that participates in cyclic gc can avoid this by +bracketing the body of its tp_dealloc function with a pair of macros: + +static void +mytype_dealloc(mytype *p) +{ + ... declarations go here ... + + PyObject_GC_UnTrack(p); // must untrack first + Py_TRASHCAN_SAFE_BEGIN(p) + ... The body of the deallocator goes here, including all calls ... + ... to Py_DECREF on contained objects. ... + Py_TRASHCAN_SAFE_END(p) +} + +CAUTION: Never return from the middle of the body! If the body needs to +"get out early", put a label immediately before the Py_TRASHCAN_SAFE_END +call, and goto it. Else the call-depth counter (see below) will stay +above 0 forever, and the trashcan will never get emptied. + +How it works: The BEGIN macro increments a call-depth counter. So long +as this counter is small, the body of the deallocator is run directly without +further ado. But if the counter gets large, it instead adds p to a list of +objects to be deallocated later, skips the body of the deallocator, and +resumes execution after the END macro. The tp_dealloc routine then returns +without deallocating anything (and so unbounded call-stack depth is avoided). + +When the call stack finishes unwinding again, code generated by the END macro +notices this, and calls another routine to deallocate all the objects that +may have been added to the list of deferred deallocations. In effect, a +chain of N deallocations is broken into N / PyTrash_UNWIND_LEVEL pieces, +with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL. +*/ + +/* This is the old private API, invoked by the macros before 2.7.4. + Kept for binary compatibility of extensions. */ +PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); +PyAPI_FUNC(void) _PyTrash_destroy_chain(void); +PyAPI_DATA(int) _PyTrash_delete_nesting; +PyAPI_DATA(PyObject *) _PyTrash_delete_later; + +/* The new thread-safe private API, invoked by the macros below. */ +PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyObject*); +PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void); + +#define PyTrash_UNWIND_LEVEL 50 + +/* Note the workaround for when the thread state is NULL (issue #17703) */ +#define Py_TRASHCAN_SAFE_BEGIN(op) \ + do { \ + PyThreadState *_tstate = PyThreadState_GET(); \ + if (!_tstate || \ + _tstate->trash_delete_nesting < PyTrash_UNWIND_LEVEL) { \ + if (_tstate) \ + ++_tstate->trash_delete_nesting; + /* The body of the deallocator is here. */ +#define Py_TRASHCAN_SAFE_END(op) \ + if (_tstate) { \ + --_tstate->trash_delete_nesting; \ + if (_tstate->trash_delete_later \ + && _tstate->trash_delete_nesting <= 0) \ + _PyTrash_thread_destroy_chain(); \ + } \ + } \ + else \ + _PyTrash_thread_deposit_object((PyObject*)op); \ + } while (0); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/objimpl.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,354 @@ +/* The PyObject_ memory family: high-level object memory interfaces. + See pymem.h for the low-level PyMem_ family. +*/ + +#ifndef Py_OBJIMPL_H +#define Py_OBJIMPL_H + +#include "pymem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* BEWARE: + + Each interface exports both functions and macros. Extension modules should + use the functions, to ensure binary compatibility across Python versions. + Because the Python implementation is free to change internal details, and + the macros may (or may not) expose details for speed, if you do use the + macros you must recompile your extensions with each Python release. + + Never mix calls to PyObject_ memory functions with calls to the platform + malloc/realloc/ calloc/free, or with calls to PyMem_. +*/ + +/* +Functions and macros for modules that implement new object types. + + - PyObject_New(type, typeobj) allocates memory for a new object of the given + type, and initializes part of it. 'type' must be the C structure type used + to represent the object, and 'typeobj' the address of the corresponding + type object. Reference count and type pointer are filled in; the rest of + the bytes of the object are *undefined*! The resulting expression type is + 'type *'. The size of the object is determined by the tp_basicsize field + of the type object. + + - PyObject_NewVar(type, typeobj, n) is similar but allocates a variable-size + object with room for n items. In addition to the refcount and type pointer + fields, this also fills in the ob_size field. + + - PyObject_Del(op) releases the memory allocated for an object. It does not + run a destructor -- it only frees the memory. PyObject_Free is identical. + + - PyObject_Init(op, typeobj) and PyObject_InitVar(op, typeobj, n) don't + allocate memory. Instead of a 'type' parameter, they take a pointer to a + new object (allocated by an arbitrary allocator), and initialize its object + header fields. + +Note that objects created with PyObject_{New, NewVar} are allocated using the +specialized Python allocator (implemented in obmalloc.c), if WITH_PYMALLOC is +enabled. In addition, a special debugging allocator is used if PYMALLOC_DEBUG +is also #defined. + +In case a specific form of memory management is needed (for example, if you +must use the platform malloc heap(s), or shared memory, or C++ local storage or +operator new), you must first allocate the object with your custom allocator, +then pass its pointer to PyObject_{Init, InitVar} for filling in its Python- +specific fields: reference count, type pointer, possibly others. You should +be aware that Python no control over these objects because they don't +cooperate with the Python memory manager. Such objects may not be eligible +for automatic garbage collection and you have to make sure that they are +released accordingly whenever their destructor gets called (cf. the specific +form of memory management you're using). + +Unless you have specific memory management requirements, use +PyObject_{New, NewVar, Del}. +*/ + +/* + * Raw object memory interface + * =========================== + */ + +/* Functions to call the same malloc/realloc/free as used by Python's + object allocator. If WITH_PYMALLOC is enabled, these may differ from + the platform malloc/realloc/free. The Python object allocator is + designed for fast, cache-conscious allocation of many "small" objects, + and with low hidden memory overhead. + + PyObject_Malloc(0) returns a unique non-NULL pointer if possible. + + PyObject_Realloc(NULL, n) acts like PyObject_Malloc(n). + PyObject_Realloc(p != NULL, 0) does not return NULL, or free the memory + at p. + + Returned pointers must be checked for NULL explicitly; no action is + performed on failure other than to return NULL (no warning it printed, no + exception is set, etc). + + For allocating objects, use PyObject_{New, NewVar} instead whenever + possible. The PyObject_{Malloc, Realloc, Free} family is exposed + so that you can exploit Python's small-block allocator for non-object + uses. If you must use these routines to allocate object memory, make sure + the object gets initialized via PyObject_{Init, InitVar} after obtaining + the raw memory. +*/ +PyAPI_FUNC(void *) PyObject_Malloc(size_t); +PyAPI_FUNC(void *) PyObject_Realloc(void *, size_t); +PyAPI_FUNC(void) PyObject_Free(void *); + + +/* Macros */ +#ifdef WITH_PYMALLOC +#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */ +PyAPI_FUNC(void *) _PyObject_DebugMalloc(size_t nbytes); +PyAPI_FUNC(void *) _PyObject_DebugRealloc(void *p, size_t nbytes); +PyAPI_FUNC(void) _PyObject_DebugFree(void *p); +PyAPI_FUNC(void) _PyObject_DebugDumpAddress(const void *p); +PyAPI_FUNC(void) _PyObject_DebugCheckAddress(const void *p); +PyAPI_FUNC(void) _PyObject_DebugMallocStats(void); +PyAPI_FUNC(void *) _PyObject_DebugMallocApi(char api, size_t nbytes); +PyAPI_FUNC(void *) _PyObject_DebugReallocApi(char api, void *p, size_t nbytes); +PyAPI_FUNC(void) _PyObject_DebugFreeApi(char api, void *p); +PyAPI_FUNC(void) _PyObject_DebugCheckAddressApi(char api, const void *p); +PyAPI_FUNC(void *) _PyMem_DebugMalloc(size_t nbytes); +PyAPI_FUNC(void *) _PyMem_DebugRealloc(void *p, size_t nbytes); +PyAPI_FUNC(void) _PyMem_DebugFree(void *p); +#define PyObject_MALLOC _PyObject_DebugMalloc +#define PyObject_Malloc _PyObject_DebugMalloc +#define PyObject_REALLOC _PyObject_DebugRealloc +#define PyObject_Realloc _PyObject_DebugRealloc +#define PyObject_FREE _PyObject_DebugFree +#define PyObject_Free _PyObject_DebugFree + +#else /* WITH_PYMALLOC && ! PYMALLOC_DEBUG */ +#define PyObject_MALLOC PyObject_Malloc +#define PyObject_REALLOC PyObject_Realloc +#define PyObject_FREE PyObject_Free +#endif + +#else /* ! WITH_PYMALLOC */ +#define PyObject_MALLOC PyMem_MALLOC +#define PyObject_REALLOC PyMem_REALLOC +#define PyObject_FREE PyMem_FREE + +#endif /* WITH_PYMALLOC */ + +#define PyObject_Del PyObject_Free +#define PyObject_DEL PyObject_FREE + +/* for source compatibility with 2.2 */ +#define _PyObject_Del PyObject_Free + +/* + * Generic object allocator interface + * ================================== + */ + +/* Functions */ +PyAPI_FUNC(PyObject *) PyObject_Init(PyObject *, PyTypeObject *); +PyAPI_FUNC(PyVarObject *) PyObject_InitVar(PyVarObject *, + PyTypeObject *, Py_ssize_t); +PyAPI_FUNC(PyObject *) _PyObject_New(PyTypeObject *); +PyAPI_FUNC(PyVarObject *) _PyObject_NewVar(PyTypeObject *, Py_ssize_t); + +#define PyObject_New(type, typeobj) \ + ( (type *) _PyObject_New(typeobj) ) +#define PyObject_NewVar(type, typeobj, n) \ + ( (type *) _PyObject_NewVar((typeobj), (n)) ) + +/* Macros trading binary compatibility for speed. See also pymem.h. + Note that these macros expect non-NULL object pointers.*/ +#define PyObject_INIT(op, typeobj) \ + ( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) ) +#define PyObject_INIT_VAR(op, typeobj, size) \ + ( Py_SIZE(op) = (size), PyObject_INIT((op), (typeobj)) ) + +#define _PyObject_SIZE(typeobj) ( (typeobj)->tp_basicsize ) + +/* _PyObject_VAR_SIZE returns the number of bytes (as size_t) allocated for a + vrbl-size object with nitems items, exclusive of gc overhead (if any). The + value is rounded up to the closest multiple of sizeof(void *), in order to + ensure that pointer fields at the end of the object are correctly aligned + for the platform (this is of special importance for subclasses of, e.g., + str or long, so that pointers can be stored after the embedded data). + + Note that there's no memory wastage in doing this, as malloc has to + return (at worst) pointer-aligned memory anyway. +*/ +#if ((SIZEOF_VOID_P - 1) & SIZEOF_VOID_P) != 0 +# error "_PyObject_VAR_SIZE requires SIZEOF_VOID_P be a power of 2" +#endif + +#define _PyObject_VAR_SIZE(typeobj, nitems) \ + (size_t) \ + ( ( (typeobj)->tp_basicsize + \ + (nitems)*(typeobj)->tp_itemsize + \ + (SIZEOF_VOID_P - 1) \ + ) & ~(SIZEOF_VOID_P - 1) \ + ) + +#define PyObject_NEW(type, typeobj) \ +( (type *) PyObject_Init( \ + (PyObject *) PyObject_MALLOC( _PyObject_SIZE(typeobj) ), (typeobj)) ) + +#define PyObject_NEW_VAR(type, typeobj, n) \ +( (type *) PyObject_InitVar( \ + (PyVarObject *) PyObject_MALLOC(_PyObject_VAR_SIZE((typeobj),(n)) ),\ + (typeobj), (n)) ) + +/* This example code implements an object constructor with a custom + allocator, where PyObject_New is inlined, and shows the important + distinction between two steps (at least): + 1) the actual allocation of the object storage; + 2) the initialization of the Python specific fields + in this storage with PyObject_{Init, InitVar}. + + PyObject * + YourObject_New(...) + { + PyObject *op; + + op = (PyObject *) Your_Allocator(_PyObject_SIZE(YourTypeStruct)); + if (op == NULL) + return PyErr_NoMemory(); + + PyObject_Init(op, &YourTypeStruct); + + op->ob_field = value; + ... + return op; + } + + Note that in C++, the use of the new operator usually implies that + the 1st step is performed automatically for you, so in a C++ class + constructor you would start directly with PyObject_Init/InitVar +*/ + +/* + * Garbage Collection Support + * ========================== + */ + +/* C equivalent of gc.collect(). */ +PyAPI_FUNC(Py_ssize_t) PyGC_Collect(void); + +/* Test if a type has a GC head */ +#define PyType_IS_GC(t) PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC) + +/* Test if an object has a GC head */ +#define PyObject_IS_GC(o) (PyType_IS_GC(Py_TYPE(o)) && \ + (Py_TYPE(o)->tp_is_gc == NULL || Py_TYPE(o)->tp_is_gc(o))) + +PyAPI_FUNC(PyVarObject *) _PyObject_GC_Resize(PyVarObject *, Py_ssize_t); +#define PyObject_GC_Resize(type, op, n) \ + ( (type *) _PyObject_GC_Resize((PyVarObject *)(op), (n)) ) + +/* for source compatibility with 2.2 */ +#define _PyObject_GC_Del PyObject_GC_Del + +/* GC information is stored BEFORE the object structure. */ +typedef union _gc_head { + struct { + union _gc_head *gc_next; + union _gc_head *gc_prev; + Py_ssize_t gc_refs; + } gc; + long double dummy; /* force worst-case alignment */ +} PyGC_Head; + +extern PyGC_Head *_PyGC_generation0; + +#define _Py_AS_GC(o) ((PyGC_Head *)(o)-1) + +#define _PyGC_REFS_UNTRACKED (-2) +#define _PyGC_REFS_REACHABLE (-3) +#define _PyGC_REFS_TENTATIVELY_UNREACHABLE (-4) + +/* Tell the GC to track this object. NB: While the object is tracked the + * collector it must be safe to call the ob_traverse method. */ +#define _PyObject_GC_TRACK(o) do { \ + PyGC_Head *g = _Py_AS_GC(o); \ + if (g->gc.gc_refs != _PyGC_REFS_UNTRACKED) \ + Py_FatalError("GC object already tracked"); \ + g->gc.gc_refs = _PyGC_REFS_REACHABLE; \ + g->gc.gc_next = _PyGC_generation0; \ + g->gc.gc_prev = _PyGC_generation0->gc.gc_prev; \ + g->gc.gc_prev->gc.gc_next = g; \ + _PyGC_generation0->gc.gc_prev = g; \ + } while (0); + +/* Tell the GC to stop tracking this object. + * gc_next doesn't need to be set to NULL, but doing so is a good + * way to provoke memory errors if calling code is confused. + */ +#define _PyObject_GC_UNTRACK(o) do { \ + PyGC_Head *g = _Py_AS_GC(o); \ + assert(g->gc.gc_refs != _PyGC_REFS_UNTRACKED); \ + g->gc.gc_refs = _PyGC_REFS_UNTRACKED; \ + g->gc.gc_prev->gc.gc_next = g->gc.gc_next; \ + g->gc.gc_next->gc.gc_prev = g->gc.gc_prev; \ + g->gc.gc_next = NULL; \ + } while (0); + +/* True if the object is currently tracked by the GC. */ +#define _PyObject_GC_IS_TRACKED(o) \ + ((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED) + +/* True if the object may be tracked by the GC in the future, or already is. + This can be useful to implement some optimizations. */ +#define _PyObject_GC_MAY_BE_TRACKED(obj) \ + (PyObject_IS_GC(obj) && \ + (!PyTuple_CheckExact(obj) || _PyObject_GC_IS_TRACKED(obj))) + + +PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t); +PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *); +PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t); +PyAPI_FUNC(void) PyObject_GC_Track(void *); +PyAPI_FUNC(void) PyObject_GC_UnTrack(void *); +PyAPI_FUNC(void) PyObject_GC_Del(void *); + +#define PyObject_GC_New(type, typeobj) \ + ( (type *) _PyObject_GC_New(typeobj) ) +#define PyObject_GC_NewVar(type, typeobj, n) \ + ( (type *) _PyObject_GC_NewVar((typeobj), (n)) ) + + +/* Utility macro to help write tp_traverse functions. + * To use this macro, the tp_traverse function must name its arguments + * "visit" and "arg". This is intended to keep tp_traverse functions + * looking as much alike as possible. + */ +#define Py_VISIT(op) \ + do { \ + if (op) { \ + int vret = visit((PyObject *)(op), arg); \ + if (vret) \ + return vret; \ + } \ + } while (0) + +/* This is here for the sake of backwards compatibility. Extensions that + * use the old GC API will still compile but the objects will not be + * tracked by the GC. */ +#define PyGC_HEAD_SIZE 0 +#define PyObject_GC_Init(op) +#define PyObject_GC_Fini(op) +#define PyObject_AS_GC(op) (op) +#define PyObject_FROM_GC(op) (op) + + +/* Test if a type supports weak references */ +#define PyType_SUPPORTS_WEAKREFS(t) \ + (PyType_HasFeature((t), Py_TPFLAGS_HAVE_WEAKREFS) \ + && ((t)->tp_weaklistoffset > 0)) + +#define PyObject_GET_WEAKREFS_LISTPTR(o) \ + ((PyObject **) (((char *) (o)) + Py_TYPE(o)->tp_weaklistoffset)) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OBJIMPL_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/opcode.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,171 @@ +#ifndef Py_OPCODE_H +#define Py_OPCODE_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Instruction opcodes for compiled code */ + +#define STOP_CODE 0 +#define POP_TOP 1 +#define ROT_TWO 2 +#define ROT_THREE 3 +#define DUP_TOP 4 +#define ROT_FOUR 5 +#define NOP 9 + +#define UNARY_POSITIVE 10 +#define UNARY_NEGATIVE 11 +#define UNARY_NOT 12 +#define UNARY_CONVERT 13 + +#define UNARY_INVERT 15 + +#define BINARY_POWER 19 + +#define BINARY_MULTIPLY 20 +#define BINARY_DIVIDE 21 +#define BINARY_MODULO 22 +#define BINARY_ADD 23 +#define BINARY_SUBTRACT 24 +#define BINARY_SUBSCR 25 +#define BINARY_FLOOR_DIVIDE 26 +#define BINARY_TRUE_DIVIDE 27 +#define INPLACE_FLOOR_DIVIDE 28 +#define INPLACE_TRUE_DIVIDE 29 + +#define SLICE 30 +/* Also uses 31-33 */ +#define SLICE_1 31 +#define SLICE_2 32 +#define SLICE_3 33 + +#define STORE_SLICE 40 +/* Also uses 41-43 */ +#define STORE_SLICE_1 41 +#define STORE_SLICE_2 42 +#define STORE_SLICE_3 43 + +#define DELETE_SLICE 50 +/* Also uses 51-53 */ +#define DELETE_SLICE_1 51 +#define DELETE_SLICE_2 52 +#define DELETE_SLICE_3 53 + +#define STORE_MAP 54 +#define INPLACE_ADD 55 +#define INPLACE_SUBTRACT 56 +#define INPLACE_MULTIPLY 57 +#define INPLACE_DIVIDE 58 +#define INPLACE_MODULO 59 +#define STORE_SUBSCR 60 +#define DELETE_SUBSCR 61 + +#define BINARY_LSHIFT 62 +#define BINARY_RSHIFT 63 +#define BINARY_AND 64 +#define BINARY_XOR 65 +#define BINARY_OR 66 +#define INPLACE_POWER 67 +#define GET_ITER 68 + +#define PRINT_EXPR 70 +#define PRINT_ITEM 71 +#define PRINT_NEWLINE 72 +#define PRINT_ITEM_TO 73 +#define PRINT_NEWLINE_TO 74 +#define INPLACE_LSHIFT 75 +#define INPLACE_RSHIFT 76 +#define INPLACE_AND 77 +#define INPLACE_XOR 78 +#define INPLACE_OR 79 +#define BREAK_LOOP 80 +#define WITH_CLEANUP 81 +#define LOAD_LOCALS 82 +#define RETURN_VALUE 83 +#define IMPORT_STAR 84 +#define EXEC_STMT 85 +#define YIELD_VALUE 86 +#define POP_BLOCK 87 +#define END_FINALLY 88 +#define BUILD_CLASS 89 + +#define HAVE_ARGUMENT 90 /* Opcodes from here have an argument: */ + +#define STORE_NAME 90 /* Index in name list */ +#define DELETE_NAME 91 /* "" */ +#define UNPACK_SEQUENCE 92 /* Number of sequence items */ +#define FOR_ITER 93 +#define LIST_APPEND 94 + +#define STORE_ATTR 95 /* Index in name list */ +#define DELETE_ATTR 96 /* "" */ +#define STORE_GLOBAL 97 /* "" */ +#define DELETE_GLOBAL 98 /* "" */ +#define DUP_TOPX 99 /* number of items to duplicate */ +#define LOAD_CONST 100 /* Index in const list */ +#define LOAD_NAME 101 /* Index in name list */ +#define BUILD_TUPLE 102 /* Number of tuple items */ +#define BUILD_LIST 103 /* Number of list items */ +#define BUILD_SET 104 /* Number of set items */ +#define BUILD_MAP 105 /* Always zero for now */ +#define LOAD_ATTR 106 /* Index in name list */ +#define COMPARE_OP 107 /* Comparison operator */ +#define IMPORT_NAME 108 /* Index in name list */ +#define IMPORT_FROM 109 /* Index in name list */ +#define JUMP_FORWARD 110 /* Number of bytes to skip */ + +#define JUMP_IF_FALSE_OR_POP 111 /* Target byte offset from beginning + of code */ +#define JUMP_IF_TRUE_OR_POP 112 /* "" */ +#define JUMP_ABSOLUTE 113 /* "" */ +#define POP_JUMP_IF_FALSE 114 /* "" */ +#define POP_JUMP_IF_TRUE 115 /* "" */ + +#define LOAD_GLOBAL 116 /* Index in name list */ + +#define CONTINUE_LOOP 119 /* Start of loop (absolute) */ +#define SETUP_LOOP 120 /* Target address (relative) */ +#define SETUP_EXCEPT 121 /* "" */ +#define SETUP_FINALLY 122 /* "" */ + +#define LOAD_FAST 124 /* Local variable number */ +#define STORE_FAST 125 /* Local variable number */ +#define DELETE_FAST 126 /* Local variable number */ + +#define RAISE_VARARGS 130 /* Number of raise arguments (1, 2 or 3) */ +/* CALL_FUNCTION_XXX opcodes defined below depend on this definition */ +#define CALL_FUNCTION 131 /* #args + (#kwargs<<8) */ +#define MAKE_FUNCTION 132 /* #defaults */ +#define BUILD_SLICE 133 /* Number of items */ + +#define MAKE_CLOSURE 134 /* #free vars */ +#define LOAD_CLOSURE 135 /* Load free variable from closure */ +#define LOAD_DEREF 136 /* Load and dereference from closure cell */ +#define STORE_DEREF 137 /* Store into cell */ + +/* The next 3 opcodes must be contiguous and satisfy + (CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */ +#define CALL_FUNCTION_VAR 140 /* #args + (#kwargs<<8) */ +#define CALL_FUNCTION_KW 141 /* #args + (#kwargs<<8) */ +#define CALL_FUNCTION_VAR_KW 142 /* #args + (#kwargs<<8) */ + +#define SETUP_WITH 143 + +/* Support for opargs more than 16 bits long */ +#define EXTENDED_ARG 145 + +#define SET_ADD 146 +#define MAP_ADD 147 + + +enum cmp_op {PyCmp_LT=Py_LT, PyCmp_LE=Py_LE, PyCmp_EQ=Py_EQ, PyCmp_NE=Py_NE, PyCmp_GT=Py_GT, PyCmp_GE=Py_GE, + PyCmp_IN, PyCmp_NOT_IN, PyCmp_IS, PyCmp_IS_NOT, PyCmp_EXC_MATCH, PyCmp_BAD}; + +#define HAS_ARG(op) ((op) >= HAVE_ARGUMENT) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OPCODE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/osdefs.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,63 @@ +#ifndef Py_OSDEFS_H +#define Py_OSDEFS_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Operating system dependencies */ + +/* Mod by chrish: QNX has WATCOM, but isn't DOS */ +#if !defined(__QNX__) +#if defined(MS_WINDOWS) || defined(__BORLANDC__) || defined(__WATCOMC__) || defined(__DJGPP__) || defined(PYOS_OS2) +#if defined(PYOS_OS2) && defined(PYCC_GCC) +#define MAXPATHLEN 260 +#define SEP '/' +#define ALTSEP '\\' +#else +#define SEP '\\' +#define ALTSEP '/' +#define MAXPATHLEN 256 +#endif +#define DELIM ';' +#endif +#endif + +#ifdef RISCOS +#define SEP '.' +#define MAXPATHLEN 256 +#define DELIM ',' +#endif + + +/* Filename separator */ +#ifndef SEP +#define SEP '/' +#endif + +/* Max pathname length */ +#ifdef __hpux +#include <sys/param.h> +#include <limits.h> +#ifndef PATH_MAX +#define PATH_MAX MAXPATHLEN +#endif +#endif + +#ifndef MAXPATHLEN +#if defined(PATH_MAX) && PATH_MAX > 1024 +#define MAXPATHLEN PATH_MAX +#else +#define MAXPATHLEN 1024 +#endif +#endif + +/* Search path entry delimiter */ +#ifndef DELIM +#define DELIM ':' +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OSDEFS_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/parsetok.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,64 @@ + +/* Parser-tokenizer link interface */ + +#ifndef Py_PARSETOK_H +#define Py_PARSETOK_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int error; + const char *filename; + int lineno; + int offset; + char *text; + int token; + int expected; +} perrdetail; + +#if 0 +#define PyPARSE_YIELD_IS_KEYWORD 0x0001 +#endif + +#define PyPARSE_DONT_IMPLY_DEDENT 0x0002 + +#if 0 +#define PyPARSE_WITH_IS_KEYWORD 0x0003 +#endif + +#define PyPARSE_PRINT_IS_FUNCTION 0x0004 +#define PyPARSE_UNICODE_LITERALS 0x0008 + + + +PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int, + perrdetail *); +PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int, + char *, char *, perrdetail *); + +PyAPI_FUNC(node *) PyParser_ParseStringFlags(const char *, grammar *, int, + perrdetail *, int); +PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, grammar *, + int, char *, char *, + perrdetail *, int); +PyAPI_FUNC(node *) PyParser_ParseFileFlagsEx(FILE *, const char *, grammar *, + int, char *, char *, + perrdetail *, int *); + +PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilename(const char *, + const char *, + grammar *, int, + perrdetail *, int); +PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(const char *, + const char *, + grammar *, int, + perrdetail *, int *); + +/* Note that he following function is defined in pythonrun.c not parsetok.c. */ +PyAPI_FUNC(void) PyParser_SetError(perrdetail *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PARSETOK_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/patchlevel.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,43 @@ + +/* Newfangled version identification scheme. + + This scheme was added in Python 1.5.2b2; before that time, only PATCHLEVEL + was available. To test for presence of the scheme, test for + defined(PY_MAJOR_VERSION). + + When the major or minor version changes, the VERSION variable in + configure.ac must also be changed. + + There is also (independent) API version information in modsupport.h. +*/ + +/* Values for PY_RELEASE_LEVEL */ +#define PY_RELEASE_LEVEL_ALPHA 0xA +#define PY_RELEASE_LEVEL_BETA 0xB +#define PY_RELEASE_LEVEL_GAMMA 0xC /* For release candidates */ +#define PY_RELEASE_LEVEL_FINAL 0xF /* Serial should be 0 here */ + /* Higher for patch releases */ + +/* Version parsed out into numeric values */ +/*--start constants--*/ +#define PY_MAJOR_VERSION 2 +#define PY_MINOR_VERSION 7 +#define PY_MICRO_VERSION 12 +#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL +#define PY_RELEASE_SERIAL 0 + +/* Version as a string */ +#define PY_VERSION "2.7.12" +/*--end constants--*/ + +/* Subversion Revision number of this file (not of the repository). Empty + since Mercurial migration. */ +#define PY_PATCHLEVEL_REVISION "" + +/* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. + Use this for numeric comparisons, e.g. #if PY_VERSION_HEX >= ... */ +#define PY_VERSION_HEX ((PY_MAJOR_VERSION << 24) | \ + (PY_MINOR_VERSION << 16) | \ + (PY_MICRO_VERSION << 8) | \ + (PY_RELEASE_LEVEL << 4) | \ + (PY_RELEASE_SERIAL << 0))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pgen.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,18 @@ +#ifndef Py_PGEN_H +#define Py_PGEN_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Parser generator interface */ + +extern grammar *meta_grammar(void); + +struct _node; +extern grammar *pgen(struct _node *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PGEN_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pgenheaders.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,42 @@ +#ifndef Py_PGENHEADERS_H +#define Py_PGENHEADERS_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Include files and extern declarations used by most of the parser. */ + +#include "Python.h" + +PyAPI_FUNC(void) PySys_WriteStdout(const char *format, ...) + Py_GCC_ATTRIBUTE((format(printf, 1, 2))); +PyAPI_FUNC(void) PySys_WriteStderr(const char *format, ...) + Py_GCC_ATTRIBUTE((format(printf, 1, 2))); + +#define addarc _Py_addarc +#define addbit _Py_addbit +#define adddfa _Py_adddfa +#define addfirstsets _Py_addfirstsets +#define addlabel _Py_addlabel +#define addstate _Py_addstate +#define delbitset _Py_delbitset +#define dumptree _Py_dumptree +#define findlabel _Py_findlabel +#define mergebitset _Py_mergebitset +#define meta_grammar _Py_meta_grammar +#define newbitset _Py_newbitset +#define newgrammar _Py_newgrammar +#define pgen _Py_pgen +#define printgrammar _Py_printgrammar +#define printnonterminals _Py_printnonterminals +#define printtree _Py_printtree +#define samebitset _Py_samebitset +#define showtree _Py_showtree +#define tok_dump _Py_tok_dump +#define translatelabels _Py_translatelabels + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PGENHEADERS_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/py_curses.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,176 @@ + +#ifndef Py_CURSES_H +#define Py_CURSES_H + +#ifdef __APPLE__ +/* +** On Mac OS X 10.2 [n]curses.h and stdlib.h use different guards +** against multiple definition of wchar_t. +*/ +#ifdef _BSD_WCHAR_T_DEFINED_ +#define _WCHAR_T +#endif + +/* the following define is necessary for OS X 10.6; without it, the + Apple-supplied ncurses.h sets NCURSES_OPAQUE to 1, and then Python + can't get at the WINDOW flags field. */ +#define NCURSES_OPAQUE 0 +#endif /* __APPLE__ */ + +#ifdef __FreeBSD__ +/* +** On FreeBSD, [n]curses.h and stdlib.h/wchar.h use different guards +** against multiple definition of wchar_t and wint_t. +*/ +#ifdef _XOPEN_SOURCE_EXTENDED +#ifndef __FreeBSD_version +#include <osreldate.h> +#endif +#if __FreeBSD_version >= 500000 +#ifndef __wchar_t +#define __wchar_t +#endif +#ifndef __wint_t +#define __wint_t +#endif +#else +#ifndef _WCHAR_T +#define _WCHAR_T +#endif +#ifndef _WINT_T +#define _WINT_T +#endif +#endif +#endif +#endif + +#ifdef HAVE_NCURSES_H +#include <ncurses.h> +#else +#include <curses.h> +#ifdef HAVE_TERM_H +/* for tigetstr, which is not declared in SysV curses */ +#include <term.h> +#endif +#endif + +#ifdef HAVE_NCURSES_H +/* configure was checking <curses.h>, but we will + use <ncurses.h>, which has all these features. */ +#ifndef WINDOW_HAS_FLAGS +#define WINDOW_HAS_FLAGS 1 +#endif +#ifndef MVWDELCH_IS_EXPRESSION +#define MVWDELCH_IS_EXPRESSION 1 +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define PyCurses_API_pointers 4 + +/* Type declarations */ + +typedef struct { + PyObject_HEAD + WINDOW *win; +} PyCursesWindowObject; + +#define PyCursesWindow_Check(v) (Py_TYPE(v) == &PyCursesWindow_Type) + +#define PyCurses_CAPSULE_NAME "_curses._C_API" + + +#ifdef CURSES_MODULE +/* This section is used when compiling _cursesmodule.c */ + +#else +/* This section is used in modules that use the _cursesmodule API */ + +static void **PyCurses_API; + +#define PyCursesWindow_Type (*(PyTypeObject *) PyCurses_API[0]) +#define PyCursesSetupTermCalled {if (! ((int (*)(void))PyCurses_API[1]) () ) return NULL;} +#define PyCursesInitialised {if (! ((int (*)(void))PyCurses_API[2]) () ) return NULL;} +#define PyCursesInitialisedColor {if (! ((int (*)(void))PyCurses_API[3]) () ) return NULL;} + +#define import_curses() \ + PyCurses_API = (void **)PyCapsule_Import(PyCurses_CAPSULE_NAME, 1); + +#endif + +/* general error messages */ +static char *catchall_ERR = "curses function returned ERR"; +static char *catchall_NULL = "curses function returned NULL"; + +/* Function Prototype Macros - They are ugly but very, very useful. ;-) + + X - function name + TYPE - parameter Type + ERGSTR - format string for construction of the return value + PARSESTR - format string for argument parsing + */ + +#define NoArgNoReturnFunction(X) \ +static PyObject *PyCurses_ ## X (PyObject *self) \ +{ \ + PyCursesInitialised \ + return PyCursesCheckERR(X(), # X); } + +#define NoArgOrFlagNoReturnFunction(X) \ +static PyObject *PyCurses_ ## X (PyObject *self, PyObject *args) \ +{ \ + int flag = 0; \ + PyCursesInitialised \ + switch(PyTuple_Size(args)) { \ + case 0: \ + return PyCursesCheckERR(X(), # X); \ + case 1: \ + if (!PyArg_ParseTuple(args, "i;True(1) or False(0)", &flag)) return NULL; \ + if (flag) return PyCursesCheckERR(X(), # X); \ + else return PyCursesCheckERR(no ## X (), # X); \ + default: \ + PyErr_SetString(PyExc_TypeError, # X " requires 0 or 1 arguments"); \ + return NULL; } } + +#define NoArgReturnIntFunction(X) \ +static PyObject *PyCurses_ ## X (PyObject *self) \ +{ \ + PyCursesInitialised \ + return PyInt_FromLong((long) X()); } + + +#define NoArgReturnStringFunction(X) \ +static PyObject *PyCurses_ ## X (PyObject *self) \ +{ \ + PyCursesInitialised \ + return PyString_FromString(X()); } + +#define NoArgTrueFalseFunction(X) \ +static PyObject *PyCurses_ ## X (PyObject *self) \ +{ \ + PyCursesInitialised \ + if (X () == FALSE) { \ + Py_INCREF(Py_False); \ + return Py_False; \ + } \ + Py_INCREF(Py_True); \ + return Py_True; } + +#define NoArgNoReturnVoidFunction(X) \ +static PyObject *PyCurses_ ## X (PyObject *self) \ +{ \ + PyCursesInitialised \ + X(); \ + Py_INCREF(Py_None); \ + return Py_None; } + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(Py_CURSES_H) */ + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pyarena.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,62 @@ +/* An arena-like memory interface for the compiler. + */ + +#ifndef Py_PYARENA_H +#define Py_PYARENA_H + +#ifdef __cplusplus +extern "C" { +#endif + + typedef struct _arena PyArena; + + /* PyArena_New() and PyArena_Free() create a new arena and free it, + respectively. Once an arena has been created, it can be used + to allocate memory via PyArena_Malloc(). Pointers to PyObject can + also be registered with the arena via PyArena_AddPyObject(), and the + arena will ensure that the PyObjects stay alive at least until + PyArena_Free() is called. When an arena is freed, all the memory it + allocated is freed, the arena releases internal references to registered + PyObject*, and none of its pointers are valid. + XXX (tim) What does "none of its pointers are valid" mean? Does it + XXX mean that pointers previously obtained via PyArena_Malloc() are + XXX no longer valid? (That's clearly true, but not sure that's what + XXX the text is trying to say.) + + PyArena_New() returns an arena pointer. On error, it + returns a negative number and sets an exception. + XXX (tim): Not true. On error, PyArena_New() actually returns NULL, + XXX and looks like it may or may not set an exception (e.g., if the + XXX internal PyList_New(0) returns NULL, PyArena_New() passes that on + XXX and an exception is set; OTOH, if the internal + XXX block_new(DEFAULT_BLOCK_SIZE) returns NULL, that's passed on but + XXX an exception is not set in that case). + */ + PyAPI_FUNC(PyArena *) PyArena_New(void); + PyAPI_FUNC(void) PyArena_Free(PyArena *); + + /* Mostly like malloc(), return the address of a block of memory spanning + * `size` bytes, or return NULL (without setting an exception) if enough + * new memory can't be obtained. Unlike malloc(0), PyArena_Malloc() with + * size=0 does not guarantee to return a unique pointer (the pointer + * returned may equal one or more other pointers obtained from + * PyArena_Malloc()). + * Note that pointers obtained via PyArena_Malloc() must never be passed to + * the system free() or realloc(), or to any of Python's similar memory- + * management functions. PyArena_Malloc()-obtained pointers remain valid + * until PyArena_Free(ar) is called, at which point all pointers obtained + * from the arena `ar` become invalid simultaneously. + */ + PyAPI_FUNC(void *) PyArena_Malloc(PyArena *, size_t size); + + /* This routine isn't a proper arena allocation routine. It takes + * a PyObject* and records it so that it can be DECREFed when the + * arena is freed. + */ + PyAPI_FUNC(int) PyArena_AddPyObject(PyArena *, PyObject *); + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_PYARENA_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pycapsule.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,56 @@ + +/* Capsule objects let you wrap a C "void *" pointer in a Python + object. They're a way of passing data through the Python interpreter + without creating your own custom type. + + Capsules are used for communication between extension modules. + They provide a way for an extension module to export a C interface + to other extension modules, so that extension modules can use the + Python import mechanism to link to one another. + + For more information, please see "c-api/capsule.html" in the + documentation. +*/ + +#ifndef Py_CAPSULE_H +#define Py_CAPSULE_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyCapsule_Type; + +typedef void (*PyCapsule_Destructor)(PyObject *); + +#define PyCapsule_CheckExact(op) (Py_TYPE(op) == &PyCapsule_Type) + + +PyAPI_FUNC(PyObject *) PyCapsule_New( + void *pointer, + const char *name, + PyCapsule_Destructor destructor); + +PyAPI_FUNC(void *) PyCapsule_GetPointer(PyObject *capsule, const char *name); + +PyAPI_FUNC(PyCapsule_Destructor) PyCapsule_GetDestructor(PyObject *capsule); + +PyAPI_FUNC(const char *) PyCapsule_GetName(PyObject *capsule); + +PyAPI_FUNC(void *) PyCapsule_GetContext(PyObject *capsule); + +PyAPI_FUNC(int) PyCapsule_IsValid(PyObject *capsule, const char *name); + +PyAPI_FUNC(int) PyCapsule_SetPointer(PyObject *capsule, void *pointer); + +PyAPI_FUNC(int) PyCapsule_SetDestructor(PyObject *capsule, PyCapsule_Destructor destructor); + +PyAPI_FUNC(int) PyCapsule_SetName(PyObject *capsule, const char *name); + +PyAPI_FUNC(int) PyCapsule_SetContext(PyObject *capsule, void *context); + +PyAPI_FUNC(void *) PyCapsule_Import(const char *name, int no_block); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CAPSULE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pyconfig.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,97 @@ +#if defined(__linux__) +# if defined(__x86_64__) && defined(__LP64__) +# include <x86_64-linux-gnu/python2.7/pyconfig.h> +# elif defined(__x86_64__) && defined(__ILP32__) +# include <x86_64-linux-gnux32/python2.7/pyconfig.h> +# elif defined(__i386__) +# include <i386-linux-gnu/python2.7/pyconfig.h> +# elif defined(__aarch64__) && defined(__AARCH64EL__) +# if defined(__ILP32__) +# include <aarch64_ilp32-linux-gnu/python2.7/pyconfig.h> +# else +# include <aarch64-linux-gnu/python2.7/pyconfig.h> +# endif +# elif defined(__aarch64__) && defined(__AARCH64EB__) +# if defined(__ILP32__) +# include <aarch64_be_ilp32-linux-gnu/python2.7/pyconfig.h> +# else +# include <aarch64_be-linux-gnu/python2.7/pyconfig.h> +# endif +# elif defined(__alpha__) +# include <alpha-linux-gnu/python2.7/pyconfig.h> +# elif defined(__ARM_EABI__) && defined(__ARM_PCS_VFP) +# if defined(__ARMEL__) +# include <arm-linux-gnueabihf/python2.7/pyconfig.h> +# else +# include <armeb-linux-gnueabihf/python2.7/pyconfig.h> +# endif +# elif defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) +# if defined(__ARMEL__) +# include <arm-linux-gnueabi/python2.7/pyconfig.h> +# else +# include <armeb-linux-gnueabi/python2.7/pyconfig.h> +# endif +# elif defined(__hppa__) +# include <hppa-linux-gnu/python2.7/pyconfig.h> +# elif defined(__ia64__) +# include <ia64-linux-gnu/python2.7/pyconfig.h> +# elif defined(__m68k__) && !defined(__mcoldfire__) +# include <m68k-linux-gnu/python2.7/pyconfig.h> +# elif defined(__mips_hard_float) && defined(_MIPSEL) +# if _MIPS_SIM == _ABIO32 +# include <mipsel-linux-gnu/python2.7/pyconfig.h> +# elif _MIPS_SIM == _ABIN32 +# include <mips64el-linux-gnuabin32/python2.7/pyconfig.h> +# elif _MIPS_SIM == _ABI64 +# include <mips64el-linux-gnuabi64/python2.7/pyconfig.h> +# else +# error unknown multiarch location for pyconfig.h +# endif +# elif defined(__mips_hard_float) +# if _MIPS_SIM == _ABIO32 +# include <mips-linux-gnu/python2.7/pyconfig.h> +# elif _MIPS_SIM == _ABIN32 +# include <mips64-linux-gnuabin32/python2.7/pyconfig.h> +# elif _MIPS_SIM == _ABI64 +# include <mips64-linux-gnuabi64/python2.7/pyconfig.h> +# else +# error unknown multiarch location for pyconfig.h +# endif +# elif defined(__or1k__) +# include <or1k-linux-gnu/python2.7/pyconfig.h> +# elif defined(__powerpc__) && defined(__SPE__) +# include <powerpc-linux-gnuspe/python2.7/pyconfig.h> +# elif defined(__powerpc64__) +# if defined(__LITTLE_ENDIAN__) +# include <powerpc64le-linux-gnu/python2.7/pyconfig.h> +# else +# include <powerpc64-linux-gnu/python2.7/pyconfig.h> +# endif +# elif defined(__powerpc__) +# include <powerpc-linux-gnu/python2.7/pyconfig.h> +# elif defined(__s390x__) +# include <s390x-linux-gnu/python2.7/pyconfig.h> +# elif defined(__s390__) +# include <s390-linux-gnu/python2.7/pyconfig.h> +# elif defined(__sh__) && defined(__LITTLE_ENDIAN__) +# include <sh4-linux-gnu/python2.7/pyconfig.h> +# elif defined(__sparc__) && defined(__arch64__) +# include <sparc64-linux-gnu/python2.7/pyconfig.h> +# elif defined(__sparc__) +# include <sparc-linux-gnu/python2.7/pyconfig.h> +# else +# error unknown multiarch location for pyconfig.h +# endif +#elif defined(__FreeBSD_kernel__) +# if defined(__LP64__) +# include <x86_64-kfreebsd-gnu/python2.7/pyconfig.h> +# elif defined(__i386__) +# include <i386-kfreebsd-gnu/python2.7/pyconfig.h> +# else +# error unknown multiarch location for pyconfig.h +# endif +#elif defined(__gnu_hurd__) +# include <i386-gnu/python2.7/pyconfig.h> +#else +# error unknown multiarch location for pyconfig.h +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pyctype.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,31 @@ +#ifndef PYCTYPE_H +#define PYCTYPE_H + +#define PY_CTF_LOWER 0x01 +#define PY_CTF_UPPER 0x02 +#define PY_CTF_ALPHA (PY_CTF_LOWER|PY_CTF_UPPER) +#define PY_CTF_DIGIT 0x04 +#define PY_CTF_ALNUM (PY_CTF_ALPHA|PY_CTF_DIGIT) +#define PY_CTF_SPACE 0x08 +#define PY_CTF_XDIGIT 0x10 + +PyAPI_DATA(const unsigned int) _Py_ctype_table[256]; + +/* Unlike their C counterparts, the following macros are not meant to + * handle an int with any of the values [EOF, 0-UCHAR_MAX]. The argument + * must be a signed/unsigned char. */ +#define Py_ISLOWER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_LOWER) +#define Py_ISUPPER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_UPPER) +#define Py_ISALPHA(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALPHA) +#define Py_ISDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT) +#define Py_ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT) +#define Py_ISALNUM(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALNUM) +#define Py_ISSPACE(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_SPACE) + +PyAPI_DATA(const unsigned char) _Py_ctype_tolower[256]; +PyAPI_DATA(const unsigned char) _Py_ctype_toupper[256]; + +#define Py_TOLOWER(c) (_Py_ctype_tolower[Py_CHARMASK(c)]) +#define Py_TOUPPER(c) (_Py_ctype_toupper[Py_CHARMASK(c)]) + +#endif /* !PYCTYPE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pydebug.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,41 @@ + +#ifndef Py_PYDEBUG_H +#define Py_PYDEBUG_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(int) Py_DebugFlag; +PyAPI_DATA(int) Py_VerboseFlag; +PyAPI_DATA(int) Py_InteractiveFlag; +PyAPI_DATA(int) Py_InspectFlag; +PyAPI_DATA(int) Py_OptimizeFlag; +PyAPI_DATA(int) Py_NoSiteFlag; +PyAPI_DATA(int) Py_BytesWarningFlag; +PyAPI_DATA(int) Py_UseClassExceptionsFlag; +PyAPI_DATA(int) Py_FrozenFlag; +PyAPI_DATA(int) Py_TabcheckFlag; +PyAPI_DATA(int) Py_UnicodeFlag; +PyAPI_DATA(int) Py_IgnoreEnvironmentFlag; +PyAPI_DATA(int) Py_DivisionWarningFlag; +PyAPI_DATA(int) Py_DontWriteBytecodeFlag; +PyAPI_DATA(int) Py_NoUserSiteDirectory; +/* _XXX Py_QnewFlag should go away in 3.0. It's true iff -Qnew is passed, + on the command line, and is used in 2.2 by ceval.c to make all "/" divisions + true divisions (which they will be in 3.0). */ +PyAPI_DATA(int) _Py_QnewFlag; +/* Warn about 3.x issues */ +PyAPI_DATA(int) Py_Py3kWarningFlag; +PyAPI_DATA(int) Py_HashRandomizationFlag; + +/* this is a wrapper around getenv() that pays attention to + Py_IgnoreEnvironmentFlag. It should be used for getting variables like + PYTHONPATH and PYTHONHOME from the environment */ +#define Py_GETENV(s) (Py_IgnoreEnvironmentFlag ? NULL : getenv(s)) + +PyAPI_FUNC(void) Py_FatalError(const char *message); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYDEBUG_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pyerrors.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,329 @@ +#ifndef Py_ERRORS_H +#define Py_ERRORS_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Error objects */ + +typedef struct { + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; +} PyBaseExceptionObject; + +typedef struct { + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; + PyObject *msg; + PyObject *filename; + PyObject *lineno; + PyObject *offset; + PyObject *text; + PyObject *print_file_and_line; +} PySyntaxErrorObject; + +#ifdef Py_USING_UNICODE +typedef struct { + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; + PyObject *encoding; + PyObject *object; + Py_ssize_t start; + Py_ssize_t end; + PyObject *reason; +} PyUnicodeErrorObject; +#endif + +typedef struct { + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; + PyObject *code; +} PySystemExitObject; + +typedef struct { + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; + PyObject *myerrno; + PyObject *strerror; + PyObject *filename; +} PyEnvironmentErrorObject; + +#ifdef MS_WINDOWS +typedef struct { + PyObject_HEAD + PyObject *dict; + PyObject *args; + PyObject *message; + PyObject *myerrno; + PyObject *strerror; + PyObject *filename; + PyObject *winerror; +} PyWindowsErrorObject; +#endif + +/* Error handling definitions */ + +PyAPI_FUNC(void) PyErr_SetNone(PyObject *); +PyAPI_FUNC(void) PyErr_SetObject(PyObject *, PyObject *); +PyAPI_FUNC(void) PyErr_SetString(PyObject *, const char *); +PyAPI_FUNC(PyObject *) PyErr_Occurred(void); +PyAPI_FUNC(void) PyErr_Clear(void); +PyAPI_FUNC(void) PyErr_Fetch(PyObject **, PyObject **, PyObject **); +PyAPI_FUNC(void) PyErr_Restore(PyObject *, PyObject *, PyObject *); + +#ifdef Py_DEBUG +#define _PyErr_OCCURRED() PyErr_Occurred() +#else +#define _PyErr_OCCURRED() (_PyThreadState_Current->curexc_type) +#endif + +/* Error testing and normalization */ +PyAPI_FUNC(int) PyErr_GivenExceptionMatches(PyObject *, PyObject *); +PyAPI_FUNC(int) PyErr_ExceptionMatches(PyObject *); +PyAPI_FUNC(void) PyErr_NormalizeException(PyObject**, PyObject**, PyObject**); +PyAPI_FUNC(void) _PyErr_ReplaceException(PyObject *, PyObject *, PyObject *); + +/* */ + +#define PyExceptionClass_Check(x) \ + (PyClass_Check((x)) || (PyType_Check((x)) && \ + PyType_FastSubclass((PyTypeObject*)(x), Py_TPFLAGS_BASE_EXC_SUBCLASS))) + +#define PyExceptionInstance_Check(x) \ + (PyInstance_Check((x)) || \ + PyType_FastSubclass((x)->ob_type, Py_TPFLAGS_BASE_EXC_SUBCLASS)) + +#define PyExceptionClass_Name(x) \ + (PyClass_Check((x)) \ + ? PyString_AS_STRING(((PyClassObject*)(x))->cl_name) \ + : (char *)(((PyTypeObject*)(x))->tp_name)) + +#define PyExceptionInstance_Class(x) \ + ((PyInstance_Check((x)) \ + ? (PyObject*)((PyInstanceObject*)(x))->in_class \ + : (PyObject*)((x)->ob_type))) + + +/* Predefined exceptions */ + +PyAPI_DATA(PyObject *) PyExc_BaseException; +PyAPI_DATA(PyObject *) PyExc_Exception; +PyAPI_DATA(PyObject *) PyExc_StopIteration; +PyAPI_DATA(PyObject *) PyExc_GeneratorExit; +PyAPI_DATA(PyObject *) PyExc_StandardError; +PyAPI_DATA(PyObject *) PyExc_ArithmeticError; +PyAPI_DATA(PyObject *) PyExc_LookupError; + +PyAPI_DATA(PyObject *) PyExc_AssertionError; +PyAPI_DATA(PyObject *) PyExc_AttributeError; +PyAPI_DATA(PyObject *) PyExc_EOFError; +PyAPI_DATA(PyObject *) PyExc_FloatingPointError; +PyAPI_DATA(PyObject *) PyExc_EnvironmentError; +PyAPI_DATA(PyObject *) PyExc_IOError; +PyAPI_DATA(PyObject *) PyExc_OSError; +PyAPI_DATA(PyObject *) PyExc_ImportError; +PyAPI_DATA(PyObject *) PyExc_IndexError; +PyAPI_DATA(PyObject *) PyExc_KeyError; +PyAPI_DATA(PyObject *) PyExc_KeyboardInterrupt; +PyAPI_DATA(PyObject *) PyExc_MemoryError; +PyAPI_DATA(PyObject *) PyExc_NameError; +PyAPI_DATA(PyObject *) PyExc_OverflowError; +PyAPI_DATA(PyObject *) PyExc_RuntimeError; +PyAPI_DATA(PyObject *) PyExc_NotImplementedError; +PyAPI_DATA(PyObject *) PyExc_SyntaxError; +PyAPI_DATA(PyObject *) PyExc_IndentationError; +PyAPI_DATA(PyObject *) PyExc_TabError; +PyAPI_DATA(PyObject *) PyExc_ReferenceError; +PyAPI_DATA(PyObject *) PyExc_SystemError; +PyAPI_DATA(PyObject *) PyExc_SystemExit; +PyAPI_DATA(PyObject *) PyExc_TypeError; +PyAPI_DATA(PyObject *) PyExc_UnboundLocalError; +PyAPI_DATA(PyObject *) PyExc_UnicodeError; +PyAPI_DATA(PyObject *) PyExc_UnicodeEncodeError; +PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError; +PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError; +PyAPI_DATA(PyObject *) PyExc_ValueError; +PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError; +#ifdef MS_WINDOWS +PyAPI_DATA(PyObject *) PyExc_WindowsError; +#endif +#ifdef __VMS +PyAPI_DATA(PyObject *) PyExc_VMSError; +#endif + +PyAPI_DATA(PyObject *) PyExc_BufferError; + +PyAPI_DATA(PyObject *) PyExc_MemoryErrorInst; +PyAPI_DATA(PyObject *) PyExc_RecursionErrorInst; + +/* Predefined warning categories */ +PyAPI_DATA(PyObject *) PyExc_Warning; +PyAPI_DATA(PyObject *) PyExc_UserWarning; +PyAPI_DATA(PyObject *) PyExc_DeprecationWarning; +PyAPI_DATA(PyObject *) PyExc_PendingDeprecationWarning; +PyAPI_DATA(PyObject *) PyExc_SyntaxWarning; +PyAPI_DATA(PyObject *) PyExc_RuntimeWarning; +PyAPI_DATA(PyObject *) PyExc_FutureWarning; +PyAPI_DATA(PyObject *) PyExc_ImportWarning; +PyAPI_DATA(PyObject *) PyExc_UnicodeWarning; +PyAPI_DATA(PyObject *) PyExc_BytesWarning; + + +/* Convenience functions */ + +PyAPI_FUNC(int) PyErr_BadArgument(void); +PyAPI_FUNC(PyObject *) PyErr_NoMemory(void); +PyAPI_FUNC(PyObject *) PyErr_SetFromErrno(PyObject *); +PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilenameObject( + PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilename( + PyObject *, const char *); +#ifdef MS_WINDOWS +PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithUnicodeFilename( + PyObject *, const Py_UNICODE *); +#endif /* MS_WINDOWS */ + +PyAPI_FUNC(PyObject *) PyErr_Format(PyObject *, const char *, ...) + Py_GCC_ATTRIBUTE((format(printf, 2, 3))); + +#ifdef MS_WINDOWS +PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject( + int, const char *); +PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilename( + int, const char *); +PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithUnicodeFilename( + int, const Py_UNICODE *); +PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErr(int); +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilenameObject( + PyObject *,int, PyObject *); +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilename( + PyObject *,int, const char *); +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithUnicodeFilename( + PyObject *,int, const Py_UNICODE *); +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErr(PyObject *, int); +#endif /* MS_WINDOWS */ + +/* Export the old function so that the existing API remains available: */ +PyAPI_FUNC(void) PyErr_BadInternalCall(void); +PyAPI_FUNC(void) _PyErr_BadInternalCall(const char *filename, int lineno); +/* Mask the old API with a call to the new API for code compiled under + Python 2.0: */ +#define PyErr_BadInternalCall() _PyErr_BadInternalCall(__FILE__, __LINE__) + +/* Function to create a new exception */ +PyAPI_FUNC(PyObject *) PyErr_NewException( + char *name, PyObject *base, PyObject *dict); +PyAPI_FUNC(PyObject *) PyErr_NewExceptionWithDoc( + char *name, char *doc, PyObject *base, PyObject *dict); +PyAPI_FUNC(void) PyErr_WriteUnraisable(PyObject *); + +/* In sigcheck.c or signalmodule.c */ +PyAPI_FUNC(int) PyErr_CheckSignals(void); +PyAPI_FUNC(void) PyErr_SetInterrupt(void); + +/* In signalmodule.c */ +int PySignal_SetWakeupFd(int fd); + +/* Support for adding program text to SyntaxErrors */ +PyAPI_FUNC(void) PyErr_SyntaxLocation(const char *, int); +PyAPI_FUNC(PyObject *) PyErr_ProgramText(const char *, int); + +#ifdef Py_USING_UNICODE +/* The following functions are used to create and modify unicode + exceptions from C */ + +/* create a UnicodeDecodeError object */ +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create( + const char *, const char *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); + +/* create a UnicodeEncodeError object */ +PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_Create( + const char *, const Py_UNICODE *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); + +/* create a UnicodeTranslateError object */ +PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create( + const Py_UNICODE *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); + +/* get the encoding attribute */ +PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *); + +/* get the object attribute */ +PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *); + +/* get the value of the start attribute (the int * may not be NULL) + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_GetStart(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeDecodeError_GetStart(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeTranslateError_GetStart(PyObject *, Py_ssize_t *); + +/* assign a new value to the start attribute + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_SetStart(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeDecodeError_SetStart(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeTranslateError_SetStart(PyObject *, Py_ssize_t); + +/* get the value of the end attribute (the int *may not be NULL) + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_GetEnd(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeDecodeError_GetEnd(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeTranslateError_GetEnd(PyObject *, Py_ssize_t *); + +/* assign a new value to the end attribute + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_SetEnd(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeDecodeError_SetEnd(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeTranslateError_SetEnd(PyObject *, Py_ssize_t); + +/* get the value of the reason attribute */ +PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *); + +/* assign a new value to the reason attribute + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason( + PyObject *, const char *); +PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason( + PyObject *, const char *); +PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason( + PyObject *, const char *); +#endif + + +/* These APIs aren't really part of the error implementation, but + often needed to format error messages; the native C lib APIs are + not available on all platforms, which is why we provide emulations + for those platforms in Python/mysnprintf.c, + WARNING: The return value of snprintf varies across platforms; do + not rely on any particular behavior; eventually the C99 defn may + be reliable. +*/ +#if defined(MS_WIN32) && !defined(HAVE_SNPRINTF) +# define HAVE_SNPRINTF +# define snprintf _snprintf +# define vsnprintf _vsnprintf +#endif + +#include <stdarg.h> +PyAPI_FUNC(int) PyOS_snprintf(char *str, size_t size, const char *format, ...) + Py_GCC_ATTRIBUTE((format(printf, 3, 4))); +PyAPI_FUNC(int) PyOS_vsnprintf(char *str, size_t size, const char *format, va_list va) + Py_GCC_ATTRIBUTE((format(printf, 3, 0))); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ERRORS_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pyexpat.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,48 @@ +/* Stuff to export relevant 'expat' entry points from pyexpat to other + * parser modules, such as cElementTree. */ + +/* note: you must import expat.h before importing this module! */ + +#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.0" +#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI" + +struct PyExpat_CAPI +{ + char* magic; /* set to PyExpat_CAPI_MAGIC */ + int size; /* set to sizeof(struct PyExpat_CAPI) */ + int MAJOR_VERSION; + int MINOR_VERSION; + int MICRO_VERSION; + /* pointers to selected expat functions. add new functions at + the end, if needed */ + const XML_LChar * (*ErrorString)(enum XML_Error code); + enum XML_Error (*GetErrorCode)(XML_Parser parser); + XML_Size (*GetErrorColumnNumber)(XML_Parser parser); + XML_Size (*GetErrorLineNumber)(XML_Parser parser); + enum XML_Status (*Parse)( + XML_Parser parser, const char *s, int len, int isFinal); + XML_Parser (*ParserCreate_MM)( + const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, + const XML_Char *namespaceSeparator); + void (*ParserFree)(XML_Parser parser); + void (*SetCharacterDataHandler)( + XML_Parser parser, XML_CharacterDataHandler handler); + void (*SetCommentHandler)( + XML_Parser parser, XML_CommentHandler handler); + void (*SetDefaultHandlerExpand)( + XML_Parser parser, XML_DefaultHandler handler); + void (*SetElementHandler)( + XML_Parser parser, XML_StartElementHandler start, + XML_EndElementHandler end); + void (*SetNamespaceDeclHandler)( + XML_Parser parser, XML_StartNamespaceDeclHandler start, + XML_EndNamespaceDeclHandler end); + void (*SetProcessingInstructionHandler)( + XML_Parser parser, XML_ProcessingInstructionHandler handler); + void (*SetUnknownEncodingHandler)( + XML_Parser parser, XML_UnknownEncodingHandler handler, + void *encodingHandlerData); + void (*SetUserData)(XML_Parser parser, void *userData); + /* always add new stuff to the end! */ +}; +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pyfpe.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,176 @@ +#ifndef Py_PYFPE_H +#define Py_PYFPE_H +#ifdef __cplusplus +extern "C" { +#endif +/* + --------------------------------------------------------------------- + / Copyright (c) 1996. \ + | The Regents of the University of California. | + | All rights reserved. | + | | + | Permission to use, copy, modify, and distribute this software for | + | any purpose without fee is hereby granted, provided that this en- | + | tire notice is included in all copies of any software which is or | + | includes a copy or modification of this software and in all | + | copies of the supporting documentation for such software. | + | | + | This work was produced at the University of California, Lawrence | + | Livermore National Laboratory under contract no. W-7405-ENG-48 | + | between the U.S. Department of Energy and The Regents of the | + | University of California for the operation of UC LLNL. | + | | + | DISCLAIMER | + | | + | This software was prepared as an account of work sponsored by an | + | agency of the United States Government. Neither the United States | + | Government nor the University of California nor any of their em- | + | ployees, makes any warranty, express or implied, or assumes any | + | liability or responsibility for the accuracy, completeness, or | + | usefulness of any information, apparatus, product, or process | + | disclosed, or represents that its use would not infringe | + | privately-owned rights. Reference herein to any specific commer- | + | cial products, process, or service by trade name, trademark, | + | manufacturer, or otherwise, does not necessarily constitute or | + | imply its endorsement, recommendation, or favoring by the United | + | States Government or the University of California. The views and | + | opinions of authors expressed herein do not necessarily state or | + | reflect those of the United States Government or the University | + | of California, and shall not be used for advertising or product | + \ endorsement purposes. / + --------------------------------------------------------------------- +*/ + +/* + * Define macros for handling SIGFPE. + * Lee Busby, LLNL, November, 1996 + * busby1@llnl.gov + * + ********************************************* + * Overview of the system for handling SIGFPE: + * + * This file (Include/pyfpe.h) defines a couple of "wrapper" macros for + * insertion into your Python C code of choice. Their proper use is + * discussed below. The file Python/pyfpe.c defines a pair of global + * variables PyFPE_jbuf and PyFPE_counter which are used by the signal + * handler for SIGFPE to decide if a particular exception was protected + * by the macros. The signal handler itself, and code for enabling the + * generation of SIGFPE in the first place, is in a (new) Python module + * named fpectl. This module is standard in every respect. It can be loaded + * either statically or dynamically as you choose, and like any other + * Python module, has no effect until you import it. + * + * In the general case, there are three steps toward handling SIGFPE in any + * Python code: + * + * 1) Add the *_PROTECT macros to your C code as required to protect + * dangerous floating point sections. + * + * 2) Turn on the inclusion of the code by adding the ``--with-fpectl'' + * flag at the time you run configure. If the fpectl or other modules + * which use the *_PROTECT macros are to be dynamically loaded, be + * sure they are compiled with WANT_SIGFPE_HANDLER defined. + * + * 3) When python is built and running, import fpectl, and execute + * fpectl.turnon_sigfpe(). This sets up the signal handler and enables + * generation of SIGFPE whenever an exception occurs. From this point + * on, any properly trapped SIGFPE should result in the Python + * FloatingPointError exception. + * + * Step 1 has been done already for the Python kernel code, and should be + * done soon for the NumPy array package. Step 2 is usually done once at + * python install time. Python's behavior with respect to SIGFPE is not + * changed unless you also do step 3. Thus you can control this new + * facility at compile time, or run time, or both. + * + ******************************** + * Using the macros in your code: + * + * static PyObject *foobar(PyObject *self,PyObject *args) + * { + * .... + * PyFPE_START_PROTECT("Error in foobar", return 0) + * result = dangerous_op(somearg1, somearg2, ...); + * PyFPE_END_PROTECT(result) + * .... + * } + * + * If a floating point error occurs in dangerous_op, foobar returns 0 (NULL), + * after setting the associated value of the FloatingPointError exception to + * "Error in foobar". ``Dangerous_op'' can be a single operation, or a block + * of code, function calls, or any combination, so long as no alternate + * return is possible before the PyFPE_END_PROTECT macro is reached. + * + * The macros can only be used in a function context where an error return + * can be recognized as signaling a Python exception. (Generally, most + * functions that return a PyObject * will qualify.) + * + * Guido's original design suggestion for PyFPE_START_PROTECT and + * PyFPE_END_PROTECT had them open and close a local block, with a locally + * defined jmp_buf and jmp_buf pointer. This would allow recursive nesting + * of the macros. The Ansi C standard makes it clear that such local + * variables need to be declared with the "volatile" type qualifier to keep + * setjmp from corrupting their values. Some current implementations seem + * to be more restrictive. For example, the HPUX man page for setjmp says + * + * Upon the return from a setjmp() call caused by a longjmp(), the + * values of any non-static local variables belonging to the routine + * from which setjmp() was called are undefined. Code which depends on + * such values is not guaranteed to be portable. + * + * I therefore decided on a more limited form of nesting, using a counter + * variable (PyFPE_counter) to keep track of any recursion. If an exception + * occurs in an ``inner'' pair of macros, the return will apparently + * come from the outermost level. + * + */ + +#ifdef WANT_SIGFPE_HANDLER +#include <signal.h> +#include <setjmp.h> +#include <math.h> +extern jmp_buf PyFPE_jbuf; +extern int PyFPE_counter; +extern double PyFPE_dummy(void *); + +#define PyFPE_START_PROTECT(err_string, leave_stmt) \ +if (!PyFPE_counter++ && setjmp(PyFPE_jbuf)) { \ + PyErr_SetString(PyExc_FloatingPointError, err_string); \ + PyFPE_counter = 0; \ + leave_stmt; \ +} + +/* + * This (following) is a heck of a way to decrement a counter. However, + * unless the macro argument is provided, code optimizers will sometimes move + * this statement so that it gets executed *before* the unsafe expression + * which we're trying to protect. That pretty well messes things up, + * of course. + * + * If the expression(s) you're trying to protect don't happen to return a + * value, you will need to manufacture a dummy result just to preserve the + * correct ordering of statements. Note that the macro passes the address + * of its argument (so you need to give it something which is addressable). + * If your expression returns multiple results, pass the last such result + * to PyFPE_END_PROTECT. + * + * Note that PyFPE_dummy returns a double, which is cast to int. + * This seeming insanity is to tickle the Floating Point Unit (FPU). + * If an exception has occurred in a preceding floating point operation, + * some architectures (notably Intel 80x86) will not deliver the interrupt + * until the *next* floating point operation. This is painful if you've + * already decremented PyFPE_counter. + */ +#define PyFPE_END_PROTECT(v) PyFPE_counter -= (int)PyFPE_dummy(&(v)); + +#else + +#define PyFPE_START_PROTECT(err_string, leave_stmt) +#define PyFPE_END_PROTECT(v) + +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYFPE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pygetopt.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,18 @@ + +#ifndef Py_PYGETOPT_H +#define Py_PYGETOPT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(int) _PyOS_opterr; +PyAPI_DATA(int) _PyOS_optind; +PyAPI_DATA(char *) _PyOS_optarg; + +PyAPI_FUNC(void) _PyOS_ResetGetOpt(void); +PyAPI_FUNC(int) _PyOS_GetOpt(int argc, char **argv, char *optstring); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYGETOPT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pymacconfig.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,102 @@ +#ifndef PYMACCONFIG_H +#define PYMACCONFIG_H + /* + * This file moves some of the autoconf magic to compile-time + * when building on MacOSX. This is needed for building 4-way + * universal binaries and for 64-bit universal binaries because + * the values redefined below aren't configure-time constant but + * only compile-time constant in these scenarios. + */ + +#if defined(__APPLE__) + +# undef SIZEOF_LONG +# undef SIZEOF_PTHREAD_T +# undef SIZEOF_SIZE_T +# undef SIZEOF_TIME_T +# undef SIZEOF_VOID_P +# undef SIZEOF__BOOL +# undef SIZEOF_UINTPTR_T +# undef SIZEOF_PTHREAD_T +# undef WORDS_BIGENDIAN +# undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 +# undef DOUBLE_IS_BIG_ENDIAN_IEEE754 +# undef DOUBLE_IS_LITTLE_ENDIAN_IEEE754 +# undef HAVE_GCC_ASM_FOR_X87 + +# undef VA_LIST_IS_ARRAY +# if defined(__LP64__) && defined(__x86_64__) +# define VA_LIST_IS_ARRAY 1 +# endif + +# undef HAVE_LARGEFILE_SUPPORT +# ifndef __LP64__ +# define HAVE_LARGEFILE_SUPPORT 1 +# endif + +# undef SIZEOF_LONG +# ifdef __LP64__ +# define SIZEOF__BOOL 1 +# define SIZEOF__BOOL 1 +# define SIZEOF_LONG 8 +# define SIZEOF_PTHREAD_T 8 +# define SIZEOF_SIZE_T 8 +# define SIZEOF_TIME_T 8 +# define SIZEOF_VOID_P 8 +# define SIZEOF_UINTPTR_T 8 +# define SIZEOF_PTHREAD_T 8 +# else +# ifdef __ppc__ +# define SIZEOF__BOOL 4 +# else +# define SIZEOF__BOOL 1 +# endif +# define SIZEOF_LONG 4 +# define SIZEOF_PTHREAD_T 4 +# define SIZEOF_SIZE_T 4 +# define SIZEOF_TIME_T 4 +# define SIZEOF_VOID_P 4 +# define SIZEOF_UINTPTR_T 4 +# define SIZEOF_PTHREAD_T 4 +# endif + +# if defined(__LP64__) + /* MacOSX 10.4 (the first release to support 64-bit code + * at all) only supports 64-bit in the UNIX layer. + * Therefore surpress the toolbox-glue in 64-bit mode. + */ + + /* In 64-bit mode setpgrp always has no arguments, in 32-bit + * mode that depends on the compilation environment + */ +# undef SETPGRP_HAVE_ARG + +# endif + +#ifdef __BIG_ENDIAN__ +#define WORDS_BIGENDIAN 1 +#define DOUBLE_IS_BIG_ENDIAN_IEEE754 +#else +#define DOUBLE_IS_LITTLE_ENDIAN_IEEE754 +#endif /* __BIG_ENDIAN */ + +#ifdef __i386__ +# define HAVE_GCC_ASM_FOR_X87 +#endif + + /* + * The definition in pyconfig.h is only valid on the OS release + * where configure ran on and not necessarily for all systems where + * the executable can be used on. + * + * Specifically: OSX 10.4 has limited supported for '%zd', while + * 10.5 has full support for '%zd'. A binary built on 10.5 won't + * work properly on 10.4 unless we surpress the definition + * of PY_FORMAT_SIZE_T + */ +#undef PY_FORMAT_SIZE_T + + +#endif /* defined(_APPLE__) */ + +#endif /* PYMACCONFIG_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pymactoolbox.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,217 @@ +/* +** pymactoolbox.h - globals defined in mactoolboxglue.c +*/ +#ifndef Py_PYMACTOOLBOX_H +#define Py_PYMACTOOLBOX_H +#ifdef __cplusplus + extern "C" { +#endif + +#include <Carbon/Carbon.h> + +#ifndef __LP64__ +#include <QuickTime/QuickTime.h> +#endif /* !__LP64__ */ + +/* +** Helper routines for error codes and such. +*/ +char *PyMac_StrError(int); /* strerror with mac errors */ +extern PyObject *PyMac_OSErrException; /* Exception for OSErr */ +PyObject *PyMac_GetOSErrException(void); /* Initialize & return it */ +PyObject *PyErr_Mac(PyObject *, int); /* Exception with a mac error */ +PyObject *PyMac_Error(OSErr); /* Uses PyMac_GetOSErrException */ +#ifndef __LP64__ +extern OSErr PyMac_GetFullPathname(FSSpec *, char *, int); /* convert + fsspec->path */ +#endif /* __LP64__ */ + +/* +** These conversion routines are defined in mactoolboxglue.c itself. +*/ +int PyMac_GetOSType(PyObject *, OSType *); /* argument parser for OSType */ +PyObject *PyMac_BuildOSType(OSType); /* Convert OSType to PyObject */ + +PyObject *PyMac_BuildNumVersion(NumVersion);/* Convert NumVersion to PyObject */ + +int PyMac_GetStr255(PyObject *, Str255); /* argument parser for Str255 */ +PyObject *PyMac_BuildStr255(Str255); /* Convert Str255 to PyObject */ +PyObject *PyMac_BuildOptStr255(Str255); /* Convert Str255 to PyObject, + NULL to None */ + +int PyMac_GetRect(PyObject *, Rect *); /* argument parser for Rect */ +PyObject *PyMac_BuildRect(Rect *); /* Convert Rect to PyObject */ + +int PyMac_GetPoint(PyObject *, Point *); /* argument parser for Point */ +PyObject *PyMac_BuildPoint(Point); /* Convert Point to PyObject */ + +int PyMac_GetEventRecord(PyObject *, EventRecord *); /* argument parser for + EventRecord */ +PyObject *PyMac_BuildEventRecord(EventRecord *); /* Convert EventRecord to + PyObject */ + +int PyMac_GetFixed(PyObject *, Fixed *); /* argument parser for Fixed */ +PyObject *PyMac_BuildFixed(Fixed); /* Convert Fixed to PyObject */ +int PyMac_Getwide(PyObject *, wide *); /* argument parser for wide */ +PyObject *PyMac_Buildwide(wide *); /* Convert wide to PyObject */ + +/* +** The rest of the routines are implemented by extension modules. If they are +** dynamically loaded mactoolboxglue will contain a stub implementation of the +** routine, which imports the module, whereupon the module's init routine will +** communicate the routine pointer back to the stub. +** If USE_TOOLBOX_OBJECT_GLUE is not defined there is no glue code, and the +** extension modules simply declare the routine. This is the case for static +** builds (and could be the case for MacPython CFM builds, because CFM extension +** modules can reference each other without problems). +*/ + +#ifdef USE_TOOLBOX_OBJECT_GLUE +/* +** These macros are used in the module init code. If we use toolbox object glue +** it sets the function pointer to point to the real function. +*/ +#define PyMac_INIT_TOOLBOX_OBJECT_NEW(object, rtn) { \ + extern PyObject *(*PyMacGluePtr_##rtn)(object); \ + PyMacGluePtr_##rtn = _##rtn; \ +} +#define PyMac_INIT_TOOLBOX_OBJECT_CONVERT(object, rtn) { \ + extern int (*PyMacGluePtr_##rtn)(PyObject *, object *); \ + PyMacGluePtr_##rtn = _##rtn; \ +} +#else +/* +** If we don't use toolbox object glue the init macros are empty. Moreover, we define +** _xxx_New to be the same as xxx_New, and the code in mactoolboxglue isn't included. +*/ +#define PyMac_INIT_TOOLBOX_OBJECT_NEW(object, rtn) +#define PyMac_INIT_TOOLBOX_OBJECT_CONVERT(object, rtn) +#endif /* USE_TOOLBOX_OBJECT_GLUE */ + +/* macfs exports */ +#ifndef __LP64__ +int PyMac_GetFSSpec(PyObject *, FSSpec *); /* argument parser for FSSpec */ +PyObject *PyMac_BuildFSSpec(FSSpec *); /* Convert FSSpec to PyObject */ +#endif /* !__LP64__ */ + +int PyMac_GetFSRef(PyObject *, FSRef *); /* argument parser for FSRef */ +PyObject *PyMac_BuildFSRef(FSRef *); /* Convert FSRef to PyObject */ + +/* AE exports */ +extern PyObject *AEDesc_New(AppleEvent *); /* XXXX Why passed by address?? */ +extern PyObject *AEDesc_NewBorrowed(AppleEvent *); +extern int AEDesc_Convert(PyObject *, AppleEvent *); + +/* Cm exports */ +extern PyObject *CmpObj_New(Component); +extern int CmpObj_Convert(PyObject *, Component *); +extern PyObject *CmpInstObj_New(ComponentInstance); +extern int CmpInstObj_Convert(PyObject *, ComponentInstance *); + +/* Ctl exports */ +#ifndef __LP64__ +extern PyObject *CtlObj_New(ControlHandle); +extern int CtlObj_Convert(PyObject *, ControlHandle *); +#endif /* !__LP64__ */ + +/* Dlg exports */ +#ifndef __LP64__ +extern PyObject *DlgObj_New(DialogPtr); +extern int DlgObj_Convert(PyObject *, DialogPtr *); +extern PyObject *DlgObj_WhichDialog(DialogPtr); +#endif /* !__LP64__ */ + +/* Drag exports */ +#ifndef __LP64__ +extern PyObject *DragObj_New(DragReference); +extern int DragObj_Convert(PyObject *, DragReference *); +#endif /* !__LP64__ */ + +/* List exports */ +#ifndef __LP64__ +extern PyObject *ListObj_New(ListHandle); +extern int ListObj_Convert(PyObject *, ListHandle *); +#endif /* !__LP64__ */ + +/* Menu exports */ +#ifndef __LP64__ +extern PyObject *MenuObj_New(MenuHandle); +extern int MenuObj_Convert(PyObject *, MenuHandle *); +#endif /* !__LP64__ */ + +/* Qd exports */ +#ifndef __LP64__ +extern PyObject *GrafObj_New(GrafPtr); +extern int GrafObj_Convert(PyObject *, GrafPtr *); +extern PyObject *BMObj_New(BitMapPtr); +extern int BMObj_Convert(PyObject *, BitMapPtr *); +extern PyObject *QdRGB_New(RGBColor *); +extern int QdRGB_Convert(PyObject *, RGBColor *); +#endif /* !__LP64__ */ + +/* Qdoffs exports */ +#ifndef __LP64__ +extern PyObject *GWorldObj_New(GWorldPtr); +extern int GWorldObj_Convert(PyObject *, GWorldPtr *); +#endif /* !__LP64__ */ + +/* Qt exports */ +#ifndef __LP64__ +extern PyObject *TrackObj_New(Track); +extern int TrackObj_Convert(PyObject *, Track *); +extern PyObject *MovieObj_New(Movie); +extern int MovieObj_Convert(PyObject *, Movie *); +extern PyObject *MovieCtlObj_New(MovieController); +extern int MovieCtlObj_Convert(PyObject *, MovieController *); +extern PyObject *TimeBaseObj_New(TimeBase); +extern int TimeBaseObj_Convert(PyObject *, TimeBase *); +extern PyObject *UserDataObj_New(UserData); +extern int UserDataObj_Convert(PyObject *, UserData *); +extern PyObject *MediaObj_New(Media); +extern int MediaObj_Convert(PyObject *, Media *); +#endif /* !__LP64__ */ + +/* Res exports */ +extern PyObject *ResObj_New(Handle); +extern int ResObj_Convert(PyObject *, Handle *); +extern PyObject *OptResObj_New(Handle); +extern int OptResObj_Convert(PyObject *, Handle *); + +/* TE exports */ +#ifndef __LP64__ +extern PyObject *TEObj_New(TEHandle); +extern int TEObj_Convert(PyObject *, TEHandle *); +#endif /* !__LP64__ */ + +/* Win exports */ +#ifndef __LP64__ +extern PyObject *WinObj_New(WindowPtr); +extern int WinObj_Convert(PyObject *, WindowPtr *); +extern PyObject *WinObj_WhichWindow(WindowPtr); +#endif /* !__LP64__ */ + +/* CF exports */ +extern PyObject *CFObj_New(CFTypeRef); +extern int CFObj_Convert(PyObject *, CFTypeRef *); +extern PyObject *CFTypeRefObj_New(CFTypeRef); +extern int CFTypeRefObj_Convert(PyObject *, CFTypeRef *); +extern PyObject *CFStringRefObj_New(CFStringRef); +extern int CFStringRefObj_Convert(PyObject *, CFStringRef *); +extern PyObject *CFMutableStringRefObj_New(CFMutableStringRef); +extern int CFMutableStringRefObj_Convert(PyObject *, CFMutableStringRef *); +extern PyObject *CFArrayRefObj_New(CFArrayRef); +extern int CFArrayRefObj_Convert(PyObject *, CFArrayRef *); +extern PyObject *CFMutableArrayRefObj_New(CFMutableArrayRef); +extern int CFMutableArrayRefObj_Convert(PyObject *, CFMutableArrayRef *); +extern PyObject *CFDictionaryRefObj_New(CFDictionaryRef); +extern int CFDictionaryRefObj_Convert(PyObject *, CFDictionaryRef *); +extern PyObject *CFMutableDictionaryRefObj_New(CFMutableDictionaryRef); +extern int CFMutableDictionaryRefObj_Convert(PyObject *, CFMutableDictionaryRef *); +extern PyObject *CFURLRefObj_New(CFURLRef); +extern int CFURLRefObj_Convert(PyObject *, CFURLRef *); +extern int OptionalCFURLRefObj_Convert(PyObject *, CFURLRef *); + +#ifdef __cplusplus + } +#endif +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pymath.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,214 @@ +#ifndef Py_PYMATH_H +#define Py_PYMATH_H + +#include "pyconfig.h" /* include for defines */ + +/************************************************************************** +Symbols and macros to supply platform-independent interfaces to mathematical +functions and constants +**************************************************************************/ + +/* Python provides implementations for copysign, round and hypot in + * Python/pymath.c just in case your math library doesn't provide the + * functions. + * + *Note: PC/pyconfig.h defines copysign as _copysign + */ +#ifndef HAVE_COPYSIGN +extern double copysign(double, double); +#endif + +#ifndef HAVE_ROUND +extern double round(double); +#endif + +#ifndef HAVE_HYPOT +extern double hypot(double, double); +#endif + +/* extra declarations */ +#ifndef _MSC_VER +#ifndef __STDC__ +extern double fmod (double, double); +extern double frexp (double, int *); +extern double ldexp (double, int); +extern double modf (double, double *); +extern double pow(double, double); +#endif /* __STDC__ */ +#endif /* _MSC_VER */ + +#ifdef _OSF_SOURCE +/* OSF1 5.1 doesn't make these available with XOPEN_SOURCE_EXTENDED defined */ +extern int finite(double); +extern double copysign(double, double); +#endif + +/* High precision defintion of pi and e (Euler) + * The values are taken from libc6's math.h. + */ +#ifndef Py_MATH_PIl +#define Py_MATH_PIl 3.1415926535897932384626433832795029L +#endif +#ifndef Py_MATH_PI +#define Py_MATH_PI 3.14159265358979323846 +#endif + +#ifndef Py_MATH_El +#define Py_MATH_El 2.7182818284590452353602874713526625L +#endif + +#ifndef Py_MATH_E +#define Py_MATH_E 2.7182818284590452354 +#endif + +/* On x86, Py_FORCE_DOUBLE forces a floating-point number out of an x87 FPU + register and into a 64-bit memory location, rounding from extended + precision to double precision in the process. On other platforms it does + nothing. */ + +/* we take double rounding as evidence of x87 usage */ +#ifndef Py_FORCE_DOUBLE +# ifdef X87_DOUBLE_ROUNDING +PyAPI_FUNC(double) _Py_force_double(double); +# define Py_FORCE_DOUBLE(X) (_Py_force_double(X)) +# else +# define Py_FORCE_DOUBLE(X) (X) +# endif +#endif + +#ifdef HAVE_GCC_ASM_FOR_X87 +PyAPI_FUNC(unsigned short) _Py_get_387controlword(void); +PyAPI_FUNC(void) _Py_set_387controlword(unsigned short); +#endif + +/* Py_IS_NAN(X) + * Return 1 if float or double arg is a NaN, else 0. + * Caution: + * X is evaluated more than once. + * This may not work on all platforms. Each platform has *some* + * way to spell this, though -- override in pyconfig.h if you have + * a platform where it doesn't work. + * Note: PC/pyconfig.h defines Py_IS_NAN as _isnan + */ +#ifndef Py_IS_NAN +#if defined HAVE_DECL_ISNAN && HAVE_DECL_ISNAN == 1 +#define Py_IS_NAN(X) isnan(X) +#else +#define Py_IS_NAN(X) ((X) != (X)) +#endif +#endif + +/* Py_IS_INFINITY(X) + * Return 1 if float or double arg is an infinity, else 0. + * Caution: + * X is evaluated more than once. + * This implementation may set the underflow flag if |X| is very small; + * it really can't be implemented correctly (& easily) before C99. + * Override in pyconfig.h if you have a better spelling on your platform. + * Py_FORCE_DOUBLE is used to avoid getting false negatives from a + * non-infinite value v sitting in an 80-bit x87 register such that + * v becomes infinite when spilled from the register to 64-bit memory. + * Note: PC/pyconfig.h defines Py_IS_INFINITY as _isinf + */ +#ifndef Py_IS_INFINITY +# if defined HAVE_DECL_ISINF && HAVE_DECL_ISINF == 1 +# define Py_IS_INFINITY(X) isinf(X) +# else +# define Py_IS_INFINITY(X) ((X) && \ + (Py_FORCE_DOUBLE(X)*0.5 == Py_FORCE_DOUBLE(X))) +# endif +#endif + +/* Py_IS_FINITE(X) + * Return 1 if float or double arg is neither infinite nor NAN, else 0. + * Some compilers (e.g. VisualStudio) have intrisics for this, so a special + * macro for this particular test is useful + * Note: PC/pyconfig.h defines Py_IS_FINITE as _finite + */ +#ifndef Py_IS_FINITE +#if defined HAVE_DECL_ISFINITE && HAVE_DECL_ISFINITE == 1 +#define Py_IS_FINITE(X) isfinite(X) +#elif defined HAVE_FINITE +#define Py_IS_FINITE(X) finite(X) +#else +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif +#endif + +/* HUGE_VAL is supposed to expand to a positive double infinity. Python + * uses Py_HUGE_VAL instead because some platforms are broken in this + * respect. We used to embed code in pyport.h to try to worm around that, + * but different platforms are broken in conflicting ways. If you're on + * a platform where HUGE_VAL is defined incorrectly, fiddle your Python + * config to #define Py_HUGE_VAL to something that works on your platform. + */ +#ifndef Py_HUGE_VAL +#define Py_HUGE_VAL HUGE_VAL +#endif + +/* Py_NAN + * A value that evaluates to a NaN. On IEEE 754 platforms INF*0 or + * INF/INF works. Define Py_NO_NAN in pyconfig.h if your platform + * doesn't support NaNs. + */ +#if !defined(Py_NAN) && !defined(Py_NO_NAN) +#if !defined(__INTEL_COMPILER) + #define Py_NAN (Py_HUGE_VAL * 0.) +#else /* __INTEL_COMPILER */ + #if defined(ICC_NAN_STRICT) + #pragma float_control(push) + #pragma float_control(precise, on) + #pragma float_control(except, on) + #if defined(_MSC_VER) + __declspec(noinline) + #else /* Linux */ + __attribute__((noinline)) + #endif /* _MSC_VER */ + static double __icc_nan() + { + return sqrt(-1.0); + } + #pragma float_control (pop) + #define Py_NAN __icc_nan() + #else /* ICC_NAN_RELAXED as default for Intel Compiler */ + static union { unsigned char buf[8]; double __icc_nan; } __nan_store = {0,0,0,0,0,0,0xf8,0x7f}; + #define Py_NAN (__nan_store.__icc_nan) + #endif /* ICC_NAN_STRICT */ +#endif /* __INTEL_COMPILER */ +#endif + +/* Py_OVERFLOWED(X) + * Return 1 iff a libm function overflowed. Set errno to 0 before calling + * a libm function, and invoke this macro after, passing the function + * result. + * Caution: + * This isn't reliable. C99 no longer requires libm to set errno under + * any exceptional condition, but does require +- HUGE_VAL return + * values on overflow. A 754 box *probably* maps HUGE_VAL to a + * double infinity, and we're cool if that's so, unless the input + * was an infinity and an infinity is the expected result. A C89 + * system sets errno to ERANGE, so we check for that too. We're + * out of luck if a C99 754 box doesn't map HUGE_VAL to +Inf, or + * if the returned result is a NaN, or if a C89 box returns HUGE_VAL + * in non-overflow cases. + * X is evaluated more than once. + * Some platforms have better way to spell this, so expect some #ifdef'ery. + * + * OpenBSD uses 'isinf()' because a compiler bug on that platform causes + * the longer macro version to be mis-compiled. This isn't optimal, and + * should be removed once a newer compiler is available on that platform. + * The system that had the failure was running OpenBSD 3.2 on Intel, with + * gcc 2.95.3. + * + * According to Tim's checkin, the FreeBSD systems use isinf() to work + * around a FPE bug on that platform. + */ +#if defined(__FreeBSD__) || defined(__OpenBSD__) +#define Py_OVERFLOWED(X) isinf(X) +#else +#define Py_OVERFLOWED(X) ((X) != 0.0 && (errno == ERANGE || \ + (X) == Py_HUGE_VAL || \ + (X) == -Py_HUGE_VAL)) +#endif + +#endif /* Py_PYMATH_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pymem.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,122 @@ +/* The PyMem_ family: low-level memory allocation interfaces. + See objimpl.h for the PyObject_ memory family. +*/ + +#ifndef Py_PYMEM_H +#define Py_PYMEM_H + +#include "pyport.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* BEWARE: + + Each interface exports both functions and macros. Extension modules should + use the functions, to ensure binary compatibility across Python versions. + Because the Python implementation is free to change internal details, and + the macros may (or may not) expose details for speed, if you do use the + macros you must recompile your extensions with each Python release. + + Never mix calls to PyMem_ with calls to the platform malloc/realloc/ + calloc/free. For example, on Windows different DLLs may end up using + different heaps, and if you use PyMem_Malloc you'll get the memory from the + heap used by the Python DLL; it could be a disaster if you free()'ed that + directly in your own extension. Using PyMem_Free instead ensures Python + can return the memory to the proper heap. As another example, in + PYMALLOC_DEBUG mode, Python wraps all calls to all PyMem_ and PyObject_ + memory functions in special debugging wrappers that add additional + debugging info to dynamic memory blocks. The system routines have no idea + what to do with that stuff, and the Python wrappers have no idea what to do + with raw blocks obtained directly by the system routines then. + + The GIL must be held when using these APIs. +*/ + +/* + * Raw memory interface + * ==================== + */ + +/* Functions + + Functions supplying platform-independent semantics for malloc/realloc/ + free. These functions make sure that allocating 0 bytes returns a distinct + non-NULL pointer (whenever possible -- if we're flat out of memory, NULL + may be returned), even if the platform malloc and realloc don't. + Returned pointers must be checked for NULL explicitly. No action is + performed on failure (no exception is set, no warning is printed, etc). +*/ + +PyAPI_FUNC(void *) PyMem_Malloc(size_t); +PyAPI_FUNC(void *) PyMem_Realloc(void *, size_t); +PyAPI_FUNC(void) PyMem_Free(void *); + +/* Starting from Python 1.6, the wrappers Py_{Malloc,Realloc,Free} are + no longer supported. They used to call PyErr_NoMemory() on failure. */ + +/* Macros. */ +#ifdef PYMALLOC_DEBUG +/* Redirect all memory operations to Python's debugging allocator. */ +#define PyMem_MALLOC _PyMem_DebugMalloc +#define PyMem_REALLOC _PyMem_DebugRealloc +#define PyMem_FREE _PyMem_DebugFree + +#else /* ! PYMALLOC_DEBUG */ + +/* PyMem_MALLOC(0) means malloc(1). Some systems would return NULL + for malloc(0), which would be treated as an error. Some platforms + would return a pointer with no memory behind it, which would break + pymalloc. To solve these problems, allocate an extra byte. */ +/* Returns NULL to indicate error if a negative size or size larger than + Py_ssize_t can represent is supplied. Helps prevents security holes. */ +#define PyMem_MALLOC(n) ((size_t)(n) > (size_t)PY_SSIZE_T_MAX ? NULL \ + : malloc((n) ? (n) : 1)) +#define PyMem_REALLOC(p, n) ((size_t)(n) > (size_t)PY_SSIZE_T_MAX ? NULL \ + : realloc((p), (n) ? (n) : 1)) +#define PyMem_FREE free + +#endif /* PYMALLOC_DEBUG */ + +/* + * Type-oriented memory interface + * ============================== + * + * Allocate memory for n objects of the given type. Returns a new pointer + * or NULL if the request was too large or memory allocation failed. Use + * these macros rather than doing the multiplication yourself so that proper + * overflow checking is always done. + */ + +#define PyMem_New(type, n) \ + ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + ( (type *) PyMem_Malloc((n) * sizeof(type)) ) ) +#define PyMem_NEW(type, n) \ + ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) ) + +/* + * The value of (p) is always clobbered by this macro regardless of success. + * The caller MUST check if (p) is NULL afterwards and deal with the memory + * error if so. This means the original value of (p) MUST be saved for the + * caller's memory error handler to not lose track of it. + */ +#define PyMem_Resize(p, type, n) \ + ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) +#define PyMem_RESIZE(p, type, n) \ + ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ + (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) + +/* PyMem{Del,DEL} are left over from ancient days, and shouldn't be used + * anymore. They're just confusing aliases for PyMem_{Free,FREE} now. + */ +#define PyMem_Del PyMem_Free +#define PyMem_DEL PyMem_FREE + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_PYMEM_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pyport.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,941 @@ +#ifndef Py_PYPORT_H +#define Py_PYPORT_H + +#include "pyconfig.h" /* include for defines */ + +/* Some versions of HP-UX & Solaris need inttypes.h for int32_t, + INT32_MAX, etc. */ +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif + +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif + +/************************************************************************** +Symbols and macros to supply platform-independent interfaces to basic +C language & library operations whose spellings vary across platforms. + +Please try to make documentation here as clear as possible: by definition, +the stuff here is trying to illuminate C's darkest corners. + +Config #defines referenced here: + +SIGNED_RIGHT_SHIFT_ZERO_FILLS +Meaning: To be defined iff i>>j does not extend the sign bit when i is a + signed integral type and i < 0. +Used in: Py_ARITHMETIC_RIGHT_SHIFT + +Py_DEBUG +Meaning: Extra checks compiled in for debug mode. +Used in: Py_SAFE_DOWNCAST + +HAVE_UINTPTR_T +Meaning: The C9X type uintptr_t is supported by the compiler +Used in: Py_uintptr_t + +HAVE_LONG_LONG +Meaning: The compiler supports the C type "long long" +Used in: PY_LONG_LONG + +**************************************************************************/ + + +/* For backward compatibility only. Obsolete, do not use. */ +#ifdef HAVE_PROTOTYPES +#define Py_PROTO(x) x +#else +#define Py_PROTO(x) () +#endif +#ifndef Py_FPROTO +#define Py_FPROTO(x) Py_PROTO(x) +#endif + +/* typedefs for some C9X-defined synonyms for integral types. + * + * The names in Python are exactly the same as the C9X names, except with a + * Py_ prefix. Until C9X is universally implemented, this is the only way + * to ensure that Python gets reliable names that don't conflict with names + * in non-Python code that are playing their own tricks to define the C9X + * names. + * + * NOTE: don't go nuts here! Python has no use for *most* of the C9X + * integral synonyms. Only define the ones we actually need. + */ + +#ifdef HAVE_LONG_LONG +#ifndef PY_LONG_LONG +#define PY_LONG_LONG long long +#if defined(LLONG_MAX) +/* If LLONG_MAX is defined in limits.h, use that. */ +#define PY_LLONG_MIN LLONG_MIN +#define PY_LLONG_MAX LLONG_MAX +#define PY_ULLONG_MAX ULLONG_MAX +#elif defined(__LONG_LONG_MAX__) +/* Otherwise, if GCC has a builtin define, use that. */ +#define PY_LLONG_MAX __LONG_LONG_MAX__ +#define PY_LLONG_MIN (-PY_LLONG_MAX-1) +#define PY_ULLONG_MAX (__LONG_LONG_MAX__*2ULL + 1ULL) +#else +/* Otherwise, rely on two's complement. */ +#define PY_ULLONG_MAX (~0ULL) +#define PY_LLONG_MAX ((long long)(PY_ULLONG_MAX>>1)) +#define PY_LLONG_MIN (-PY_LLONG_MAX-1) +#endif /* LLONG_MAX */ +#endif +#endif /* HAVE_LONG_LONG */ + +/* a build with 30-bit digits for Python long integers needs an exact-width + * 32-bit unsigned integer type to store those digits. (We could just use + * type 'unsigned long', but that would be wasteful on a system where longs + * are 64-bits.) On Unix systems, the autoconf macro AC_TYPE_UINT32_T defines + * uint32_t to be such a type unless stdint.h or inttypes.h defines uint32_t. + * However, it doesn't set HAVE_UINT32_T, so we do that here. + */ +#ifdef uint32_t +#define HAVE_UINT32_T 1 +#endif + +#ifdef HAVE_UINT32_T +#ifndef PY_UINT32_T +#define PY_UINT32_T uint32_t +#endif +#endif + +/* Macros for a 64-bit unsigned integer type; used for type 'twodigits' in the + * long integer implementation, when 30-bit digits are enabled. + */ +#ifdef uint64_t +#define HAVE_UINT64_T 1 +#endif + +#ifdef HAVE_UINT64_T +#ifndef PY_UINT64_T +#define PY_UINT64_T uint64_t +#endif +#endif + +/* Signed variants of the above */ +#ifdef int32_t +#define HAVE_INT32_T 1 +#endif + +#ifdef HAVE_INT32_T +#ifndef PY_INT32_T +#define PY_INT32_T int32_t +#endif +#endif + +#ifdef int64_t +#define HAVE_INT64_T 1 +#endif + +#ifdef HAVE_INT64_T +#ifndef PY_INT64_T +#define PY_INT64_T int64_t +#endif +#endif + +/* If PYLONG_BITS_IN_DIGIT is not defined then we'll use 30-bit digits if all + the necessary integer types are available, and we're on a 64-bit platform + (as determined by SIZEOF_VOID_P); otherwise we use 15-bit digits. */ + +#ifndef PYLONG_BITS_IN_DIGIT +#if (defined HAVE_UINT64_T && defined HAVE_INT64_T && \ + defined HAVE_UINT32_T && defined HAVE_INT32_T && SIZEOF_VOID_P >= 8) +#define PYLONG_BITS_IN_DIGIT 30 +#else +#define PYLONG_BITS_IN_DIGIT 15 +#endif +#endif + +/* uintptr_t is the C9X name for an unsigned integral type such that a + * legitimate void* can be cast to uintptr_t and then back to void* again + * without loss of information. Similarly for intptr_t, wrt a signed + * integral type. + */ +#ifdef HAVE_UINTPTR_T +typedef uintptr_t Py_uintptr_t; +typedef intptr_t Py_intptr_t; + +#elif SIZEOF_VOID_P <= SIZEOF_INT +typedef unsigned int Py_uintptr_t; +typedef int Py_intptr_t; + +#elif SIZEOF_VOID_P <= SIZEOF_LONG +typedef unsigned long Py_uintptr_t; +typedef long Py_intptr_t; + +#elif defined(HAVE_LONG_LONG) && (SIZEOF_VOID_P <= SIZEOF_LONG_LONG) +typedef unsigned PY_LONG_LONG Py_uintptr_t; +typedef PY_LONG_LONG Py_intptr_t; + +#else +# error "Python needs a typedef for Py_uintptr_t in pyport.h." +#endif /* HAVE_UINTPTR_T */ + +/* Py_ssize_t is a signed integral type such that sizeof(Py_ssize_t) == + * sizeof(size_t). C99 doesn't define such a thing directly (size_t is an + * unsigned integral type). See PEP 353 for details. + */ +#ifdef HAVE_SSIZE_T +typedef ssize_t Py_ssize_t; +#elif SIZEOF_VOID_P == SIZEOF_SIZE_T +typedef Py_intptr_t Py_ssize_t; +#else +# error "Python needs a typedef for Py_ssize_t in pyport.h." +#endif + +/* Largest possible value of size_t. + SIZE_MAX is part of C99, so it might be defined on some + platforms. If it is not defined, (size_t)-1 is a portable + definition for C89, due to the way signed->unsigned + conversion is defined. */ +#ifdef SIZE_MAX +#define PY_SIZE_MAX SIZE_MAX +#else +#define PY_SIZE_MAX ((size_t)-1) +#endif + +/* Largest positive value of type Py_ssize_t. */ +#define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) +/* Smallest negative value of type Py_ssize_t. */ +#define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) + +#if SIZEOF_PID_T > SIZEOF_LONG +# error "Python doesn't support sizeof(pid_t) > sizeof(long)" +#endif + +/* PY_FORMAT_SIZE_T is a platform-specific modifier for use in a printf + * format to convert an argument with the width of a size_t or Py_ssize_t. + * C99 introduced "z" for this purpose, but not all platforms support that; + * e.g., MS compilers use "I" instead. + * + * These "high level" Python format functions interpret "z" correctly on + * all platforms (Python interprets the format string itself, and does whatever + * the platform C requires to convert a size_t/Py_ssize_t argument): + * + * PyString_FromFormat + * PyErr_Format + * PyString_FromFormatV + * + * Lower-level uses require that you interpolate the correct format modifier + * yourself (e.g., calling printf, fprintf, sprintf, PyOS_snprintf); for + * example, + * + * Py_ssize_t index; + * fprintf(stderr, "index %" PY_FORMAT_SIZE_T "d sucks\n", index); + * + * That will expand to %ld, or %Id, or to something else correct for a + * Py_ssize_t on the platform. + */ +#ifndef PY_FORMAT_SIZE_T +# if SIZEOF_SIZE_T == SIZEOF_INT && !defined(__APPLE__) +# define PY_FORMAT_SIZE_T "" +# elif SIZEOF_SIZE_T == SIZEOF_LONG +# define PY_FORMAT_SIZE_T "l" +# elif defined(MS_WINDOWS) +# define PY_FORMAT_SIZE_T "I" +# else +# error "This platform's pyconfig.h needs to define PY_FORMAT_SIZE_T" +# endif +#endif + +/* PY_FORMAT_LONG_LONG is analogous to PY_FORMAT_SIZE_T above, but for + * the long long type instead of the size_t type. It's only available + * when HAVE_LONG_LONG is defined. The "high level" Python format + * functions listed above will interpret "lld" or "llu" correctly on + * all platforms. + */ +#ifdef HAVE_LONG_LONG +# ifndef PY_FORMAT_LONG_LONG +# if defined(MS_WIN64) || defined(MS_WINDOWS) +# define PY_FORMAT_LONG_LONG "I64" +# else +# error "This platform's pyconfig.h needs to define PY_FORMAT_LONG_LONG" +# endif +# endif +#endif + +/* Py_LOCAL can be used instead of static to get the fastest possible calling + * convention for functions that are local to a given module. + * + * Py_LOCAL_INLINE does the same thing, and also explicitly requests inlining, + * for platforms that support that. + * + * If PY_LOCAL_AGGRESSIVE is defined before python.h is included, more + * "aggressive" inlining/optimizaion is enabled for the entire module. This + * may lead to code bloat, and may slow things down for those reasons. It may + * also lead to errors, if the code relies on pointer aliasing. Use with + * care. + * + * NOTE: You can only use this for functions that are entirely local to a + * module; functions that are exported via method tables, callbacks, etc, + * should keep using static. + */ + +#undef USE_INLINE /* XXX - set via configure? */ + +#if defined(_MSC_VER) +#if defined(PY_LOCAL_AGGRESSIVE) +/* enable more aggressive optimization for visual studio */ +#pragma optimize("agtw", on) +#endif +/* ignore warnings if the compiler decides not to inline a function */ +#pragma warning(disable: 4710) +/* fastest possible local call under MSVC */ +#define Py_LOCAL(type) static type __fastcall +#define Py_LOCAL_INLINE(type) static __inline type __fastcall +#elif defined(USE_INLINE) +#define Py_LOCAL(type) static type +#define Py_LOCAL_INLINE(type) static inline type +#else +#define Py_LOCAL(type) static type +#define Py_LOCAL_INLINE(type) static type +#endif + +/* Py_MEMCPY can be used instead of memcpy in cases where the copied blocks + * are often very short. While most platforms have highly optimized code for + * large transfers, the setup costs for memcpy are often quite high. MEMCPY + * solves this by doing short copies "in line". + */ + +#if defined(_MSC_VER) +#define Py_MEMCPY(target, source, length) do { \ + size_t i_, n_ = (length); \ + char *t_ = (void*) (target); \ + const char *s_ = (void*) (source); \ + if (n_ >= 16) \ + memcpy(t_, s_, n_); \ + else \ + for (i_ = 0; i_ < n_; i_++) \ + t_[i_] = s_[i_]; \ + } while (0) +#else +#define Py_MEMCPY memcpy +#endif + +#include <stdlib.h> + +#ifdef HAVE_IEEEFP_H +#include <ieeefp.h> /* needed for 'finite' declaration on some platforms */ +#endif + +#include <math.h> /* Moved here from the math section, before extern "C" */ + +/******************************************** + * WRAPPER FOR <time.h> and/or <sys/time.h> * + ********************************************/ + +#ifdef TIME_WITH_SYS_TIME +#include <sys/time.h> +#include <time.h> +#else /* !TIME_WITH_SYS_TIME */ +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#else /* !HAVE_SYS_TIME_H */ +#include <time.h> +#endif /* !HAVE_SYS_TIME_H */ +#endif /* !TIME_WITH_SYS_TIME */ + + +/****************************** + * WRAPPER FOR <sys/select.h> * + ******************************/ + +/* NB caller must include <sys/types.h> */ + +#ifdef HAVE_SYS_SELECT_H + +#include <sys/select.h> + +#endif /* !HAVE_SYS_SELECT_H */ + +/******************************* + * stat() and fstat() fiddling * + *******************************/ + +/* We expect that stat and fstat exist on most systems. + * It's confirmed on Unix, Mac and Windows. + * If you don't have them, add + * #define DONT_HAVE_STAT + * and/or + * #define DONT_HAVE_FSTAT + * to your pyconfig.h. Python code beyond this should check HAVE_STAT and + * HAVE_FSTAT instead. + * Also + * #define HAVE_SYS_STAT_H + * if <sys/stat.h> exists on your platform, and + * #define HAVE_STAT_H + * if <stat.h> does. + */ +#ifndef DONT_HAVE_STAT +#define HAVE_STAT +#endif + +#ifndef DONT_HAVE_FSTAT +#define HAVE_FSTAT +#endif + +#ifdef RISCOS +#include <sys/types.h> +#include "unixstuff.h" +#endif + +#ifdef HAVE_SYS_STAT_H +#if defined(PYOS_OS2) && defined(PYCC_GCC) +#include <sys/types.h> +#endif +#include <sys/stat.h> +#elif defined(HAVE_STAT_H) +#include <stat.h> +#endif + +#if defined(PYCC_VACPP) +/* VisualAge C/C++ Failed to Define MountType Field in sys/stat.h */ +#define S_IFMT (S_IFDIR|S_IFCHR|S_IFREG) +#endif + +#ifndef S_ISREG +#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +#endif + +#ifndef S_ISDIR +#define S_ISDIR(x) (((x) & S_IFMT) == S_IFDIR) +#endif + + +#ifdef __cplusplus +/* Move this down here since some C++ #include's don't like to be included + inside an extern "C" */ +extern "C" { +#endif + + +/* Py_ARITHMETIC_RIGHT_SHIFT + * C doesn't define whether a right-shift of a signed integer sign-extends + * or zero-fills. Here a macro to force sign extension: + * Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) + * Return I >> J, forcing sign extension. Arithmetically, return the + * floor of I/2**J. + * Requirements: + * I should have signed integer type. In the terminology of C99, this can + * be either one of the five standard signed integer types (signed char, + * short, int, long, long long) or an extended signed integer type. + * J is an integer >= 0 and strictly less than the number of bits in the + * type of I (because C doesn't define what happens for J outside that + * range either). + * TYPE used to specify the type of I, but is now ignored. It's been left + * in for backwards compatibility with versions <= 2.6 or 3.0. + * Caution: + * I may be evaluated more than once. + */ +#ifdef SIGNED_RIGHT_SHIFT_ZERO_FILLS +#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) \ + ((I) < 0 ? -1-((-1-(I)) >> (J)) : (I) >> (J)) +#else +#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) ((I) >> (J)) +#endif + +/* Py_FORCE_EXPANSION(X) + * "Simply" returns its argument. However, macro expansions within the + * argument are evaluated. This unfortunate trickery is needed to get + * token-pasting to work as desired in some cases. + */ +#define Py_FORCE_EXPANSION(X) X + +/* Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) + * Cast VALUE to type NARROW from type WIDE. In Py_DEBUG mode, this + * assert-fails if any information is lost. + * Caution: + * VALUE may be evaluated more than once. + */ +#ifdef Py_DEBUG +#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) \ + (assert((WIDE)(NARROW)(VALUE) == (VALUE)), (NARROW)(VALUE)) +#else +#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) (NARROW)(VALUE) +#endif + +/* Py_SET_ERRNO_ON_MATH_ERROR(x) + * If a libm function did not set errno, but it looks like the result + * overflowed or not-a-number, set errno to ERANGE or EDOM. Set errno + * to 0 before calling a libm function, and invoke this macro after, + * passing the function result. + * Caution: + * This isn't reliable. See Py_OVERFLOWED comments. + * X is evaluated more than once. + */ +#if defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__hpux) && defined(__ia64)) +#define _Py_SET_EDOM_FOR_NAN(X) if (isnan(X)) errno = EDOM; +#else +#define _Py_SET_EDOM_FOR_NAN(X) ; +#endif +#define Py_SET_ERRNO_ON_MATH_ERROR(X) \ + do { \ + if (errno == 0) { \ + if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL) \ + errno = ERANGE; \ + else _Py_SET_EDOM_FOR_NAN(X) \ + } \ + } while(0) + +/* Py_SET_ERANGE_ON_OVERFLOW(x) + * An alias of Py_SET_ERRNO_ON_MATH_ERROR for backward-compatibility. + */ +#define Py_SET_ERANGE_IF_OVERFLOW(X) Py_SET_ERRNO_ON_MATH_ERROR(X) + +/* Py_ADJUST_ERANGE1(x) + * Py_ADJUST_ERANGE2(x, y) + * Set errno to 0 before calling a libm function, and invoke one of these + * macros after, passing the function result(s) (Py_ADJUST_ERANGE2 is useful + * for functions returning complex results). This makes two kinds of + * adjustments to errno: (A) If it looks like the platform libm set + * errno=ERANGE due to underflow, clear errno. (B) If it looks like the + * platform libm overflowed but didn't set errno, force errno to ERANGE. In + * effect, we're trying to force a useful implementation of C89 errno + * behavior. + * Caution: + * This isn't reliable. See Py_OVERFLOWED comments. + * X and Y may be evaluated more than once. + */ +#define Py_ADJUST_ERANGE1(X) \ + do { \ + if (errno == 0) { \ + if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL) \ + errno = ERANGE; \ + } \ + else if (errno == ERANGE && (X) == 0.0) \ + errno = 0; \ + } while(0) + +#define Py_ADJUST_ERANGE2(X, Y) \ + do { \ + if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL || \ + (Y) == Py_HUGE_VAL || (Y) == -Py_HUGE_VAL) { \ + if (errno == 0) \ + errno = ERANGE; \ + } \ + else if (errno == ERANGE) \ + errno = 0; \ + } while(0) + +/* The functions _Py_dg_strtod and _Py_dg_dtoa in Python/dtoa.c (which are + * required to support the short float repr introduced in Python 3.1) require + * that the floating-point unit that's being used for arithmetic operations + * on C doubles is set to use 53-bit precision. It also requires that the + * FPU rounding mode is round-half-to-even, but that's less often an issue. + * + * If your FPU isn't already set to 53-bit precision/round-half-to-even, and + * you want to make use of _Py_dg_strtod and _Py_dg_dtoa, then you should + * + * #define HAVE_PY_SET_53BIT_PRECISION 1 + * + * and also give appropriate definitions for the following three macros: + * + * _PY_SET_53BIT_PRECISION_START : store original FPU settings, and + * set FPU to 53-bit precision/round-half-to-even + * _PY_SET_53BIT_PRECISION_END : restore original FPU settings + * _PY_SET_53BIT_PRECISION_HEADER : any variable declarations needed to + * use the two macros above. + * + * The macros are designed to be used within a single C function: see + * Python/pystrtod.c for an example of their use. + */ + +/* get and set x87 control word for gcc/x86 */ +#ifdef HAVE_GCC_ASM_FOR_X87 +#define HAVE_PY_SET_53BIT_PRECISION 1 +/* _Py_get/set_387controlword functions are defined in Python/pymath.c */ +#define _Py_SET_53BIT_PRECISION_HEADER \ + unsigned short old_387controlword, new_387controlword +#define _Py_SET_53BIT_PRECISION_START \ + do { \ + old_387controlword = _Py_get_387controlword(); \ + new_387controlword = (old_387controlword & ~0x0f00) | 0x0200; \ + if (new_387controlword != old_387controlword) \ + _Py_set_387controlword(new_387controlword); \ + } while (0) +#define _Py_SET_53BIT_PRECISION_END \ + if (new_387controlword != old_387controlword) \ + _Py_set_387controlword(old_387controlword) +#endif + +/* get and set x87 control word for VisualStudio/x86 */ +#if defined(_MSC_VER) && !defined(_WIN64) /* x87 not supported in 64-bit */ +#define HAVE_PY_SET_53BIT_PRECISION 1 +#define _Py_SET_53BIT_PRECISION_HEADER \ + unsigned int old_387controlword, new_387controlword, out_387controlword +/* We use the __control87_2 function to set only the x87 control word. + The SSE control word is unaffected. */ +#define _Py_SET_53BIT_PRECISION_START \ + do { \ + __control87_2(0, 0, &old_387controlword, NULL); \ + new_387controlword = \ + (old_387controlword & ~(_MCW_PC | _MCW_RC)) | (_PC_53 | _RC_NEAR); \ + if (new_387controlword != old_387controlword) \ + __control87_2(new_387controlword, _MCW_PC | _MCW_RC, \ + &out_387controlword, NULL); \ + } while (0) +#define _Py_SET_53BIT_PRECISION_END \ + do { \ + if (new_387controlword != old_387controlword) \ + __control87_2(old_387controlword, _MCW_PC | _MCW_RC, \ + &out_387controlword, NULL); \ + } while (0) +#endif + +/* default definitions are empty */ +#ifndef HAVE_PY_SET_53BIT_PRECISION +#define _Py_SET_53BIT_PRECISION_HEADER +#define _Py_SET_53BIT_PRECISION_START +#define _Py_SET_53BIT_PRECISION_END +#endif + +/* If we can't guarantee 53-bit precision, don't use the code + in Python/dtoa.c, but fall back to standard code. This + means that repr of a float will be long (17 sig digits). + + Realistically, there are two things that could go wrong: + + (1) doubles aren't IEEE 754 doubles, or + (2) we're on x86 with the rounding precision set to 64-bits + (extended precision), and we don't know how to change + the rounding precision. + */ + +#if !defined(DOUBLE_IS_LITTLE_ENDIAN_IEEE754) && \ + !defined(DOUBLE_IS_BIG_ENDIAN_IEEE754) && \ + !defined(DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754) +#define PY_NO_SHORT_FLOAT_REPR +#endif + +/* double rounding is symptomatic of use of extended precision on x86. If + we're seeing double rounding, and we don't have any mechanism available for + changing the FPU rounding precision, then don't use Python/dtoa.c. */ +#if defined(X87_DOUBLE_ROUNDING) && !defined(HAVE_PY_SET_53BIT_PRECISION) +#define PY_NO_SHORT_FLOAT_REPR +#endif + +/* Py_DEPRECATED(version) + * Declare a variable, type, or function deprecated. + * Usage: + * extern int old_var Py_DEPRECATED(2.3); + * typedef int T1 Py_DEPRECATED(2.4); + * extern int x() Py_DEPRECATED(2.5); + */ +#if defined(__GNUC__) && ((__GNUC__ >= 4) || \ + (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)) +#define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__)) +#else +#define Py_DEPRECATED(VERSION_UNUSED) +#endif + +/************************************************************************** +Prototypes that are missing from the standard include files on some systems +(and possibly only some versions of such systems.) + +Please be conservative with adding new ones, document them and enclose them +in platform-specific #ifdefs. +**************************************************************************/ + +#ifdef SOLARIS +/* Unchecked */ +extern int gethostname(char *, int); +#endif + +#ifdef __BEOS__ +/* Unchecked */ +/* It's in the libs, but not the headers... - [cjh] */ +int shutdown( int, int ); +#endif + +#ifdef HAVE__GETPTY +#include <sys/types.h> /* we need to import mode_t */ +extern char * _getpty(int *, int, mode_t, int); +#endif + +/* On QNX 6, struct termio must be declared by including sys/termio.h + if TCGETA, TCSETA, TCSETAW, or TCSETAF are used. sys/termio.h must + be included before termios.h or it will generate an error. */ +#if defined(HAVE_SYS_TERMIO_H) && !defined(__hpux) +#include <sys/termio.h> +#endif + +#if defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) +#if !defined(HAVE_PTY_H) && !defined(HAVE_LIBUTIL_H) && !defined(HAVE_UTIL_H) +/* BSDI does not supply a prototype for the 'openpty' and 'forkpty' + functions, even though they are included in libutil. */ +#include <termios.h> +extern int openpty(int *, int *, char *, struct termios *, struct winsize *); +extern pid_t forkpty(int *, char *, struct termios *, struct winsize *); +#endif /* !defined(HAVE_PTY_H) && !defined(HAVE_LIBUTIL_H) */ +#endif /* defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) */ + + +/* These are pulled from various places. It isn't obvious on what platforms + they are necessary, nor what the exact prototype should look like (which + is likely to vary between platforms!) If you find you need one of these + declarations, please move them to a platform-specific block and include + proper prototypes. */ +#if 0 + +/* From Modules/resource.c */ +extern int getrusage(); +extern int getpagesize(); + +/* From Python/sysmodule.c and Modules/posixmodule.c */ +extern int fclose(FILE *); + +/* From Modules/posixmodule.c */ +extern int fdatasync(int); +#endif /* 0 */ + + +/* On 4.4BSD-descendants, ctype functions serves the whole range of + * wchar_t character set rather than single byte code points only. + * This characteristic can break some operations of string object + * including str.upper() and str.split() on UTF-8 locales. This + * workaround was provided by Tim Robbins of FreeBSD project. + */ + +#ifdef __FreeBSD__ +#include <osreldate.h> +#if __FreeBSD_version > 500039 +# define _PY_PORT_CTYPE_UTF8_ISSUE +#endif +#endif + + +#if defined(__APPLE__) +# define _PY_PORT_CTYPE_UTF8_ISSUE +#endif + +#ifdef _PY_PORT_CTYPE_UTF8_ISSUE +#include <ctype.h> +#include <wctype.h> +#undef isalnum +#define isalnum(c) iswalnum(btowc(c)) +#undef isalpha +#define isalpha(c) iswalpha(btowc(c)) +#undef islower +#define islower(c) iswlower(btowc(c)) +#undef isspace +#define isspace(c) iswspace(btowc(c)) +#undef isupper +#define isupper(c) iswupper(btowc(c)) +#undef tolower +#define tolower(c) towlower(btowc(c)) +#undef toupper +#define toupper(c) towupper(btowc(c)) +#endif + + +/* Declarations for symbol visibility. + + PyAPI_FUNC(type): Declares a public Python API function and return type + PyAPI_DATA(type): Declares public Python data and its type + PyMODINIT_FUNC: A Python module init function. If these functions are + inside the Python core, they are private to the core. + If in an extension module, it may be declared with + external linkage depending on the platform. + + As a number of platforms support/require "__declspec(dllimport/dllexport)", + we support a HAVE_DECLSPEC_DLL macro to save duplication. +*/ + +/* + All windows ports, except cygwin, are handled in PC/pyconfig.h. + + BeOS and cygwin are the only other autoconf platform requiring special + linkage handling and both of these use __declspec(). +*/ +#if defined(__CYGWIN__) || defined(__BEOS__) +# define HAVE_DECLSPEC_DLL +#endif + +/* only get special linkage if built as shared or platform is Cygwin */ +#if defined(Py_ENABLE_SHARED) || defined(__CYGWIN__) +# if defined(HAVE_DECLSPEC_DLL) +# ifdef Py_BUILD_CORE +# define PyAPI_FUNC(RTYPE) __declspec(dllexport) RTYPE +# define PyAPI_DATA(RTYPE) extern __declspec(dllexport) RTYPE + /* module init functions inside the core need no external linkage */ + /* except for Cygwin to handle embedding (FIXME: BeOS too?) */ +# if defined(__CYGWIN__) +# define PyMODINIT_FUNC __declspec(dllexport) void +# else /* __CYGWIN__ */ +# define PyMODINIT_FUNC void +# endif /* __CYGWIN__ */ +# else /* Py_BUILD_CORE */ + /* Building an extension module, or an embedded situation */ + /* public Python functions and data are imported */ + /* Under Cygwin, auto-import functions to prevent compilation */ + /* failures similar to those described at the bottom of 4.1: */ + /* http://docs.python.org/extending/windows.html#a-cookbook-approach */ +# if !defined(__CYGWIN__) +# define PyAPI_FUNC(RTYPE) __declspec(dllimport) RTYPE +# endif /* !__CYGWIN__ */ +# define PyAPI_DATA(RTYPE) extern __declspec(dllimport) RTYPE + /* module init functions outside the core must be exported */ +# if defined(__cplusplus) +# define PyMODINIT_FUNC extern "C" __declspec(dllexport) void +# else /* __cplusplus */ +# define PyMODINIT_FUNC __declspec(dllexport) void +# endif /* __cplusplus */ +# endif /* Py_BUILD_CORE */ +# endif /* HAVE_DECLSPEC */ +#endif /* Py_ENABLE_SHARED */ + +/* If no external linkage macros defined by now, create defaults */ +#ifndef PyAPI_FUNC +# define PyAPI_FUNC(RTYPE) RTYPE +#endif +#ifndef PyAPI_DATA +# define PyAPI_DATA(RTYPE) extern RTYPE +#endif +#ifndef PyMODINIT_FUNC +# if defined(__cplusplus) +# define PyMODINIT_FUNC extern "C" void +# else /* __cplusplus */ +# define PyMODINIT_FUNC void +# endif /* __cplusplus */ +#endif + +/* Deprecated DL_IMPORT and DL_EXPORT macros */ +#if defined(Py_ENABLE_SHARED) && defined (HAVE_DECLSPEC_DLL) +# if defined(Py_BUILD_CORE) +# define DL_IMPORT(RTYPE) __declspec(dllexport) RTYPE +# define DL_EXPORT(RTYPE) __declspec(dllexport) RTYPE +# else +# define DL_IMPORT(RTYPE) __declspec(dllimport) RTYPE +# define DL_EXPORT(RTYPE) __declspec(dllexport) RTYPE +# endif +#endif +#ifndef DL_EXPORT +# define DL_EXPORT(RTYPE) RTYPE +#endif +#ifndef DL_IMPORT +# define DL_IMPORT(RTYPE) RTYPE +#endif +/* End of deprecated DL_* macros */ + +/* If the fd manipulation macros aren't defined, + here is a set that should do the job */ + +#if 0 /* disabled and probably obsolete */ + +#ifndef FD_SETSIZE +#define FD_SETSIZE 256 +#endif + +#ifndef FD_SET + +typedef long fd_mask; + +#define NFDBITS (sizeof(fd_mask) * NBBY) /* bits per mask */ +#ifndef howmany +#define howmany(x, y) (((x)+((y)-1))/(y)) +#endif /* howmany */ + +typedef struct fd_set { + fd_mask fds_bits[howmany(FD_SETSIZE, NFDBITS)]; +} fd_set; + +#define FD_SET(n, p) ((p)->fds_bits[(n)/NFDBITS] |= (1 << ((n) % NFDBITS))) +#define FD_CLR(n, p) ((p)->fds_bits[(n)/NFDBITS] &= ~(1 << ((n) % NFDBITS))) +#define FD_ISSET(n, p) ((p)->fds_bits[(n)/NFDBITS] & (1 << ((n) % NFDBITS))) +#define FD_ZERO(p) memset((char *)(p), '\0', sizeof(*(p))) + +#endif /* FD_SET */ + +#endif /* fd manipulation macros */ + + +/* limits.h constants that may be missing */ + +#ifndef INT_MAX +#define INT_MAX 2147483647 +#endif + +#ifndef LONG_MAX +#if SIZEOF_LONG == 4 +#define LONG_MAX 0X7FFFFFFFL +#elif SIZEOF_LONG == 8 +#define LONG_MAX 0X7FFFFFFFFFFFFFFFL +#else +#error "could not set LONG_MAX in pyport.h" +#endif +#endif + +#ifndef LONG_MIN +#define LONG_MIN (-LONG_MAX-1) +#endif + +#ifndef LONG_BIT +#define LONG_BIT (8 * SIZEOF_LONG) +#endif + +#if LONG_BIT != 8 * SIZEOF_LONG +/* 04-Oct-2000 LONG_BIT is apparently (mis)defined as 64 on some recent + * 32-bit platforms using gcc. We try to catch that here at compile-time + * rather than waiting for integer multiplication to trigger bogus + * overflows. + */ +#error "LONG_BIT definition appears wrong for platform (bad gcc/glibc config?)." +#endif + +#ifdef __cplusplus +} +#endif + +/* + * Hide GCC attributes from compilers that don't support them. + */ +#if (!defined(__GNUC__) || __GNUC__ < 2 || \ + (__GNUC__ == 2 && __GNUC_MINOR__ < 7) ) && \ + !defined(RISCOS) +#define Py_GCC_ATTRIBUTE(x) +#else +#define Py_GCC_ATTRIBUTE(x) __attribute__(x) +#endif + +/* + * Add PyArg_ParseTuple format where available. + */ +#ifdef HAVE_ATTRIBUTE_FORMAT_PARSETUPLE +#define Py_FORMAT_PARSETUPLE(func,p1,p2) __attribute__((format(func,p1,p2))) +#else +#define Py_FORMAT_PARSETUPLE(func,p1,p2) +#endif + +/* + * Specify alignment on compilers that support it. + */ +#if defined(__GNUC__) && __GNUC__ >= 3 +#define Py_ALIGNED(x) __attribute__((aligned(x))) +#else +#define Py_ALIGNED(x) +#endif + +/* Eliminate end-of-loop code not reached warnings from SunPro C + * when using do{...}while(0) macros + */ +#ifdef __SUNPRO_C +#pragma error_messages (off,E_END_OF_LOOP_CODE_NOT_REACHED) +#endif + +/* + * Older Microsoft compilers don't support the C99 long long literal suffixes, + * so these will be defined in PC/pyconfig.h for those compilers. + */ +#ifndef Py_LL +#define Py_LL(x) x##LL +#endif + +#ifndef Py_ULL +#define Py_ULL(x) Py_LL(x##U) +#endif + +#endif /* Py_PYPORT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pystate.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,200 @@ + +/* Thread and interpreter state structures and their interfaces */ + + +#ifndef Py_PYSTATE_H +#define Py_PYSTATE_H +#ifdef __cplusplus +extern "C" { +#endif + +/* State shared between threads */ + +struct _ts; /* Forward */ +struct _is; /* Forward */ + +typedef struct _is { + + struct _is *next; + struct _ts *tstate_head; + + PyObject *modules; + PyObject *sysdict; + PyObject *builtins; + PyObject *modules_reloading; + + PyObject *codec_search_path; + PyObject *codec_search_cache; + PyObject *codec_error_registry; + +#ifdef HAVE_DLOPEN + int dlopenflags; +#endif +#ifdef WITH_TSC + int tscdump; +#endif + +} PyInterpreterState; + + +/* State unique per thread */ + +struct _frame; /* Avoid including frameobject.h */ + +/* Py_tracefunc return -1 when raising an exception, or 0 for success. */ +typedef int (*Py_tracefunc)(PyObject *, struct _frame *, int, PyObject *); + +/* The following values are used for 'what' for tracefunc functions: */ +#define PyTrace_CALL 0 +#define PyTrace_EXCEPTION 1 +#define PyTrace_LINE 2 +#define PyTrace_RETURN 3 +#define PyTrace_C_CALL 4 +#define PyTrace_C_EXCEPTION 5 +#define PyTrace_C_RETURN 6 + +typedef struct _ts { + /* See Python/ceval.c for comments explaining most fields */ + + struct _ts *next; + PyInterpreterState *interp; + + struct _frame *frame; + int recursion_depth; + /* 'tracing' keeps track of the execution depth when tracing/profiling. + This is to prevent the actual trace/profile code from being recorded in + the trace/profile. */ + int tracing; + int use_tracing; + + Py_tracefunc c_profilefunc; + Py_tracefunc c_tracefunc; + PyObject *c_profileobj; + PyObject *c_traceobj; + + PyObject *curexc_type; + PyObject *curexc_value; + PyObject *curexc_traceback; + + PyObject *exc_type; + PyObject *exc_value; + PyObject *exc_traceback; + + PyObject *dict; /* Stores per-thread state */ + + /* tick_counter is incremented whenever the check_interval ticker + * reaches zero. The purpose is to give a useful measure of the number + * of interpreted bytecode instructions in a given thread. This + * extremely lightweight statistic collector may be of interest to + * profilers (like psyco.jit()), although nothing in the core uses it. + */ + int tick_counter; + + int gilstate_counter; + + PyObject *async_exc; /* Asynchronous exception to raise */ + long thread_id; /* Thread id where this tstate was created */ + + int trash_delete_nesting; + PyObject *trash_delete_later; + + /* XXX signal handlers should also be here */ + +} PyThreadState; + + +PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void); +PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *); +PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *); + +PyAPI_FUNC(PyThreadState *) PyThreadState_New(PyInterpreterState *); +PyAPI_FUNC(PyThreadState *) _PyThreadState_Prealloc(PyInterpreterState *); +PyAPI_FUNC(void) _PyThreadState_Init(PyThreadState *); +PyAPI_FUNC(void) PyThreadState_Clear(PyThreadState *); +PyAPI_FUNC(void) PyThreadState_Delete(PyThreadState *); +#ifdef WITH_THREAD +PyAPI_FUNC(void) PyThreadState_DeleteCurrent(void); +#endif + +PyAPI_FUNC(PyThreadState *) PyThreadState_Get(void); +PyAPI_FUNC(PyThreadState *) PyThreadState_Swap(PyThreadState *); +PyAPI_FUNC(PyObject *) PyThreadState_GetDict(void); +PyAPI_FUNC(int) PyThreadState_SetAsyncExc(long, PyObject *); + + +/* Variable and macro for in-line access to current thread state */ + +PyAPI_DATA(PyThreadState *) _PyThreadState_Current; + +#ifdef Py_DEBUG +#define PyThreadState_GET() PyThreadState_Get() +#else +#define PyThreadState_GET() (_PyThreadState_Current) +#endif + +typedef + enum {PyGILState_LOCKED, PyGILState_UNLOCKED} + PyGILState_STATE; + +/* Ensure that the current thread is ready to call the Python + C API, regardless of the current state of Python, or of its + thread lock. This may be called as many times as desired + by a thread so long as each call is matched with a call to + PyGILState_Release(). In general, other thread-state APIs may + be used between _Ensure() and _Release() calls, so long as the + thread-state is restored to its previous state before the Release(). + For example, normal use of the Py_BEGIN_ALLOW_THREADS/ + Py_END_ALLOW_THREADS macros are acceptable. + + The return value is an opaque "handle" to the thread state when + PyGILState_Ensure() was called, and must be passed to + PyGILState_Release() to ensure Python is left in the same state. Even + though recursive calls are allowed, these handles can *not* be shared - + each unique call to PyGILState_Ensure must save the handle for its + call to PyGILState_Release. + + When the function returns, the current thread will hold the GIL. + + Failure is a fatal error. +*/ +PyAPI_FUNC(PyGILState_STATE) PyGILState_Ensure(void); + +/* Release any resources previously acquired. After this call, Python's + state will be the same as it was prior to the corresponding + PyGILState_Ensure() call (but generally this state will be unknown to + the caller, hence the use of the GILState API.) + + Every call to PyGILState_Ensure must be matched by a call to + PyGILState_Release on the same thread. +*/ +PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE); + +/* Helper/diagnostic function - get the current thread state for + this thread. May return NULL if no GILState API has been used + on the current thread. Note that the main thread always has such a + thread-state, even if no auto-thread-state call has been made + on the main thread. +*/ +PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void); + +/* The implementation of sys._current_frames() Returns a dict mapping + thread id to that thread's current frame. +*/ +PyAPI_FUNC(PyObject *) _PyThread_CurrentFrames(void); + +/* Routines for advanced debuggers, requested by David Beazley. + Don't use unless you know what you are doing! */ +PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Head(void); +PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Next(PyInterpreterState *); +PyAPI_FUNC(PyThreadState *) PyInterpreterState_ThreadHead(PyInterpreterState *); +PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *); + +typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_); + +/* hook for PyEval_GetFrame(), requested for Psyco */ +PyAPI_DATA(PyThreadFrameGetter) _PyThreadState_GetFrame; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYSTATE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pystrcmp.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,23 @@ +#ifndef Py_STRCMP_H +#define Py_STRCMP_H + +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(int) PyOS_mystrnicmp(const char *, const char *, Py_ssize_t); +PyAPI_FUNC(int) PyOS_mystricmp(const char *, const char *); + +#if defined(MS_WINDOWS) || defined(PYOS_OS2) +#define PyOS_strnicmp strnicmp +#define PyOS_stricmp stricmp +#else +#define PyOS_strnicmp PyOS_mystrnicmp +#define PyOS_stricmp PyOS_mystricmp +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_STRCMP_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pystrtod.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,45 @@ +#ifndef Py_STRTOD_H +#define Py_STRTOD_H + +#ifdef __cplusplus +extern "C" { +#endif + + +PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr); +PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); + +/* Deprecated in 2.7 and 3.1. Will disappear in 2.8 (if it exists) and 3.2 */ +PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, + const char *format, double d); +PyAPI_FUNC(double) PyOS_string_to_double(const char *str, + char **endptr, + PyObject *overflow_exception); + +/* The caller is responsible for calling PyMem_Free to free the buffer + that's is returned. */ +PyAPI_FUNC(char *) PyOS_double_to_string(double val, + char format_code, + int precision, + int flags, + int *type); + +PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr); + + +/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */ +#define Py_DTSF_SIGN 0x01 /* always add the sign */ +#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */ +#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code + specific */ + +/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */ +#define Py_DTST_FINITE 0 +#define Py_DTST_INFINITE 1 +#define Py_DTST_NAN 2 + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_STRTOD_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pythonrun.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,182 @@ + +/* Interfaces to parse and execute pieces of python code */ + +#ifndef Py_PYTHONRUN_H +#define Py_PYTHONRUN_H +#ifdef __cplusplus +extern "C" { +#endif + +#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \ + CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \ + CO_FUTURE_UNICODE_LITERALS) +#define PyCF_MASK_OBSOLETE (CO_NESTED) +#define PyCF_SOURCE_IS_UTF8 0x0100 +#define PyCF_DONT_IMPLY_DEDENT 0x0200 +#define PyCF_ONLY_AST 0x0400 + +typedef struct { + int cf_flags; /* bitmask of CO_xxx flags relevant to future */ +} PyCompilerFlags; + +PyAPI_FUNC(void) Py_SetProgramName(char *); +PyAPI_FUNC(char *) Py_GetProgramName(void); + +PyAPI_FUNC(void) Py_SetPythonHome(char *); +PyAPI_FUNC(char *) Py_GetPythonHome(void); + +PyAPI_FUNC(void) Py_Initialize(void); +PyAPI_FUNC(void) Py_InitializeEx(int); +PyAPI_FUNC(void) Py_Finalize(void); +PyAPI_FUNC(int) Py_IsInitialized(void); +PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void); +PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *); + +PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_AnyFileExFlags(FILE *, const char *, int, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_SimpleFileExFlags(FILE *, const char *, int, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_InteractiveOneFlags(FILE *, const char *, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_InteractiveLoopFlags(FILE *, const char *, PyCompilerFlags *); + +PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, + int, PyCompilerFlags *flags, + PyArena *); +PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, int, + char *, char *, + PyCompilerFlags *, int *, + PyArena *); +#define PyParser_SimpleParseString(S, B) \ + PyParser_SimpleParseStringFlags(S, B, 0) +#define PyParser_SimpleParseFile(FP, S, B) \ + PyParser_SimpleParseFileFlags(FP, S, B, 0) +PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlags(const char *, int, + int); +PyAPI_FUNC(struct _node *) PyParser_SimpleParseFileFlags(FILE *, const char *, + int, int); + +PyAPI_FUNC(PyObject *) PyRun_StringFlags(const char *, int, PyObject *, + PyObject *, PyCompilerFlags *); + +PyAPI_FUNC(PyObject *) PyRun_FileExFlags(FILE *, const char *, int, + PyObject *, PyObject *, int, + PyCompilerFlags *); + +#define Py_CompileString(str, p, s) Py_CompileStringFlags(str, p, s, NULL) +PyAPI_FUNC(PyObject *) Py_CompileStringFlags(const char *, const char *, int, + PyCompilerFlags *); +PyAPI_FUNC(struct symtable *) Py_SymtableString(const char *, const char *, int); + +PyAPI_FUNC(void) PyErr_Print(void); +PyAPI_FUNC(void) PyErr_PrintEx(int); +PyAPI_FUNC(void) PyErr_Display(PyObject *, PyObject *, PyObject *); + +PyAPI_FUNC(int) Py_AtExit(void (*func)(void)); + +PyAPI_FUNC(void) Py_Exit(int); + +PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *); + +/* Bootstrap */ +PyAPI_FUNC(int) Py_Main(int argc, char **argv); + +/* Use macros for a bunch of old variants */ +#define PyRun_String(str, s, g, l) PyRun_StringFlags(str, s, g, l, NULL) +#define PyRun_AnyFile(fp, name) PyRun_AnyFileExFlags(fp, name, 0, NULL) +#define PyRun_AnyFileEx(fp, name, closeit) \ + PyRun_AnyFileExFlags(fp, name, closeit, NULL) +#define PyRun_AnyFileFlags(fp, name, flags) \ + PyRun_AnyFileExFlags(fp, name, 0, flags) +#define PyRun_SimpleString(s) PyRun_SimpleStringFlags(s, NULL) +#define PyRun_SimpleFile(f, p) PyRun_SimpleFileExFlags(f, p, 0, NULL) +#define PyRun_SimpleFileEx(f, p, c) PyRun_SimpleFileExFlags(f, p, c, NULL) +#define PyRun_InteractiveOne(f, p) PyRun_InteractiveOneFlags(f, p, NULL) +#define PyRun_InteractiveLoop(f, p) PyRun_InteractiveLoopFlags(f, p, NULL) +#define PyRun_File(fp, p, s, g, l) \ + PyRun_FileExFlags(fp, p, s, g, l, 0, NULL) +#define PyRun_FileEx(fp, p, s, g, l, c) \ + PyRun_FileExFlags(fp, p, s, g, l, c, NULL) +#define PyRun_FileFlags(fp, p, s, g, l, flags) \ + PyRun_FileExFlags(fp, p, s, g, l, 0, flags) + +/* In getpath.c */ +PyAPI_FUNC(char *) Py_GetProgramFullPath(void); +PyAPI_FUNC(char *) Py_GetPrefix(void); +PyAPI_FUNC(char *) Py_GetExecPrefix(void); +PyAPI_FUNC(char *) Py_GetPath(void); + +/* In their own files */ +PyAPI_FUNC(const char *) Py_GetVersion(void); +PyAPI_FUNC(const char *) Py_GetPlatform(void); +PyAPI_FUNC(const char *) Py_GetCopyright(void); +PyAPI_FUNC(const char *) Py_GetCompiler(void); +PyAPI_FUNC(const char *) Py_GetBuildInfo(void); +PyAPI_FUNC(const char *) _Py_svnversion(void); +PyAPI_FUNC(const char *) Py_SubversionRevision(void); +PyAPI_FUNC(const char *) Py_SubversionShortBranch(void); +PyAPI_FUNC(const char *) _Py_hgidentifier(void); +PyAPI_FUNC(const char *) _Py_hgversion(void); + +/* Internal -- various one-time initializations */ +PyAPI_FUNC(PyObject *) _PyBuiltin_Init(void); +PyAPI_FUNC(PyObject *) _PySys_Init(void); +PyAPI_FUNC(void) _PyImport_Init(void); +PyAPI_FUNC(void) _PyExc_Init(void); +PyAPI_FUNC(void) _PyImportHooks_Init(void); +PyAPI_FUNC(int) _PyFrame_Init(void); +PyAPI_FUNC(int) _PyInt_Init(void); +PyAPI_FUNC(int) _PyLong_Init(void); +PyAPI_FUNC(void) _PyFloat_Init(void); +PyAPI_FUNC(int) PyByteArray_Init(void); +PyAPI_FUNC(void) _PyRandom_Init(void); + +/* Various internal finalizers */ +PyAPI_FUNC(void) _PyExc_Fini(void); +PyAPI_FUNC(void) _PyImport_Fini(void); +PyAPI_FUNC(void) PyMethod_Fini(void); +PyAPI_FUNC(void) PyFrame_Fini(void); +PyAPI_FUNC(void) PyCFunction_Fini(void); +PyAPI_FUNC(void) PyDict_Fini(void); +PyAPI_FUNC(void) PyTuple_Fini(void); +PyAPI_FUNC(void) PyList_Fini(void); +PyAPI_FUNC(void) PySet_Fini(void); +PyAPI_FUNC(void) PyString_Fini(void); +PyAPI_FUNC(void) PyInt_Fini(void); +PyAPI_FUNC(void) PyFloat_Fini(void); +PyAPI_FUNC(void) PyOS_FiniInterrupts(void); +PyAPI_FUNC(void) PyByteArray_Fini(void); +PyAPI_FUNC(void) _PyRandom_Fini(void); + +/* Stuff with no proper home (yet) */ +PyAPI_FUNC(char *) PyOS_Readline(FILE *, FILE *, char *); +PyAPI_DATA(int) (*PyOS_InputHook)(void); +PyAPI_DATA(char) *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); +PyAPI_DATA(PyThreadState*) _PyOS_ReadlineTState; + +/* Stack size, in "pointers" (so we get extra safety margins + on 64-bit platforms). On a 32-bit platform, this translates + to an 8k margin. */ +#define PYOS_STACK_MARGIN 2048 + +#if defined(WIN32) && !defined(MS_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1300 +/* Enable stack checking under Microsoft C */ +#define USE_STACKCHECK +#endif + +#ifdef USE_STACKCHECK +/* Check that we aren't overflowing our stack */ +PyAPI_FUNC(int) PyOS_CheckStack(void); +#endif + +/* Signals */ +typedef void (*PyOS_sighandler_t)(int); +PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); +PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); + +/* Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYTHONRUN_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/pythread.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,41 @@ + +#ifndef Py_PYTHREAD_H +#define Py_PYTHREAD_H + +typedef void *PyThread_type_lock; +typedef void *PyThread_type_sema; + +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(void) PyThread_init_thread(void); +PyAPI_FUNC(long) PyThread_start_new_thread(void (*)(void *), void *); +PyAPI_FUNC(void) PyThread_exit_thread(void); +PyAPI_FUNC(long) PyThread_get_thread_ident(void); + +PyAPI_FUNC(PyThread_type_lock) PyThread_allocate_lock(void); +PyAPI_FUNC(void) PyThread_free_lock(PyThread_type_lock); +PyAPI_FUNC(int) PyThread_acquire_lock(PyThread_type_lock, int); +#define WAIT_LOCK 1 +#define NOWAIT_LOCK 0 +PyAPI_FUNC(void) PyThread_release_lock(PyThread_type_lock); + +PyAPI_FUNC(size_t) PyThread_get_stacksize(void); +PyAPI_FUNC(int) PyThread_set_stacksize(size_t); + +/* Thread Local Storage (TLS) API */ +PyAPI_FUNC(int) PyThread_create_key(void); +PyAPI_FUNC(void) PyThread_delete_key(int); +PyAPI_FUNC(int) PyThread_set_key_value(int, void *); +PyAPI_FUNC(void *) PyThread_get_key_value(int); +PyAPI_FUNC(void) PyThread_delete_key_value(int key); + +/* Cleanup after a fork */ +PyAPI_FUNC(void) PyThread_ReInitTLS(void); + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_PYTHREAD_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/rangeobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,28 @@ + +/* Range object interface */ + +#ifndef Py_RANGEOBJECT_H +#define Py_RANGEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* This is about the type 'xrange', not the built-in function range(), which + returns regular lists. */ + +/* +A range object represents an integer range. This is an immutable object; +a range cannot change its value after creation. + +Range objects behave like the corresponding tuple objects except that +they are represented by a start, stop, and step datamembers. +*/ + +PyAPI_DATA(PyTypeObject) PyRange_Type; + +#define PyRange_Check(op) (Py_TYPE(op) == &PyRange_Type) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_RANGEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/setobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,99 @@ +/* Set object interface */ + +#ifndef Py_SETOBJECT_H +#define Py_SETOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* +There are three kinds of slots in the table: + +1. Unused: key == NULL +2. Active: key != NULL and key != dummy +3. Dummy: key == dummy + +Note: .pop() abuses the hash field of an Unused or Dummy slot to +hold a search finger. The hash field of Unused or Dummy slots has +no meaning otherwise. +*/ + +#define PySet_MINSIZE 8 + +typedef struct { + long hash; /* cached hash code for the entry key */ + PyObject *key; +} setentry; + + +/* +This data structure is shared by set and frozenset objects. +*/ + +typedef struct _setobject PySetObject; +struct _setobject { + PyObject_HEAD + + Py_ssize_t fill; /* # Active + # Dummy */ + Py_ssize_t used; /* # Active */ + + /* The table contains mask + 1 slots, and that's a power of 2. + * We store the mask instead of the size because the mask is more + * frequently needed. + */ + Py_ssize_t mask; + + /* table points to smalltable for small tables, else to + * additional malloc'ed memory. table is never NULL! This rule + * saves repeated runtime null-tests. + */ + setentry *table; + setentry *(*lookup)(PySetObject *so, PyObject *key, long hash); + setentry smalltable[PySet_MINSIZE]; + + long hash; /* only used by frozenset objects */ + PyObject *weakreflist; /* List of weak references */ +}; + +PyAPI_DATA(PyTypeObject) PySet_Type; +PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; + +/* Invariants for frozensets: + * data is immutable. + * hash is the hash of the frozenset or -1 if not computed yet. + * Invariants for sets: + * hash is -1 + */ + +#define PyFrozenSet_CheckExact(ob) (Py_TYPE(ob) == &PyFrozenSet_Type) +#define PyAnySet_CheckExact(ob) \ + (Py_TYPE(ob) == &PySet_Type || Py_TYPE(ob) == &PyFrozenSet_Type) +#define PyAnySet_Check(ob) \ + (Py_TYPE(ob) == &PySet_Type || Py_TYPE(ob) == &PyFrozenSet_Type || \ + PyType_IsSubtype(Py_TYPE(ob), &PySet_Type) || \ + PyType_IsSubtype(Py_TYPE(ob), &PyFrozenSet_Type)) +#define PySet_Check(ob) \ + (Py_TYPE(ob) == &PySet_Type || \ + PyType_IsSubtype(Py_TYPE(ob), &PySet_Type)) +#define PyFrozenSet_Check(ob) \ + (Py_TYPE(ob) == &PyFrozenSet_Type || \ + PyType_IsSubtype(Py_TYPE(ob), &PyFrozenSet_Type)) + +PyAPI_FUNC(PyObject *) PySet_New(PyObject *); +PyAPI_FUNC(PyObject *) PyFrozenSet_New(PyObject *); +PyAPI_FUNC(Py_ssize_t) PySet_Size(PyObject *anyset); +#define PySet_GET_SIZE(so) (((PySetObject *)(so))->used) +PyAPI_FUNC(int) PySet_Clear(PyObject *set); +PyAPI_FUNC(int) PySet_Contains(PyObject *anyset, PyObject *key); +PyAPI_FUNC(int) PySet_Discard(PyObject *set, PyObject *key); +PyAPI_FUNC(int) PySet_Add(PyObject *set, PyObject *key); +PyAPI_FUNC(int) _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **key); +PyAPI_FUNC(int) _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, long *hash); +PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set); +PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_SETOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/sliceobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,44 @@ +#ifndef Py_SLICEOBJECT_H +#define Py_SLICEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* The unique ellipsis object "..." */ + +PyAPI_DATA(PyObject) _Py_EllipsisObject; /* Don't use this directly */ + +#define Py_Ellipsis (&_Py_EllipsisObject) + +/* Slice object interface */ + +/* + +A slice object containing start, stop, and step data members (the +names are from range). After much talk with Guido, it was decided to +let these be any arbitrary python type. Py_None stands for omitted values. +*/ + +typedef struct { + PyObject_HEAD + PyObject *start, *stop, *step; /* not NULL */ +} PySliceObject; + +PyAPI_DATA(PyTypeObject) PySlice_Type; +PyAPI_DATA(PyTypeObject) PyEllipsis_Type; + +#define PySlice_Check(op) (Py_TYPE(op) == &PySlice_Type) + +PyAPI_FUNC(PyObject *) PySlice_New(PyObject* start, PyObject* stop, + PyObject* step); +PyAPI_FUNC(PyObject *) _PySlice_FromIndices(Py_ssize_t start, Py_ssize_t stop); +PyAPI_FUNC(int) PySlice_GetIndices(PySliceObject *r, Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); +PyAPI_FUNC(int) PySlice_GetIndicesEx(PySliceObject *r, Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, + Py_ssize_t *step, Py_ssize_t *slicelength); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_SLICEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/stringobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,210 @@ + +/* String (str/bytes) object interface */ + +#ifndef Py_STRINGOBJECT_H +#define Py_STRINGOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdarg.h> + +/* +Type PyStringObject represents a character string. An extra zero byte is +reserved at the end to ensure it is zero-terminated, but a size is +present so strings with null bytes in them can be represented. This +is an immutable object type. + +There are functions to create new string objects, to test +an object for string-ness, and to get the +string value. The latter function returns a null pointer +if the object is not of the proper type. +There is a variant that takes an explicit size as well as a +variant that assumes a zero-terminated string. Note that none of the +functions should be applied to nil objects. +*/ + +/* Caching the hash (ob_shash) saves recalculation of a string's hash value. + Interning strings (ob_sstate) tries to ensure that only one string + object with a given value exists, so equality tests can be one pointer + comparison. This is generally restricted to strings that "look like" + Python identifiers, although the intern() builtin can be used to force + interning of any string. + Together, these sped the interpreter by up to 20%. */ + +typedef struct { + PyObject_VAR_HEAD + long ob_shash; + int ob_sstate; + char ob_sval[1]; + + /* Invariants: + * ob_sval contains space for 'ob_size+1' elements. + * ob_sval[ob_size] == 0. + * ob_shash is the hash of the string or -1 if not computed yet. + * ob_sstate != 0 iff the string object is in stringobject.c's + * 'interned' dictionary; in this case the two references + * from 'interned' to this object are *not counted* in ob_refcnt. + */ +} PyStringObject; + +#define SSTATE_NOT_INTERNED 0 +#define SSTATE_INTERNED_MORTAL 1 +#define SSTATE_INTERNED_IMMORTAL 2 + +PyAPI_DATA(PyTypeObject) PyBaseString_Type; +PyAPI_DATA(PyTypeObject) PyString_Type; + +#define PyString_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_STRING_SUBCLASS) +#define PyString_CheckExact(op) (Py_TYPE(op) == &PyString_Type) + +PyAPI_FUNC(PyObject *) PyString_FromStringAndSize(const char *, Py_ssize_t); +PyAPI_FUNC(PyObject *) PyString_FromString(const char *); +PyAPI_FUNC(PyObject *) PyString_FromFormatV(const char*, va_list) + Py_GCC_ATTRIBUTE((format(printf, 1, 0))); +PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...) + Py_GCC_ATTRIBUTE((format(printf, 1, 2))); +PyAPI_FUNC(Py_ssize_t) PyString_Size(PyObject *); +PyAPI_FUNC(char *) PyString_AsString(PyObject *); +PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int); +PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); +PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *); +PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t); +PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*); +PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int, + int, char**, int*); +PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, Py_ssize_t, + const char *, Py_ssize_t, + const char *); + +PyAPI_FUNC(void) PyString_InternInPlace(PyObject **); +PyAPI_FUNC(void) PyString_InternImmortal(PyObject **); +PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *); +PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void); + +/* Use only if you know it's a string */ +#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate) + +/* Macro, trading safety for speed */ +#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) +#define PyString_GET_SIZE(op) Py_SIZE(op) + +/* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*, + x must be an iterable object. */ +PyAPI_FUNC(PyObject *) _PyString_Join(PyObject *sep, PyObject *x); + +/* --- Generic Codecs ----------------------------------------------------- */ + +/* Create an object by decoding the encoded string s of the + given size. */ + +PyAPI_FUNC(PyObject*) PyString_Decode( + const char *s, /* encoded string */ + Py_ssize_t size, /* size of buffer */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a char buffer of the given size and returns a + Python object. */ + +PyAPI_FUNC(PyObject*) PyString_Encode( + const char *s, /* string char buffer */ + Py_ssize_t size, /* number of chars to encode */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a string object and returns the result as Python + object. */ + +PyAPI_FUNC(PyObject*) PyString_AsEncodedObject( + PyObject *str, /* string object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a string object and returns the result as Python string + object. + + If the codec returns a Unicode object, the object is converted + back to a string using the default encoding. + + DEPRECATED - use PyString_AsEncodedObject() instead. */ + +PyAPI_FUNC(PyObject*) PyString_AsEncodedString( + PyObject *str, /* string object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Decodes a string object and returns the result as Python + object. */ + +PyAPI_FUNC(PyObject*) PyString_AsDecodedObject( + PyObject *str, /* string object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Decodes a string object and returns the result as Python string + object. + + If the codec returns a Unicode object, the object is converted + back to a string using the default encoding. + + DEPRECATED - use PyString_AsDecodedObject() instead. */ + +PyAPI_FUNC(PyObject*) PyString_AsDecodedString( + PyObject *str, /* string object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Provides access to the internal data buffer and size of a string + object or the default encoded version of a Unicode object. Passing + NULL as *len parameter will force the string buffer to be + 0-terminated (passing a string with embedded NULL characters will + cause an exception). */ + +PyAPI_FUNC(int) PyString_AsStringAndSize( + register PyObject *obj, /* string or Unicode object */ + register char **s, /* pointer to buffer variable */ + register Py_ssize_t *len /* pointer to length variable or NULL + (only possible for 0-terminated + strings) */ + ); + + +/* Using the current locale, insert the thousands grouping + into the string pointed to by buffer. For the argument descriptions, + see Objects/stringlib/localeutil.h */ +PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGroupingLocale(char *buffer, + Py_ssize_t n_buffer, + char *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width); + +/* Using explicit passed-in values, insert the thousands grouping + into the string pointed to by buffer. For the argument descriptions, + see Objects/stringlib/localeutil.h */ +PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGrouping(char *buffer, + Py_ssize_t n_buffer, + char *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + const char *thousands_sep); + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyBytes_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_STRINGOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/structmember.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,99 @@ +#ifndef Py_STRUCTMEMBER_H +#define Py_STRUCTMEMBER_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Interface to map C struct members to Python object attributes */ + +#include <stddef.h> /* For offsetof */ + +/* The offsetof() macro calculates the offset of a structure member + in its structure. Unfortunately this cannot be written down + portably, hence it is provided by a Standard C header file. + For pre-Standard C compilers, here is a version that usually works + (but watch out!): */ + +#ifndef offsetof +#define offsetof(type, member) ( (int) & ((type*)0) -> member ) +#endif + +/* An array of memberlist structures defines the name, type and offset + of selected members of a C structure. These can be read by + PyMember_Get() and set by PyMember_Set() (except if their READONLY flag + is set). The array must be terminated with an entry whose name + pointer is NULL. */ + +struct memberlist { + /* Obsolete version, for binary backwards compatibility */ + char *name; + int type; + int offset; + int flags; +}; + +typedef struct PyMemberDef { + /* Current version, use this */ + char *name; + int type; + Py_ssize_t offset; + int flags; + char *doc; +} PyMemberDef; + +/* Types */ +#define T_SHORT 0 +#define T_INT 1 +#define T_LONG 2 +#define T_FLOAT 3 +#define T_DOUBLE 4 +#define T_STRING 5 +#define T_OBJECT 6 +/* XXX the ordering here is weird for binary compatibility */ +#define T_CHAR 7 /* 1-character string */ +#define T_BYTE 8 /* 8-bit signed int */ +/* unsigned variants: */ +#define T_UBYTE 9 +#define T_USHORT 10 +#define T_UINT 11 +#define T_ULONG 12 + +/* Added by Jack: strings contained in the structure */ +#define T_STRING_INPLACE 13 + +/* Added by Lillo: bools contained in the structure (assumed char) */ +#define T_BOOL 14 + +#define T_OBJECT_EX 16 /* Like T_OBJECT, but raises AttributeError + when the value is NULL, instead of + converting to None. */ +#ifdef HAVE_LONG_LONG +#define T_LONGLONG 17 +#define T_ULONGLONG 18 +#endif /* HAVE_LONG_LONG */ + +#define T_PYSSIZET 19 /* Py_ssize_t */ + + +/* Flags */ +#define READONLY 1 +#define RO READONLY /* Shorthand */ +#define READ_RESTRICTED 2 +#define PY_WRITE_RESTRICTED 4 +#define RESTRICTED (READ_RESTRICTED | PY_WRITE_RESTRICTED) + + +/* Obsolete API, for binary backwards compatibility */ +PyAPI_FUNC(PyObject *) PyMember_Get(const char *, struct memberlist *, const char *); +PyAPI_FUNC(int) PyMember_Set(char *, struct memberlist *, const char *, PyObject *); + +/* Current API, use this */ +PyAPI_FUNC(PyObject *) PyMember_GetOne(const char *, struct PyMemberDef *); +PyAPI_FUNC(int) PyMember_SetOne(char *, struct PyMemberDef *, PyObject *); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_STRUCTMEMBER_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/structseq.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,41 @@ + +/* Tuple object interface */ + +#ifndef Py_STRUCTSEQ_H +#define Py_STRUCTSEQ_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PyStructSequence_Field { + char *name; + char *doc; +} PyStructSequence_Field; + +typedef struct PyStructSequence_Desc { + char *name; + char *doc; + struct PyStructSequence_Field *fields; + int n_in_sequence; +} PyStructSequence_Desc; + +extern char* PyStructSequence_UnnamedField; + +PyAPI_FUNC(void) PyStructSequence_InitType(PyTypeObject *type, + PyStructSequence_Desc *desc); + +PyAPI_FUNC(PyObject *) PyStructSequence_New(PyTypeObject* type); + +typedef struct { + PyObject_VAR_HEAD + PyObject *ob_item[1]; +} PyStructSequence; + +/* Macro, *only* to be used to fill in brand new objects */ +#define PyStructSequence_SET_ITEM(op, i, v) \ + (((PyStructSequence *)(op))->ob_item[i] = v) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_STRUCTSEQ_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/symtable.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,98 @@ +#ifndef Py_SYMTABLE_H +#define Py_SYMTABLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum _block_type { FunctionBlock, ClassBlock, ModuleBlock } + _Py_block_ty; + +struct _symtable_entry; + +struct symtable { + const char *st_filename; /* name of file being compiled */ + struct _symtable_entry *st_cur; /* current symbol table entry */ + struct _symtable_entry *st_top; /* module entry */ + PyObject *st_symbols; /* dictionary of symbol table entries */ + PyObject *st_stack; /* stack of namespace info */ + PyObject *st_global; /* borrowed ref to MODULE in st_symbols */ + int st_nblocks; /* number of blocks */ + PyObject *st_private; /* name of current class or NULL */ + PyFutureFeatures *st_future; /* module's future features */ +}; + +typedef struct _symtable_entry { + PyObject_HEAD + PyObject *ste_id; /* int: key in st_symbols */ + PyObject *ste_symbols; /* dict: name to flags */ + PyObject *ste_name; /* string: name of block */ + PyObject *ste_varnames; /* list of variable names */ + PyObject *ste_children; /* list of child ids */ + _Py_block_ty ste_type; /* module, class, or function */ + int ste_unoptimized; /* false if namespace is optimized */ + int ste_nested; /* true if block is nested */ + unsigned ste_free : 1; /* true if block has free variables */ + unsigned ste_child_free : 1; /* true if a child block has free vars, + including free refs to globals */ + unsigned ste_generator : 1; /* true if namespace is a generator */ + unsigned ste_varargs : 1; /* true if block has varargs */ + unsigned ste_varkeywords : 1; /* true if block has varkeywords */ + unsigned ste_returns_value : 1; /* true if namespace uses return with + an argument */ + int ste_lineno; /* first line of block */ + int ste_opt_lineno; /* lineno of last exec or import * */ + int ste_tmpname; /* counter for listcomp temp vars */ + struct symtable *ste_table; +} PySTEntryObject; + +PyAPI_DATA(PyTypeObject) PySTEntry_Type; + +#define PySTEntry_Check(op) (Py_TYPE(op) == &PySTEntry_Type) + +PyAPI_FUNC(int) PyST_GetScope(PySTEntryObject *, PyObject *); + +PyAPI_FUNC(struct symtable *) PySymtable_Build(mod_ty, const char *, + PyFutureFeatures *); +PyAPI_FUNC(PySTEntryObject *) PySymtable_Lookup(struct symtable *, void *); + +PyAPI_FUNC(void) PySymtable_Free(struct symtable *); + +/* Flags for def-use information */ + +#define DEF_GLOBAL 1 /* global stmt */ +#define DEF_LOCAL 2 /* assignment in code block */ +#define DEF_PARAM 2<<1 /* formal parameter */ +#define USE 2<<2 /* name is used */ +#define DEF_FREE 2<<3 /* name used but not defined in nested block */ +#define DEF_FREE_CLASS 2<<4 /* free variable from class's method */ +#define DEF_IMPORT 2<<5 /* assignment occurred via import */ + +#define DEF_BOUND (DEF_LOCAL | DEF_PARAM | DEF_IMPORT) + +/* GLOBAL_EXPLICIT and GLOBAL_IMPLICIT are used internally by the symbol + table. GLOBAL is returned from PyST_GetScope() for either of them. + It is stored in ste_symbols at bits 12-14. +*/ +#define SCOPE_OFF 11 +#define SCOPE_MASK 7 + +#define LOCAL 1 +#define GLOBAL_EXPLICIT 2 +#define GLOBAL_IMPLICIT 3 +#define FREE 4 +#define CELL 5 + +/* The following three names are used for the ste_unoptimized bit field */ +#define OPT_IMPORT_STAR 1 +#define OPT_EXEC 2 +#define OPT_BARE_EXEC 4 +#define OPT_TOPLEVEL 8 /* top-level names, including eval and exec */ + +#define GENERATOR 1 +#define GENERATOR_EXPRESSION 2 + +#ifdef __cplusplus +} +#endif +#endif /* !Py_SYMTABLE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/sysmodule.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,31 @@ + +/* System module interface */ + +#ifndef Py_SYSMODULE_H +#define Py_SYSMODULE_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(PyObject *) PySys_GetObject(char *); +PyAPI_FUNC(int) PySys_SetObject(char *, PyObject *); +PyAPI_FUNC(FILE *) PySys_GetFile(char *, FILE *); +PyAPI_FUNC(void) PySys_SetArgv(int, char **); +PyAPI_FUNC(void) PySys_SetArgvEx(int, char **, int); +PyAPI_FUNC(void) PySys_SetPath(char *); + +PyAPI_FUNC(void) PySys_WriteStdout(const char *format, ...) + Py_GCC_ATTRIBUTE((format(printf, 1, 2))); +PyAPI_FUNC(void) PySys_WriteStderr(const char *format, ...) + Py_GCC_ATTRIBUTE((format(printf, 1, 2))); + +PyAPI_FUNC(void) PySys_ResetWarnOptions(void); +PyAPI_FUNC(void) PySys_AddWarnOption(char *); +PyAPI_FUNC(int) PySys_HasWarnOptions(void); + +PyAPI_FUNC(size_t) _PySys_GetSizeOf(PyObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_SYSMODULE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/timefuncs.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,26 @@ +/* timefuncs.h + */ + +/* Utility function related to timemodule.c. */ + +#ifndef TIMEFUNCS_H +#define TIMEFUNCS_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Cast double x to time_t, but raise ValueError if x is too large + * to fit in a time_t. ValueError is set on return iff the return + * value is (time_t)-1 and PyErr_Occurred(). + */ +PyAPI_FUNC(time_t) _PyTime_DoubleToTimet(double x); + +/* Get the current time since the epoch in seconds */ +PyAPI_FUNC(double) _PyTime_FloatTime(void); + + +#ifdef __cplusplus +} +#endif +#endif /* TIMEFUNCS_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/token.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,85 @@ + +/* Token types */ + +#ifndef Py_TOKEN_H +#define Py_TOKEN_H +#ifdef __cplusplus +extern "C" { +#endif + +#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ + +#define ENDMARKER 0 +#define NAME 1 +#define NUMBER 2 +#define STRING 3 +#define NEWLINE 4 +#define INDENT 5 +#define DEDENT 6 +#define LPAR 7 +#define RPAR 8 +#define LSQB 9 +#define RSQB 10 +#define COLON 11 +#define COMMA 12 +#define SEMI 13 +#define PLUS 14 +#define MINUS 15 +#define STAR 16 +#define SLASH 17 +#define VBAR 18 +#define AMPER 19 +#define LESS 20 +#define GREATER 21 +#define EQUAL 22 +#define DOT 23 +#define PERCENT 24 +#define BACKQUOTE 25 +#define LBRACE 26 +#define RBRACE 27 +#define EQEQUAL 28 +#define NOTEQUAL 29 +#define LESSEQUAL 30 +#define GREATEREQUAL 31 +#define TILDE 32 +#define CIRCUMFLEX 33 +#define LEFTSHIFT 34 +#define RIGHTSHIFT 35 +#define DOUBLESTAR 36 +#define PLUSEQUAL 37 +#define MINEQUAL 38 +#define STAREQUAL 39 +#define SLASHEQUAL 40 +#define PERCENTEQUAL 41 +#define AMPEREQUAL 42 +#define VBAREQUAL 43 +#define CIRCUMFLEXEQUAL 44 +#define LEFTSHIFTEQUAL 45 +#define RIGHTSHIFTEQUAL 46 +#define DOUBLESTAREQUAL 47 +#define DOUBLESLASH 48 +#define DOUBLESLASHEQUAL 49 +#define AT 50 +/* Don't forget to update the table _PyParser_TokenNames in tokenizer.c! */ +#define OP 51 +#define ERRORTOKEN 52 +#define N_TOKENS 53 + +/* Special definitions for cooperation with parser */ + +#define NT_OFFSET 256 + +#define ISTERMINAL(x) ((x) < NT_OFFSET) +#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) +#define ISEOF(x) ((x) == ENDMARKER) + + +PyAPI_DATA(char *) _PyParser_TokenNames[]; /* Token names */ +PyAPI_FUNC(int) PyToken_OneChar(int); +PyAPI_FUNC(int) PyToken_TwoChars(int, int); +PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TOKEN_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/traceback.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,31 @@ + +#ifndef Py_TRACEBACK_H +#define Py_TRACEBACK_H +#ifdef __cplusplus +extern "C" { +#endif + +struct _frame; + +/* Traceback interface */ + +typedef struct _traceback { + PyObject_HEAD + struct _traceback *tb_next; + struct _frame *tb_frame; + int tb_lasti; + int tb_lineno; +} PyTracebackObject; + +PyAPI_FUNC(int) PyTraceBack_Here(struct _frame *); +PyAPI_FUNC(int) PyTraceBack_Print(PyObject *, PyObject *); +PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, const char *, int, int); + +/* Reveal traceback type so we can typecheck traceback objects */ +PyAPI_DATA(PyTypeObject) PyTraceBack_Type; +#define PyTraceBack_Check(v) (Py_TYPE(v) == &PyTraceBack_Type) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TRACEBACK_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/tupleobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,61 @@ + +/* Tuple object interface */ + +#ifndef Py_TUPLEOBJECT_H +#define Py_TUPLEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* +Another generally useful object type is a tuple of object pointers. +For Python, this is an immutable type. C code can change the tuple items +(but not their number), and even use tuples are general-purpose arrays of +object references, but in general only brand new tuples should be mutated, +not ones that might already have been exposed to Python code. + +*** WARNING *** PyTuple_SetItem does not increment the new item's reference +count, but does decrement the reference count of the item it replaces, +if not nil. It does *decrement* the reference count if it is *not* +inserted in the tuple. Similarly, PyTuple_GetItem does not increment the +returned item's reference count. +*/ + +typedef struct { + PyObject_VAR_HEAD + PyObject *ob_item[1]; + + /* ob_item contains space for 'ob_size' elements. + * Items must normally not be NULL, except during construction when + * the tuple is not yet visible outside the function that builds it. + */ +} PyTupleObject; + +PyAPI_DATA(PyTypeObject) PyTuple_Type; + +#define PyTuple_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TUPLE_SUBCLASS) +#define PyTuple_CheckExact(op) (Py_TYPE(op) == &PyTuple_Type) + +PyAPI_FUNC(PyObject *) PyTuple_New(Py_ssize_t size); +PyAPI_FUNC(Py_ssize_t) PyTuple_Size(PyObject *); +PyAPI_FUNC(PyObject *) PyTuple_GetItem(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *); +PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); +PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t); +PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...); +PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *); + +/* Macro, trading safety for speed */ +#define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i]) +#define PyTuple_GET_SIZE(op) Py_SIZE(op) + +/* Macro, *only* to be used to fill in brand new tuples */ +#define PyTuple_SET_ITEM(op, i, v) (((PyTupleObject *)(op))->ob_item[i] = v) + +PyAPI_FUNC(int) PyTuple_ClearFreeList(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TUPLEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/ucnhash.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,33 @@ +/* Unicode name database interface */ + +#ifndef Py_UCNHASH_H +#define Py_UCNHASH_H +#ifdef __cplusplus +extern "C" { +#endif + +/* revised ucnhash CAPI interface (exported through a "wrapper") */ + +#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI" + +typedef struct { + + /* Size of this struct */ + int size; + + /* Get name for a given character code. Returns non-zero if + success, zero if not. Does not set Python exceptions. + If self is NULL, data come from the default version of the database. + If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */ + int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen); + + /* Get character code for a given name. Same error handling + as for getname. */ + int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code); + +} _PyUnicode_Name_CAPI; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_UCNHASH_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/unicodeobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,1413 @@ +#ifndef Py_UNICODEOBJECT_H +#define Py_UNICODEOBJECT_H + +#include <stdarg.h> + +/* + +Unicode implementation based on original code by Fredrik Lundh, +modified by Marc-Andre Lemburg (mal@lemburg.com) according to the +Unicode Integration Proposal (see file Misc/unicode.txt). + +Copyright (c) Corporation for National Research Initiatives. + + + Original header: + -------------------------------------------------------------------- + + * Yet another Unicode string type for Python. This type supports the + * 16-bit Basic Multilingual Plane (BMP) only. + * + * Written by Fredrik Lundh, January 1999. + * + * Copyright (c) 1999 by Secret Labs AB. + * Copyright (c) 1999 by Fredrik Lundh. + * + * fredrik@pythonware.com + * http://www.pythonware.com + * + * -------------------------------------------------------------------- + * This Unicode String Type is + * + * Copyright (c) 1999 by Secret Labs AB + * Copyright (c) 1999 by Fredrik Lundh + * + * By obtaining, using, and/or copying this software and/or its + * associated documentation, you agree that you have read, understood, + * and will comply with the following terms and conditions: + * + * Permission to use, copy, modify, and distribute this software and its + * associated documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appears in all + * copies, and that both that copyright notice and this permission notice + * appear in supporting documentation, and that the name of Secret Labs + * AB or the author not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. + * + * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO + * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * -------------------------------------------------------------------- */ + +#include <ctype.h> + +/* === Internal API ======================================================= */ + +/* --- Internal Unicode Format -------------------------------------------- */ + +#ifndef Py_USING_UNICODE + +#define PyUnicode_Check(op) 0 +#define PyUnicode_CheckExact(op) 0 + +#else + +/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is + properly set, but the default rules below doesn't set it. I'll + sort this out some other day -- fredrik@pythonware.com */ + +#ifndef Py_UNICODE_SIZE +#error Must define Py_UNICODE_SIZE +#endif + +/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode + strings are stored as UCS-2 (with limited support for UTF-16) */ + +#if Py_UNICODE_SIZE >= 4 +#define Py_UNICODE_WIDE +#endif + +/* Set these flags if the platform has "wchar.h", "wctype.h" and the + wchar_t type is a 16-bit unsigned type */ +/* #define HAVE_WCHAR_H */ +/* #define HAVE_USABLE_WCHAR_T */ + +/* Defaults for various platforms */ +#ifndef PY_UNICODE_TYPE + +/* Windows has a usable wchar_t type (unless we're using UCS-4) */ +# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2 +# define HAVE_USABLE_WCHAR_T +# define PY_UNICODE_TYPE wchar_t +# endif + +# if defined(Py_UNICODE_WIDE) +# define PY_UNICODE_TYPE Py_UCS4 +# endif + +#endif + +/* If the compiler provides a wchar_t type we try to support it + through the interface functions PyUnicode_FromWideChar() and + PyUnicode_AsWideChar(). */ + +#ifdef HAVE_USABLE_WCHAR_T +# ifndef HAVE_WCHAR_H +# define HAVE_WCHAR_H +# endif +#endif + +#ifdef HAVE_WCHAR_H +/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ +# ifdef _HAVE_BSDI +# include <time.h> +# endif +# include <wchar.h> +#endif + +/* + * Use this typedef when you need to represent a UTF-16 surrogate pair + * as single unsigned integer. + */ +#if SIZEOF_INT >= 4 +typedef unsigned int Py_UCS4; +#elif SIZEOF_LONG >= 4 +typedef unsigned long Py_UCS4; +#endif + +/* Py_UNICODE is the native Unicode storage format (code unit) used by + Python and represents a single Unicode element in the Unicode + type. */ + +typedef PY_UNICODE_TYPE Py_UNICODE; + +/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */ + +/* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds + produce different external names and thus cause import errors in + case Python interpreters and extensions with mixed compiled in + Unicode width assumptions are combined. */ + +#ifndef Py_UNICODE_WIDE + +# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString +# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString +# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject +# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString +# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String +# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString +# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String +# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String +# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String +# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode +# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString +# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar +# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist +# define PyUnicode_Compare PyUnicodeUCS2_Compare +# define PyUnicode_Concat PyUnicodeUCS2_Concat +# define PyUnicode_Contains PyUnicodeUCS2_Contains +# define PyUnicode_Count PyUnicodeUCS2_Count +# define PyUnicode_Decode PyUnicodeUCS2_Decode +# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII +# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap +# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 +# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape +# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 +# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful +# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16 +# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful +# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8 +# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful +# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape +# define PyUnicode_Encode PyUnicodeUCS2_Encode +# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII +# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap +# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal +# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1 +# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape +# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32 +# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16 +# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8 +# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape +# define PyUnicode_Find PyUnicodeUCS2_Find +# define PyUnicode_Format PyUnicodeUCS2_Format +# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject +# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat +# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV +# define PyUnicode_FromObject PyUnicodeUCS2_FromObject +# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal +# define PyUnicode_FromString PyUnicodeUCS2_FromString +# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize +# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode +# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar +# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding +# define PyUnicode_GetMax PyUnicodeUCS2_GetMax +# define PyUnicode_GetSize PyUnicodeUCS2_GetSize +# define PyUnicode_Join PyUnicodeUCS2_Join +# define PyUnicode_Partition PyUnicodeUCS2_Partition +# define PyUnicode_RPartition PyUnicodeUCS2_RPartition +# define PyUnicode_RSplit PyUnicodeUCS2_RSplit +# define PyUnicode_Replace PyUnicodeUCS2_Replace +# define PyUnicode_Resize PyUnicodeUCS2_Resize +# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare +# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding +# define PyUnicode_Split PyUnicodeUCS2_Split +# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines +# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch +# define PyUnicode_Translate PyUnicodeUCS2_Translate +# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap +# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString +# define _PyUnicode_Fini _PyUnicodeUCS2_Fini +# define _PyUnicode_Init _PyUnicodeUCS2_Init +# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha +# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit +# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit +# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak +# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase +# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric +# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase +# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase +# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace +# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit +# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit +# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase +# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric +# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase +# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase + +#else + +# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString +# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString +# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject +# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString +# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String +# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString +# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String +# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String +# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String +# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode +# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString +# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar +# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist +# define PyUnicode_Compare PyUnicodeUCS4_Compare +# define PyUnicode_Concat PyUnicodeUCS4_Concat +# define PyUnicode_Contains PyUnicodeUCS4_Contains +# define PyUnicode_Count PyUnicodeUCS4_Count +# define PyUnicode_Decode PyUnicodeUCS4_Decode +# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII +# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap +# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 +# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape +# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 +# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful +# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16 +# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful +# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8 +# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful +# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape +# define PyUnicode_Encode PyUnicodeUCS4_Encode +# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII +# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap +# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal +# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1 +# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape +# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32 +# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16 +# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8 +# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape +# define PyUnicode_Find PyUnicodeUCS4_Find +# define PyUnicode_Format PyUnicodeUCS4_Format +# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject +# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat +# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV +# define PyUnicode_FromObject PyUnicodeUCS4_FromObject +# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal +# define PyUnicode_FromString PyUnicodeUCS4_FromString +# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize +# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode +# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar +# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding +# define PyUnicode_GetMax PyUnicodeUCS4_GetMax +# define PyUnicode_GetSize PyUnicodeUCS4_GetSize +# define PyUnicode_Join PyUnicodeUCS4_Join +# define PyUnicode_Partition PyUnicodeUCS4_Partition +# define PyUnicode_RPartition PyUnicodeUCS4_RPartition +# define PyUnicode_RSplit PyUnicodeUCS4_RSplit +# define PyUnicode_Replace PyUnicodeUCS4_Replace +# define PyUnicode_Resize PyUnicodeUCS4_Resize +# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare +# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding +# define PyUnicode_Split PyUnicodeUCS4_Split +# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines +# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch +# define PyUnicode_Translate PyUnicodeUCS4_Translate +# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap +# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString +# define _PyUnicode_Fini _PyUnicodeUCS4_Fini +# define _PyUnicode_Init _PyUnicodeUCS4_Init +# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha +# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit +# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit +# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak +# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase +# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric +# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase +# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase +# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace +# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit +# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit +# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase +# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric +# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase +# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase + + +#endif + +/* --- Internal Unicode Operations ---------------------------------------- */ + +/* If you want Python to use the compiler's wctype.h functions instead + of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or + configure Python using --with-wctype-functions. This reduces the + interpreter's code size. */ + +#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) + +#include <wctype.h> + +#define Py_UNICODE_ISSPACE(ch) iswspace(ch) + +#define Py_UNICODE_ISLOWER(ch) iswlower(ch) +#define Py_UNICODE_ISUPPER(ch) iswupper(ch) +#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) +#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) + +#define Py_UNICODE_TOLOWER(ch) towlower(ch) +#define Py_UNICODE_TOUPPER(ch) towupper(ch) +#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) + +#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) +#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) +#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) + +#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) +#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) +#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) + +#define Py_UNICODE_ISALPHA(ch) iswalpha(ch) + +#else + +/* Since splitting on whitespace is an important use case, and + whitespace in most situations is solely ASCII whitespace, we + optimize for the common case by using a quick look-up table + _Py_ascii_whitespace (see below) with an inlined check. + + */ +#define Py_UNICODE_ISSPACE(ch) \ + ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) + +#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) +#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) +#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) +#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) + +#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) +#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) +#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) + +#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) +#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) +#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) + +#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) +#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) +#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) + +#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) + +#endif + +#define Py_UNICODE_ISALNUM(ch) \ + (Py_UNICODE_ISALPHA(ch) || \ + Py_UNICODE_ISDECIMAL(ch) || \ + Py_UNICODE_ISDIGIT(ch) || \ + Py_UNICODE_ISNUMERIC(ch)) + +#define Py_UNICODE_COPY(target, source, length) \ + Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) + +#define Py_UNICODE_FILL(target, value, length) \ + do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ + for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ + } while (0) + +/* Check if substring matches at given offset. the offset must be + valid, and the substring must not be empty */ + +#define Py_UNICODE_MATCH(string, offset, substring) \ + ((*((string)->str + (offset)) == *((substring)->str)) && \ + ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \ + !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE))) + +#ifdef __cplusplus +extern "C" { +#endif + +/* --- Unicode Type ------------------------------------------------------- */ + +typedef struct { + PyObject_HEAD + Py_ssize_t length; /* Length of raw Unicode data in buffer */ + Py_UNICODE *str; /* Raw Unicode buffer */ + long hash; /* Hash value; -1 if not set */ + PyObject *defenc; /* (Default) Encoded version as Python + string, or NULL; this is used for + implementing the buffer protocol */ +} PyUnicodeObject; + +PyAPI_DATA(PyTypeObject) PyUnicode_Type; + +#define PyUnicode_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) +#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) + +/* Fast access macros */ +#define PyUnicode_GET_SIZE(op) \ + (((PyUnicodeObject *)(op))->length) +#define PyUnicode_GET_DATA_SIZE(op) \ + (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)) +#define PyUnicode_AS_UNICODE(op) \ + (((PyUnicodeObject *)(op))->str) +#define PyUnicode_AS_DATA(op) \ + ((const char *)((PyUnicodeObject *)(op))->str) + +/* --- Constants ---------------------------------------------------------- */ + +/* This Unicode character will be used as replacement character during + decoding if the errors argument is set to "replace". Note: the + Unicode character U+FFFD is the official REPLACEMENT CHARACTER in + Unicode 3.0. */ + +#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD) + +/* === Public API ========================================================= */ + +/* --- Plain Py_UNICODE --------------------------------------------------- */ + +/* Create a Unicode Object from the Py_UNICODE buffer u of the given + size. + + u may be NULL which causes the contents to be undefined. It is the + user's responsibility to fill in the needed data afterwards. Note + that modifying the Unicode object contents after construction is + only allowed if u was set to NULL. + + The buffer is copied into the new object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( + const Py_UNICODE *u, /* Unicode buffer */ + Py_ssize_t size /* size of buffer */ + ); + +/* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ +PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( + const char *u, /* char buffer */ + Py_ssize_t size /* size of buffer */ + ); + +/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated + Latin-1 encoded bytes */ +PyAPI_FUNC(PyObject*) PyUnicode_FromString( + const char *u /* string */ + ); + +/* Return a read-only pointer to the Unicode object's internal + Py_UNICODE buffer. */ + +PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( + PyObject *unicode /* Unicode object */ + ); + +/* Get the length of the Unicode object. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( + PyObject *unicode /* Unicode object */ + ); + +/* Get the maximum ordinal for a Unicode character. */ +PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); + +/* Resize an already allocated Unicode object to the new size length. + + *unicode is modified to point to the new (resized) object and 0 + returned on success. + + This API may only be called by the function which also called the + Unicode constructor. The refcount on the object must be 1. Otherwise, + an error is returned. + + Error handling is implemented as follows: an exception is set, -1 + is returned and *unicode left untouched. + +*/ + +PyAPI_FUNC(int) PyUnicode_Resize( + PyObject **unicode, /* Pointer to the Unicode object */ + Py_ssize_t length /* New length */ + ); + +/* Coerce obj to a Unicode object and return a reference with + *incremented* refcount. + + Coercion is done in the following way: + + 1. String and other char buffer compatible objects are decoded + under the assumptions that they contain data using the current + default encoding. Decoding is done in "strict" mode. + + 2. All other objects (including Unicode objects) raise an + exception. + + The API returns NULL in case of an error. The caller is responsible + for decref'ing the returned objects. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( + register PyObject *obj, /* Object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Coerce obj to a Unicode object and return a reference with + *incremented* refcount. + + Unicode objects are passed back as-is (subclasses are converted to + true Unicode objects), all other objects are delegated to + PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in + using the default encoding as basis for decoding the object. + + The API returns NULL in case of an error. The caller is responsible + for decref'ing the returned objects. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_FromObject( + register PyObject *obj /* Object */ + ); + +PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, + Py_UNICODE *format_spec, + Py_ssize_t format_spec_len); + +/* --- wchar_t support for platforms which support it --------------------- */ + +#ifdef HAVE_WCHAR_H + +/* Create a Unicode Object from the whcar_t buffer w of the given + size. + + The buffer is copied into the new object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( + register const wchar_t *w, /* wchar_t buffer */ + Py_ssize_t size /* size of buffer */ + ); + +/* Copies the Unicode Object contents into the wchar_t buffer w. At + most size wchar_t characters are copied. + + Note that the resulting wchar_t string may or may not be + 0-terminated. It is the responsibility of the caller to make sure + that the wchar_t string is 0-terminated in case this is required by + the application. + + Returns the number of wchar_t characters copied (excluding a + possibly trailing 0-termination character) or -1 in case of an + error. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( + PyUnicodeObject *unicode, /* Unicode object */ + register wchar_t *w, /* wchar_t buffer */ + Py_ssize_t size /* size of buffer */ + ); + +#endif + +/* --- Unicode ordinals --------------------------------------------------- */ + +/* Create a Unicode Object from the given Unicode code point ordinal. + + The ordinal must be in range(0x10000) on narrow Python builds + (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is + raised in case it is not. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); + +/* --- Free-list management ----------------------------------------------- */ + +/* Clear the free list used by the Unicode implementation. + + This can be used to release memory used for objects on the free + list back to the Python memory allocator. + +*/ + +PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); + +/* === Builtin Codecs ===================================================== + + Many of these APIs take two arguments encoding and errors. These + parameters encoding and errors have the same semantics as the ones + of the builtin unicode() API. + + Setting encoding to NULL causes the default encoding to be used. + + Error handling is set by errors which may also be set to NULL + meaning to use the default handling defined for the codec. Default + error handling for all builtin codecs is "strict" (ValueErrors are + raised). + + The codecs all use a similar interface. Only deviation from the + generic ones are documented. + +*/ + +/* --- Manage the default encoding ---------------------------------------- */ + +/* Return a Python string holding the default encoded value of the + Unicode object. + + The resulting string is cached in the Unicode object for subsequent + usage by this function. The cached version is needed to implement + the character buffer interface and will live (at least) as long as + the Unicode object itself. + + The refcount of the string is *not* incremented. + + *** Exported for internal use by the interpreter only !!! *** + +*/ + +PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( + PyObject *, const char *); + +/* Returns the currently active default encoding. + + The default encoding is currently implemented as run-time settable + process global. This may change in future versions of the + interpreter to become a parameter which is managed on a per-thread + basis. + + */ + +PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); + +/* Sets the currently active default encoding. + + Returns 0 on success, -1 in case of an error. + + */ + +PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding( + const char *encoding /* Encoding name in standard form */ + ); + +/* --- Generic Codecs ----------------------------------------------------- */ + +/* Create a Unicode object by decoding the encoded string s of the + given size. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Decode( + const char *s, /* encoded string */ + Py_ssize_t size, /* size of buffer */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a Py_UNICODE buffer of the given size and returns a + Python string object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Encode( + const Py_UNICODE *s, /* Unicode char buffer */ + Py_ssize_t size, /* number of Py_UNICODE chars to encode */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a Unicode object and returns the result as Python + object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a Unicode object and returns the result as Python string + object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( + PyObject* string /* 256 character map */ + ); + + +/* --- UTF-7 Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( + const char *string, /* UTF-7 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( + const char *string, /* UTF-7 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + int base64SetO, /* Encode RFC2152 Set O characters in base64 */ + int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ + const char *errors /* error handling */ + ); + +/* --- UTF-8 Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( + const char *string, /* UTF-8 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( + const char *string, /* UTF-8 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( + PyObject *unicode /* Unicode object */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ); + +/* --- UTF-32 Codecs ------------------------------------------------------ */ + +/* Decodes length bytes from a UTF-32 encoded buffer string and returns + the corresponding Unicode object. + + errors (if non-NULL) defines the error handling. It defaults + to "strict". + + If byteorder is non-NULL, the decoder starts decoding using the + given byte order: + + *byteorder == -1: little endian + *byteorder == 0: native order + *byteorder == 1: big endian + + In native mode, the first four bytes of the stream are checked for a + BOM mark. If found, the BOM mark is analysed, the byte order + adjusted and the BOM skipped. In the other modes, no BOM mark + interpretation is done. After completion, *byteorder is set to the + current byte order at the end of input data. + + If byteorder is NULL, the codec starts in native order mode. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( + const char *string, /* UTF-32 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( + const char *string, /* UTF-32 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder, /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +/* Returns a Python string using the UTF-32 encoding in native byte + order. The string always starts with a BOM mark. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( + PyObject *unicode /* Unicode object */ + ); + +/* Returns a Python string object holding the UTF-32 encoded value of + the Unicode data. + + If byteorder is not 0, output is written according to the following + byte order: + + byteorder == -1: little endian + byteorder == 0: native byte order (writes a BOM mark) + byteorder == 1: big endian + + If byteorder is 0, the output string will always start with the + Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is + prepended. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors, /* error handling */ + int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + ); + +/* --- UTF-16 Codecs ------------------------------------------------------ */ + +/* Decodes length bytes from a UTF-16 encoded buffer string and returns + the corresponding Unicode object. + + errors (if non-NULL) defines the error handling. It defaults + to "strict". + + If byteorder is non-NULL, the decoder starts decoding using the + given byte order: + + *byteorder == -1: little endian + *byteorder == 0: native order + *byteorder == 1: big endian + + In native mode, the first two bytes of the stream are checked for a + BOM mark. If found, the BOM mark is analysed, the byte order + adjusted and the BOM skipped. In the other modes, no BOM mark + interpretation is done. After completion, *byteorder is set to the + current byte order at the end of input data. + + If byteorder is NULL, the codec starts in native order mode. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( + const char *string, /* UTF-16 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( + const char *string, /* UTF-16 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder, /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +/* Returns a Python string using the UTF-16 encoding in native byte + order. The string always starts with a BOM mark. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( + PyObject *unicode /* Unicode object */ + ); + +/* Returns a Python string object holding the UTF-16 encoded value of + the Unicode data. + + If byteorder is not 0, output is written according to the following + byte order: + + byteorder == -1: little endian + byteorder == 0: native byte order (writes a BOM mark) + byteorder == 1: big endian + + If byteorder is 0, the output string will always start with the + Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is + prepended. + + Note that Py_UNICODE data is being interpreted as UTF-16 reduced to + UCS-2. This trick makes it possible to add full UTF-16 capabilities + at a later point without compromising the APIs. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors, /* error handling */ + int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + ); + +/* --- Unicode-Escape Codecs ---------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( + PyObject *unicode /* Unicode object */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length /* Number of Py_UNICODE chars to encode */ + ); + +/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( + const char *string, /* Raw-Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( + PyObject *unicode /* Unicode object */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length /* Number of Py_UNICODE chars to encode */ + ); + +/* --- Unicode Internal Codec --------------------------------------------- + + Only for internal use in _codecsmodule.c */ + +PyObject *_PyUnicode_DecodeUnicodeInternal( + const char *string, + Py_ssize_t length, + const char *errors + ); + +/* --- Latin-1 Codecs ----------------------------------------------------- + + Note: Latin-1 corresponds to the first 256 Unicode ordinals. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( + const char *string, /* Latin-1 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( + PyObject *unicode /* Unicode object */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ); + +/* --- ASCII Codecs ------------------------------------------------------- + + Only 7-bit ASCII data is excepted. All other codes generate errors. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( + const char *string, /* ASCII encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( + PyObject *unicode /* Unicode object */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ); + +/* --- Character Map Codecs ----------------------------------------------- + + This codec uses mappings to encode and decode characters. + + Decoding mappings must map single string characters to single + Unicode characters, integers (which are then interpreted as Unicode + ordinals) or None (meaning "undefined mapping" and causing an + error). + + Encoding mappings must map single Unicode characters to single + string characters, integers (which are then interpreted as Latin-1 + ordinals) or None (meaning "undefined mapping" and causing an + error). + + If a character lookup fails with a LookupError, the character is + copied as-is meaning that its ordinal value will be interpreted as + Unicode or Latin-1 ordinal resp. Because of this mappings only need + to contain those mappings which map characters to different code + points. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( + const char *string, /* Encoded string */ + Py_ssize_t length, /* size of string */ + PyObject *mapping, /* character mapping + (char ordinal -> unicode ordinal) */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( + PyObject *unicode, /* Unicode object */ + PyObject *mapping /* character mapping + (unicode ordinal -> char ordinal) */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + PyObject *mapping, /* character mapping + (unicode ordinal -> char ordinal) */ + const char *errors /* error handling */ + ); + +/* Translate a Py_UNICODE buffer of the given length by applying a + character mapping table to it and return the resulting Unicode + object. + + The mapping table must map Unicode ordinal integers to Unicode + ordinal integers or None (causing deletion of the character). + + Mapping tables may be dictionaries or sequences. Unmapped character + ordinals (ones which cause a LookupError) are left untouched and + are copied as-is. + +*/ + +PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + PyObject *table, /* Translate table */ + const char *errors /* error handling */ + ); + +#ifdef MS_WIN32 + +/* --- MBCS codecs for Windows -------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( + const char *string, /* MBCS encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( + const char *string, /* MBCS encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( + PyObject *unicode /* Unicode object */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ); + +#endif /* MS_WIN32 */ + +/* --- Decimal Encoder ---------------------------------------------------- */ + +/* Takes a Unicode string holding a decimal value and writes it into + an output buffer using standard ASCII digit codes. + + The output buffer has to provide at least length+1 bytes of storage + area. The output string is 0-terminated. + + The encoder converts whitespace to ' ', decimal characters to their + corresponding ASCII digit and all other Latin-1 characters except + \0 as-is. Characters outside this range (Unicode ordinals 1-256) + are treated as errors. This includes embedded NULL bytes. + + Error handling is defined by the errors argument: + + NULL or "strict": raise a ValueError + "ignore": ignore the wrong characters (these are not copied to the + output buffer) + "replace": replaces illegal characters with '?' + + Returns 0 on success, -1 on failure. + +*/ + +PyAPI_FUNC(int) PyUnicode_EncodeDecimal( + Py_UNICODE *s, /* Unicode buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + char *output, /* Output buffer; must have size >= length */ + const char *errors /* error handling */ + ); + +/* --- Methods & Slots ---------------------------------------------------- + + These are capable of handling Unicode objects and strings on input + (we refer to them as strings in the descriptions) and return + Unicode objects or integers as apporpriate. */ + +/* Concat two strings giving a new Unicode string. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Concat( + PyObject *left, /* Left string */ + PyObject *right /* Right string */ + ); + +/* Split a string giving a list of Unicode strings. + + If sep is NULL, splitting will be done at all whitespace + substrings. Otherwise, splits occur at the given separator. + + At most maxsplit splits will be done. If negative, no limit is set. + + Separators are not included in the resulting list. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_Split( + PyObject *s, /* String to split */ + PyObject *sep, /* String separator */ + Py_ssize_t maxsplit /* Maxsplit count */ + ); + +/* Dito, but split at line breaks. + + CRLF is considered to be one line break. Line breaks are not + included in the resulting list. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( + PyObject *s, /* String to split */ + int keepends /* If true, line end markers are included */ + ); + +/* Partition a string using a given separator. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Partition( + PyObject *s, /* String to partition */ + PyObject *sep /* String separator */ + ); + +/* Partition a string using a given separator, searching from the end of the + string. */ + +PyAPI_FUNC(PyObject*) PyUnicode_RPartition( + PyObject *s, /* String to partition */ + PyObject *sep /* String separator */ + ); + +/* Split a string giving a list of Unicode strings. + + If sep is NULL, splitting will be done at all whitespace + substrings. Otherwise, splits occur at the given separator. + + At most maxsplit splits will be done. But unlike PyUnicode_Split + PyUnicode_RSplit splits from the end of the string. If negative, + no limit is set. + + Separators are not included in the resulting list. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_RSplit( + PyObject *s, /* String to split */ + PyObject *sep, /* String separator */ + Py_ssize_t maxsplit /* Maxsplit count */ + ); + +/* Translate a string by applying a character mapping table to it and + return the resulting Unicode object. + + The mapping table must map Unicode ordinal integers to Unicode + ordinal integers or None (causing deletion of the character). + + Mapping tables may be dictionaries or sequences. Unmapped character + ordinals (ones which cause a LookupError) are left untouched and + are copied as-is. + +*/ + +PyAPI_FUNC(PyObject *) PyUnicode_Translate( + PyObject *str, /* String */ + PyObject *table, /* Translate table */ + const char *errors /* error handling */ + ); + +/* Join a sequence of strings using the given separator and return + the resulting Unicode string. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Join( + PyObject *separator, /* Separator string */ + PyObject *seq /* Sequence object */ + ); + +/* Return 1 if substr matches str[start:end] at the given tail end, 0 + otherwise. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( + PyObject *str, /* String */ + PyObject *substr, /* Prefix or Suffix string */ + Py_ssize_t start, /* Start index */ + Py_ssize_t end, /* Stop index */ + int direction /* Tail end: -1 prefix, +1 suffix */ + ); + +/* Return the first position of substr in str[start:end] using the + given search direction or -1 if not found. -2 is returned in case + an error occurred and an exception is set. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( + PyObject *str, /* String */ + PyObject *substr, /* Substring to find */ + Py_ssize_t start, /* Start index */ + Py_ssize_t end, /* Stop index */ + int direction /* Find direction: +1 forward, -1 backward */ + ); + +/* Count the number of occurrences of substr in str[start:end]. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( + PyObject *str, /* String */ + PyObject *substr, /* Substring to count */ + Py_ssize_t start, /* Start index */ + Py_ssize_t end /* Stop index */ + ); + +/* Replace at most maxcount occurrences of substr in str with replstr + and return the resulting Unicode object. */ + +PyAPI_FUNC(PyObject *) PyUnicode_Replace( + PyObject *str, /* String */ + PyObject *substr, /* Substring to find */ + PyObject *replstr, /* Substring to replace */ + Py_ssize_t maxcount /* Max. number of replacements to apply; + -1 = all */ + ); + +/* Compare two strings and return -1, 0, 1 for less than, equal, + greater than resp. */ + +PyAPI_FUNC(int) PyUnicode_Compare( + PyObject *left, /* Left string */ + PyObject *right /* Right string */ + ); + +/* Rich compare two strings and return one of the following: + + - NULL in case an exception was raised + - Py_True or Py_False for successfuly comparisons + - Py_NotImplemented in case the type combination is unknown + + Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in + case the conversion of the arguments to Unicode fails with a + UnicodeDecodeError. + + Possible values for op: + + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE + +*/ + +PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( + PyObject *left, /* Left string */ + PyObject *right, /* Right string */ + int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ + ); + +/* Apply an argument tuple or dictionary to a format string and return + the resulting Unicode string. */ + +PyAPI_FUNC(PyObject *) PyUnicode_Format( + PyObject *format, /* Format string */ + PyObject *args /* Argument tuple or dictionary */ + ); + +/* Checks whether element is contained in container and return 1/0 + accordingly. + + element has to coerce to a one element Unicode string. -1 is + returned in case of an error. */ + +PyAPI_FUNC(int) PyUnicode_Contains( + PyObject *container, /* Container string */ + PyObject *element /* Element string */ + ); + +/* Externally visible for str.strip(unicode) */ +PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( + PyUnicodeObject *self, + int striptype, + PyObject *sepobj + ); + +/* === Characters Type APIs =============================================== */ + +/* Helper array used by Py_UNICODE_ISSPACE(). */ + +PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; + +/* These should not be used directly. Use the Py_UNICODE_IS* and + Py_UNICODE_TO* macros instead. + + These APIs are implemented in Objects/unicodectype.c. + +*/ + +PyAPI_FUNC(int) _PyUnicode_IsLowercase( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsUppercase( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsTitlecase( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsWhitespace( + const Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsLinebreak( + const Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_ToDigit( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(double) _PyUnicode_ToNumeric( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsDigit( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsNumeric( + Py_UNICODE ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsAlpha( + Py_UNICODE ch /* Unicode character */ + ); + +#ifdef __cplusplus +} +#endif +#endif /* Py_USING_UNICODE */ +#endif /* !Py_UNICODEOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/warnings.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,23 @@ +#ifndef Py_WARNINGS_H +#define Py_WARNINGS_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(void) _PyWarnings_Init(void); + +PyAPI_FUNC(int) PyErr_WarnEx(PyObject *, const char *, Py_ssize_t); +PyAPI_FUNC(int) PyErr_WarnExplicit(PyObject *, const char *, const char *, int, + const char *, PyObject *); + +#define PyErr_WarnPy3k(msg, stacklevel) \ + (Py_Py3kWarningFlag ? PyErr_WarnEx(PyExc_DeprecationWarning, msg, stacklevel) : 0) + +/* DEPRECATED: Use PyErr_WarnEx() instead. */ +#define PyErr_Warn(category, msg) PyErr_WarnEx(category, msg, 1) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_WARNINGS_H */ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/include/python2.7/weakrefobject.h Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,82 @@ +/* Weak references objects for Python. */ + +#ifndef Py_WEAKREFOBJECT_H +#define Py_WEAKREFOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct _PyWeakReference PyWeakReference; + +/* PyWeakReference is the base struct for the Python ReferenceType, ProxyType, + * and CallableProxyType. + */ +struct _PyWeakReference { + PyObject_HEAD + + /* The object to which this is a weak reference, or Py_None if none. + * Note that this is a stealth reference: wr_object's refcount is + * not incremented to reflect this pointer. + */ + PyObject *wr_object; + + /* A callable to invoke when wr_object dies, or NULL if none. */ + PyObject *wr_callback; + + /* A cache for wr_object's hash code. As usual for hashes, this is -1 + * if the hash code isn't known yet. + */ + long hash; + + /* If wr_object is weakly referenced, wr_object has a doubly-linked NULL- + * terminated list of weak references to it. These are the list pointers. + * If wr_object goes away, wr_object is set to Py_None, and these pointers + * have no meaning then. + */ + PyWeakReference *wr_prev; + PyWeakReference *wr_next; +}; + +PyAPI_DATA(PyTypeObject) _PyWeakref_RefType; +PyAPI_DATA(PyTypeObject) _PyWeakref_ProxyType; +PyAPI_DATA(PyTypeObject) _PyWeakref_CallableProxyType; + +#define PyWeakref_CheckRef(op) PyObject_TypeCheck(op, &_PyWeakref_RefType) +#define PyWeakref_CheckRefExact(op) \ + (Py_TYPE(op) == &_PyWeakref_RefType) +#define PyWeakref_CheckProxy(op) \ + ((Py_TYPE(op) == &_PyWeakref_ProxyType) || \ + (Py_TYPE(op) == &_PyWeakref_CallableProxyType)) + +#define PyWeakref_Check(op) \ + (PyWeakref_CheckRef(op) || PyWeakref_CheckProxy(op)) + + +PyAPI_FUNC(PyObject *) PyWeakref_NewRef(PyObject *ob, + PyObject *callback); +PyAPI_FUNC(PyObject *) PyWeakref_NewProxy(PyObject *ob, + PyObject *callback); +PyAPI_FUNC(PyObject *) PyWeakref_GetObject(PyObject *ref); + +PyAPI_FUNC(Py_ssize_t) _PyWeakref_GetWeakrefCount(PyWeakReference *head); + +PyAPI_FUNC(void) _PyWeakref_ClearRef(PyWeakReference *self); + +/* Explanation for the Py_REFCNT() check: when a weakref's target is part + of a long chain of deallocations which triggers the trashcan mechanism, + clearing the weakrefs can be delayed long after the target's refcount + has dropped to zero. In the meantime, code accessing the weakref will + be able to "see" the target object even though it is supposed to be + unreachable. See issue #16602. */ + +#define PyWeakref_GET_OBJECT(ref) \ + (Py_REFCNT(((PyWeakReference *)(ref))->wr_object) > 0 \ + ? ((PyWeakReference *)(ref))->wr_object \ + : Py_None) + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_WEAKREFOBJECT_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/UserDict.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,213 @@ +"""A more or less complete user-defined wrapper around dictionary objects.""" + +class UserDict: + def __init__(*args, **kwargs): + if not args: + raise TypeError("descriptor '__init__' of 'UserDict' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + if args: + dict = args[0] + elif 'dict' in kwargs: + dict = kwargs.pop('dict') + import warnings + warnings.warn("Passing 'dict' as keyword argument is " + "deprecated", PendingDeprecationWarning, + stacklevel=2) + else: + dict = None + self.data = {} + if dict is not None: + self.update(dict) + if len(kwargs): + self.update(kwargs) + def __repr__(self): return repr(self.data) + def __cmp__(self, dict): + if isinstance(dict, UserDict): + return cmp(self.data, dict.data) + else: + return cmp(self.data, dict) + __hash__ = None # Avoid Py3k warning + def __len__(self): return len(self.data) + def __getitem__(self, key): + if key in self.data: + return self.data[key] + if hasattr(self.__class__, "__missing__"): + return self.__class__.__missing__(self, key) + raise KeyError(key) + def __setitem__(self, key, item): self.data[key] = item + def __delitem__(self, key): del self.data[key] + def clear(self): self.data.clear() + def copy(self): + if self.__class__ is UserDict: + return UserDict(self.data.copy()) + import copy + data = self.data + try: + self.data = {} + c = copy.copy(self) + finally: + self.data = data + c.update(self) + return c + def keys(self): return self.data.keys() + def items(self): return self.data.items() + def iteritems(self): return self.data.iteritems() + def iterkeys(self): return self.data.iterkeys() + def itervalues(self): return self.data.itervalues() + def values(self): return self.data.values() + def has_key(self, key): return key in self.data + def update(*args, **kwargs): + if not args: + raise TypeError("descriptor 'update' of 'UserDict' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + if args: + dict = args[0] + elif 'dict' in kwargs: + dict = kwargs.pop('dict') + import warnings + warnings.warn("Passing 'dict' as keyword argument is deprecated", + PendingDeprecationWarning, stacklevel=2) + else: + dict = None + if dict is None: + pass + elif isinstance(dict, UserDict): + self.data.update(dict.data) + elif isinstance(dict, type({})) or not hasattr(dict, 'items'): + self.data.update(dict) + else: + for k, v in dict.items(): + self[k] = v + if len(kwargs): + self.data.update(kwargs) + def get(self, key, failobj=None): + if key not in self: + return failobj + return self[key] + def setdefault(self, key, failobj=None): + if key not in self: + self[key] = failobj + return self[key] + def pop(self, key, *args): + return self.data.pop(key, *args) + def popitem(self): + return self.data.popitem() + def __contains__(self, key): + return key in self.data + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + +class IterableUserDict(UserDict): + def __iter__(self): + return iter(self.data) + +import _abcoll +_abcoll.MutableMapping.register(IterableUserDict) + + +class DictMixin: + # Mixin defining all dictionary methods for classes that already have + # a minimum dictionary interface including getitem, setitem, delitem, + # and keys. Without knowledge of the subclass constructor, the mixin + # does not define __init__() or copy(). In addition to the four base + # methods, progressively more efficiency comes with defining + # __contains__(), __iter__(), and iteritems(). + + # second level definitions support higher levels + def __iter__(self): + for k in self.keys(): + yield k + def has_key(self, key): + try: + self[key] + except KeyError: + return False + return True + def __contains__(self, key): + return self.has_key(key) + + # third level takes advantage of second level definitions + def iteritems(self): + for k in self: + yield (k, self[k]) + def iterkeys(self): + return self.__iter__() + + # fourth level uses definitions from lower levels + def itervalues(self): + for _, v in self.iteritems(): + yield v + def values(self): + return [v for _, v in self.iteritems()] + def items(self): + return list(self.iteritems()) + def clear(self): + for key in self.keys(): + del self[key] + def setdefault(self, key, default=None): + try: + return self[key] + except KeyError: + self[key] = default + return default + def pop(self, key, *args): + if len(args) > 1: + raise TypeError, "pop expected at most 2 arguments, got "\ + + repr(1 + len(args)) + try: + value = self[key] + except KeyError: + if args: + return args[0] + raise + del self[key] + return value + def popitem(self): + try: + k, v = self.iteritems().next() + except StopIteration: + raise KeyError, 'container is empty' + del self[k] + return (k, v) + def update(self, other=None, **kwargs): + # Make progressively weaker assumptions about "other" + if other is None: + pass + elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups + for k, v in other.iteritems(): + self[k] = v + elif hasattr(other, 'keys'): + for k in other.keys(): + self[k] = other[k] + else: + for k, v in other: + self[k] = v + if kwargs: + self.update(kwargs) + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + def __repr__(self): + return repr(dict(self.iteritems())) + def __cmp__(self, other): + if other is None: + return 1 + if isinstance(other, DictMixin): + other = dict(other.iteritems()) + return cmp(dict(self.iteritems()), other) + def __len__(self): + return len(self.keys())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/_abcoll.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,695 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Abstract Base Classes (ABCs) for collections, according to PEP 3119. + +DON'T USE THIS MODULE DIRECTLY! The classes here should be imported +via collections; they are defined here only to alleviate certain +bootstrapping issues. Unit tests are in test_collections. +""" + +from abc import ABCMeta, abstractmethod +import sys + +__all__ = ["Hashable", "Iterable", "Iterator", + "Sized", "Container", "Callable", + "Set", "MutableSet", + "Mapping", "MutableMapping", + "MappingView", "KeysView", "ItemsView", "ValuesView", + "Sequence", "MutableSequence", + ] + +### ONE-TRICK PONIES ### + +def _hasattr(C, attr): + try: + return any(attr in B.__dict__ for B in C.__mro__) + except AttributeError: + # Old-style class + return hasattr(C, attr) + + +class Hashable: + __metaclass__ = ABCMeta + + @abstractmethod + def __hash__(self): + return 0 + + @classmethod + def __subclasshook__(cls, C): + if cls is Hashable: + try: + for B in C.__mro__: + if "__hash__" in B.__dict__: + if B.__dict__["__hash__"]: + return True + break + except AttributeError: + # Old-style class + if getattr(C, "__hash__", None): + return True + return NotImplemented + + +class Iterable: + __metaclass__ = ABCMeta + + @abstractmethod + def __iter__(self): + while False: + yield None + + @classmethod + def __subclasshook__(cls, C): + if cls is Iterable: + if _hasattr(C, "__iter__"): + return True + return NotImplemented + +Iterable.register(str) + + +class Iterator(Iterable): + + @abstractmethod + def next(self): + 'Return the next item from the iterator. When exhausted, raise StopIteration' + raise StopIteration + + def __iter__(self): + return self + + @classmethod + def __subclasshook__(cls, C): + if cls is Iterator: + if _hasattr(C, "next") and _hasattr(C, "__iter__"): + return True + return NotImplemented + + +class Sized: + __metaclass__ = ABCMeta + + @abstractmethod + def __len__(self): + return 0 + + @classmethod + def __subclasshook__(cls, C): + if cls is Sized: + if _hasattr(C, "__len__"): + return True + return NotImplemented + + +class Container: + __metaclass__ = ABCMeta + + @abstractmethod + def __contains__(self, x): + return False + + @classmethod + def __subclasshook__(cls, C): + if cls is Container: + if _hasattr(C, "__contains__"): + return True + return NotImplemented + + +class Callable: + __metaclass__ = ABCMeta + + @abstractmethod + def __call__(self, *args, **kwds): + return False + + @classmethod + def __subclasshook__(cls, C): + if cls is Callable: + if _hasattr(C, "__call__"): + return True + return NotImplemented + + +### SETS ### + + +class Set(Sized, Iterable, Container): + """A set is a finite, iterable container. + + This class provides concrete generic implementations of all + methods except for __contains__, __iter__ and __len__. + + To override the comparisons (presumably for speed, as the + semantics are fixed), redefine __le__ and __ge__, + then the other operations will automatically follow suit. + """ + + def __le__(self, other): + if not isinstance(other, Set): + return NotImplemented + if len(self) > len(other): + return False + for elem in self: + if elem not in other: + return False + return True + + def __lt__(self, other): + if not isinstance(other, Set): + return NotImplemented + return len(self) < len(other) and self.__le__(other) + + def __gt__(self, other): + if not isinstance(other, Set): + return NotImplemented + return len(self) > len(other) and self.__ge__(other) + + def __ge__(self, other): + if not isinstance(other, Set): + return NotImplemented + if len(self) < len(other): + return False + for elem in other: + if elem not in self: + return False + return True + + def __eq__(self, other): + if not isinstance(other, Set): + return NotImplemented + return len(self) == len(other) and self.__le__(other) + + def __ne__(self, other): + return not (self == other) + + @classmethod + def _from_iterable(cls, it): + '''Construct an instance of the class from any iterable input. + + Must override this method if the class constructor signature + does not accept an iterable for an input. + ''' + return cls(it) + + def __and__(self, other): + if not isinstance(other, Iterable): + return NotImplemented + return self._from_iterable(value for value in other if value in self) + + __rand__ = __and__ + + def isdisjoint(self, other): + 'Return True if two sets have a null intersection.' + for value in other: + if value in self: + return False + return True + + def __or__(self, other): + if not isinstance(other, Iterable): + return NotImplemented + chain = (e for s in (self, other) for e in s) + return self._from_iterable(chain) + + __ror__ = __or__ + + def __sub__(self, other): + if not isinstance(other, Set): + if not isinstance(other, Iterable): + return NotImplemented + other = self._from_iterable(other) + return self._from_iterable(value for value in self + if value not in other) + + def __rsub__(self, other): + if not isinstance(other, Set): + if not isinstance(other, Iterable): + return NotImplemented + other = self._from_iterable(other) + return self._from_iterable(value for value in other + if value not in self) + + def __xor__(self, other): + if not isinstance(other, Set): + if not isinstance(other, Iterable): + return NotImplemented + other = self._from_iterable(other) + return (self - other) | (other - self) + + __rxor__ = __xor__ + + # Sets are not hashable by default, but subclasses can change this + __hash__ = None + + def _hash(self): + """Compute the hash value of a set. + + Note that we don't define __hash__: not all sets are hashable. + But if you define a hashable set type, its __hash__ should + call this function. + + This must be compatible __eq__. + + All sets ought to compare equal if they contain the same + elements, regardless of how they are implemented, and + regardless of the order of the elements; so there's not much + freedom for __eq__ or __hash__. We match the algorithm used + by the built-in frozenset type. + """ + MAX = sys.maxint + MASK = 2 * MAX + 1 + n = len(self) + h = 1927868237 * (n + 1) + h &= MASK + for x in self: + hx = hash(x) + h ^= (hx ^ (hx << 16) ^ 89869747) * 3644798167 + h &= MASK + h = h * 69069 + 907133923 + h &= MASK + if h > MAX: + h -= MASK + 1 + if h == -1: + h = 590923713 + return h + +Set.register(frozenset) + + +class MutableSet(Set): + """A mutable set is a finite, iterable container. + + This class provides concrete generic implementations of all + methods except for __contains__, __iter__, __len__, + add(), and discard(). + + To override the comparisons (presumably for speed, as the + semantics are fixed), all you have to do is redefine __le__ and + then the other operations will automatically follow suit. + """ + + @abstractmethod + def add(self, value): + """Add an element.""" + raise NotImplementedError + + @abstractmethod + def discard(self, value): + """Remove an element. Do not raise an exception if absent.""" + raise NotImplementedError + + def remove(self, value): + """Remove an element. If not a member, raise a KeyError.""" + if value not in self: + raise KeyError(value) + self.discard(value) + + def pop(self): + """Return the popped value. Raise KeyError if empty.""" + it = iter(self) + try: + value = next(it) + except StopIteration: + raise KeyError + self.discard(value) + return value + + def clear(self): + """This is slow (creates N new iterators!) but effective.""" + try: + while True: + self.pop() + except KeyError: + pass + + def __ior__(self, it): + for value in it: + self.add(value) + return self + + def __iand__(self, it): + for value in (self - it): + self.discard(value) + return self + + def __ixor__(self, it): + if it is self: + self.clear() + else: + if not isinstance(it, Set): + it = self._from_iterable(it) + for value in it: + if value in self: + self.discard(value) + else: + self.add(value) + return self + + def __isub__(self, it): + if it is self: + self.clear() + else: + for value in it: + self.discard(value) + return self + +MutableSet.register(set) + + +### MAPPINGS ### + + +class Mapping(Sized, Iterable, Container): + + """A Mapping is a generic container for associating key/value + pairs. + + This class provides concrete generic implementations of all + methods except for __getitem__, __iter__, and __len__. + + """ + + @abstractmethod + def __getitem__(self, key): + raise KeyError + + def get(self, key, default=None): + 'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.' + try: + return self[key] + except KeyError: + return default + + def __contains__(self, key): + try: + self[key] + except KeyError: + return False + else: + return True + + def iterkeys(self): + 'D.iterkeys() -> an iterator over the keys of D' + return iter(self) + + def itervalues(self): + 'D.itervalues() -> an iterator over the values of D' + for key in self: + yield self[key] + + def iteritems(self): + 'D.iteritems() -> an iterator over the (key, value) items of D' + for key in self: + yield (key, self[key]) + + def keys(self): + "D.keys() -> list of D's keys" + return list(self) + + def items(self): + "D.items() -> list of D's (key, value) pairs, as 2-tuples" + return [(key, self[key]) for key in self] + + def values(self): + "D.values() -> list of D's values" + return [self[key] for key in self] + + # Mappings are not hashable by default, but subclasses can change this + __hash__ = None + + def __eq__(self, other): + if not isinstance(other, Mapping): + return NotImplemented + return dict(self.items()) == dict(other.items()) + + def __ne__(self, other): + return not (self == other) + +class MappingView(Sized): + + def __init__(self, mapping): + self._mapping = mapping + + def __len__(self): + return len(self._mapping) + + def __repr__(self): + return '{0.__class__.__name__}({0._mapping!r})'.format(self) + + +class KeysView(MappingView, Set): + + @classmethod + def _from_iterable(self, it): + return set(it) + + def __contains__(self, key): + return key in self._mapping + + def __iter__(self): + for key in self._mapping: + yield key + +KeysView.register(type({}.viewkeys())) + +class ItemsView(MappingView, Set): + + @classmethod + def _from_iterable(self, it): + return set(it) + + def __contains__(self, item): + key, value = item + try: + v = self._mapping[key] + except KeyError: + return False + else: + return v == value + + def __iter__(self): + for key in self._mapping: + yield (key, self._mapping[key]) + +ItemsView.register(type({}.viewitems())) + +class ValuesView(MappingView): + + def __contains__(self, value): + for key in self._mapping: + if value == self._mapping[key]: + return True + return False + + def __iter__(self): + for key in self._mapping: + yield self._mapping[key] + +ValuesView.register(type({}.viewvalues())) + +class MutableMapping(Mapping): + + """A MutableMapping is a generic container for associating + key/value pairs. + + This class provides concrete generic implementations of all + methods except for __getitem__, __setitem__, __delitem__, + __iter__, and __len__. + + """ + + @abstractmethod + def __setitem__(self, key, value): + raise KeyError + + @abstractmethod + def __delitem__(self, key): + raise KeyError + + __marker = object() + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def popitem(self): + '''D.popitem() -> (k, v), remove and return some (key, value) pair + as a 2-tuple; but raise KeyError if D is empty. + ''' + try: + key = next(iter(self)) + except StopIteration: + raise KeyError + value = self[key] + del self[key] + return key, value + + def clear(self): + 'D.clear() -> None. Remove all items from D.' + try: + while True: + self.popitem() + except KeyError: + pass + + def update(*args, **kwds): + ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. + If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v + In either case, this is followed by: for k, v in F.items(): D[k] = v + ''' + if not args: + raise TypeError("descriptor 'update' of 'MutableMapping' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('update expected at most 1 arguments, got %d' % + len(args)) + if args: + other = args[0] + if isinstance(other, Mapping): + for key in other: + self[key] = other[key] + elif hasattr(other, "keys"): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + def setdefault(self, key, default=None): + 'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D' + try: + return self[key] + except KeyError: + self[key] = default + return default + +MutableMapping.register(dict) + + +### SEQUENCES ### + + +class Sequence(Sized, Iterable, Container): + """All the operations on a read-only sequence. + + Concrete subclasses must override __new__ or __init__, + __getitem__, and __len__. + """ + + @abstractmethod + def __getitem__(self, index): + raise IndexError + + def __iter__(self): + i = 0 + try: + while True: + v = self[i] + yield v + i += 1 + except IndexError: + return + + def __contains__(self, value): + for v in self: + if v == value: + return True + return False + + def __reversed__(self): + for i in reversed(range(len(self))): + yield self[i] + + def index(self, value): + '''S.index(value) -> integer -- return first index of value. + Raises ValueError if the value is not present. + ''' + for i, v in enumerate(self): + if v == value: + return i + raise ValueError + + def count(self, value): + 'S.count(value) -> integer -- return number of occurrences of value' + return sum(1 for v in self if v == value) + +Sequence.register(tuple) +Sequence.register(basestring) +Sequence.register(buffer) +Sequence.register(xrange) + + +class MutableSequence(Sequence): + + """All the operations on a read-only sequence. + + Concrete subclasses must provide __new__ or __init__, + __getitem__, __setitem__, __delitem__, __len__, and insert(). + + """ + + @abstractmethod + def __setitem__(self, index, value): + raise IndexError + + @abstractmethod + def __delitem__(self, index): + raise IndexError + + @abstractmethod + def insert(self, index, value): + 'S.insert(index, object) -- insert object before index' + raise IndexError + + def append(self, value): + 'S.append(object) -- append object to the end of the sequence' + self.insert(len(self), value) + + def reverse(self): + 'S.reverse() -- reverse *IN PLACE*' + n = len(self) + for i in range(n//2): + self[i], self[n-i-1] = self[n-i-1], self[i] + + def extend(self, values): + 'S.extend(iterable) -- extend sequence by appending elements from the iterable' + for v in values: + self.append(v) + + def pop(self, index=-1): + '''S.pop([index]) -> item -- remove and return item at index (default last). + Raise IndexError if list is empty or index is out of range. + ''' + v = self[index] + del self[index] + return v + + def remove(self, value): + '''S.remove(value) -- remove first occurrence of value. + Raise ValueError if the value is not present. + ''' + del self[self.index(value)] + + def __iadd__(self, values): + self.extend(values) + return self + +MutableSequence.register(list)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/_weakrefset.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,204 @@ +# Access WeakSet through the weakref module. +# This code is separated-out because it is needed +# by abc.py to load everything else at startup. + +from _weakref import ref + +__all__ = ['WeakSet'] + + +class _IterationGuard(object): + # This context manager registers itself in the current iterators of the + # weak container, such as to delay all removals until the context manager + # exits. + # This technique should be relatively thread-safe (since sets are). + + def __init__(self, weakcontainer): + # Don't create cycles + self.weakcontainer = ref(weakcontainer) + + def __enter__(self): + w = self.weakcontainer() + if w is not None: + w._iterating.add(self) + return self + + def __exit__(self, e, t, b): + w = self.weakcontainer() + if w is not None: + s = w._iterating + s.remove(self) + if not s: + w._commit_removals() + + +class WeakSet(object): + def __init__(self, data=None): + self.data = set() + def _remove(item, selfref=ref(self)): + self = selfref() + if self is not None: + if self._iterating: + self._pending_removals.append(item) + else: + self.data.discard(item) + self._remove = _remove + # A list of keys to be removed + self._pending_removals = [] + self._iterating = set() + if data is not None: + self.update(data) + + def _commit_removals(self): + l = self._pending_removals + discard = self.data.discard + while l: + discard(l.pop()) + + def __iter__(self): + with _IterationGuard(self): + for itemref in self.data: + item = itemref() + if item is not None: + # Caveat: the iterator will keep a strong reference to + # `item` until it is resumed or closed. + yield item + + def __len__(self): + return len(self.data) - len(self._pending_removals) + + def __contains__(self, item): + try: + wr = ref(item) + except TypeError: + return False + return wr in self.data + + def __reduce__(self): + return (self.__class__, (list(self),), + getattr(self, '__dict__', None)) + + __hash__ = None + + def add(self, item): + if self._pending_removals: + self._commit_removals() + self.data.add(ref(item, self._remove)) + + def clear(self): + if self._pending_removals: + self._commit_removals() + self.data.clear() + + def copy(self): + return self.__class__(self) + + def pop(self): + if self._pending_removals: + self._commit_removals() + while True: + try: + itemref = self.data.pop() + except KeyError: + raise KeyError('pop from empty WeakSet') + item = itemref() + if item is not None: + return item + + def remove(self, item): + if self._pending_removals: + self._commit_removals() + self.data.remove(ref(item)) + + def discard(self, item): + if self._pending_removals: + self._commit_removals() + self.data.discard(ref(item)) + + def update(self, other): + if self._pending_removals: + self._commit_removals() + for element in other: + self.add(element) + + def __ior__(self, other): + self.update(other) + return self + + def difference(self, other): + newset = self.copy() + newset.difference_update(other) + return newset + __sub__ = difference + + def difference_update(self, other): + self.__isub__(other) + def __isub__(self, other): + if self._pending_removals: + self._commit_removals() + if self is other: + self.data.clear() + else: + self.data.difference_update(ref(item) for item in other) + return self + + def intersection(self, other): + return self.__class__(item for item in other if item in self) + __and__ = intersection + + def intersection_update(self, other): + self.__iand__(other) + def __iand__(self, other): + if self._pending_removals: + self._commit_removals() + self.data.intersection_update(ref(item) for item in other) + return self + + def issubset(self, other): + return self.data.issubset(ref(item) for item in other) + __le__ = issubset + + def __lt__(self, other): + return self.data < set(ref(item) for item in other) + + def issuperset(self, other): + return self.data.issuperset(ref(item) for item in other) + __ge__ = issuperset + + def __gt__(self, other): + return self.data > set(ref(item) for item in other) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.data == set(ref(item) for item in other) + + def __ne__(self, other): + opposite = self.__eq__(other) + if opposite is NotImplemented: + return NotImplemented + return not opposite + + def symmetric_difference(self, other): + newset = self.copy() + newset.symmetric_difference_update(other) + return newset + __xor__ = symmetric_difference + + def symmetric_difference_update(self, other): + self.__ixor__(other) + def __ixor__(self, other): + if self._pending_removals: + self._commit_removals() + if self is other: + self.data.clear() + else: + self.data.symmetric_difference_update(ref(item, self._remove) for item in other) + return self + + def union(self, other): + return self.__class__(e for s in (self, other) for e in s) + __or__ = union + + def isdisjoint(self, other): + return len(self.intersection(other)) == 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/abc.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,185 @@ +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Abstract Base Classes (ABCs) according to PEP 3119.""" + +import types + +from _weakrefset import WeakSet + +# Instance of old-style class +class _C: pass +_InstanceType = type(_C()) + + +def abstractmethod(funcobj): + """A decorator indicating abstract methods. + + Requires that the metaclass is ABCMeta or derived from it. A + class that has a metaclass derived from ABCMeta cannot be + instantiated unless all of its abstract methods are overridden. + The abstract methods can be called using any of the normal + 'super' call mechanisms. + + Usage: + + class C: + __metaclass__ = ABCMeta + @abstractmethod + def my_abstract_method(self, ...): + ... + """ + funcobj.__isabstractmethod__ = True + return funcobj + + +class abstractproperty(property): + """A decorator indicating abstract properties. + + Requires that the metaclass is ABCMeta or derived from it. A + class that has a metaclass derived from ABCMeta cannot be + instantiated unless all of its abstract properties are overridden. + The abstract properties can be called using any of the normal + 'super' call mechanisms. + + Usage: + + class C: + __metaclass__ = ABCMeta + @abstractproperty + def my_abstract_property(self): + ... + + This defines a read-only property; you can also define a read-write + abstract property using the 'long' form of property declaration: + + class C: + __metaclass__ = ABCMeta + def getx(self): ... + def setx(self, value): ... + x = abstractproperty(getx, setx) + """ + __isabstractmethod__ = True + + +class ABCMeta(type): + + """Metaclass for defining Abstract Base Classes (ABCs). + + Use this metaclass to create an ABC. An ABC can be subclassed + directly, and then acts as a mix-in class. You can also register + unrelated concrete classes (even built-in classes) and unrelated + ABCs as 'virtual subclasses' -- these and their descendants will + be considered subclasses of the registering ABC by the built-in + issubclass() function, but the registering ABC won't show up in + their MRO (Method Resolution Order) nor will method + implementations defined by the registering ABC be callable (not + even via super()). + + """ + + # A global counter that is incremented each time a class is + # registered as a virtual subclass of anything. It forces the + # negative cache to be cleared before its next use. + _abc_invalidation_counter = 0 + + def __new__(mcls, name, bases, namespace): + cls = super(ABCMeta, mcls).__new__(mcls, name, bases, namespace) + # Compute set of abstract method names + abstracts = set(name + for name, value in namespace.items() + if getattr(value, "__isabstractmethod__", False)) + for base in bases: + for name in getattr(base, "__abstractmethods__", set()): + value = getattr(cls, name, None) + if getattr(value, "__isabstractmethod__", False): + abstracts.add(name) + cls.__abstractmethods__ = frozenset(abstracts) + # Set up inheritance registry + cls._abc_registry = WeakSet() + cls._abc_cache = WeakSet() + cls._abc_negative_cache = WeakSet() + cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter + return cls + + def register(cls, subclass): + """Register a virtual subclass of an ABC.""" + if not isinstance(subclass, (type, types.ClassType)): + raise TypeError("Can only register classes") + if issubclass(subclass, cls): + return # Already a subclass + # Subtle: test for cycles *after* testing for "already a subclass"; + # this means we allow X.register(X) and interpret it as a no-op. + if issubclass(cls, subclass): + # This would create a cycle, which is bad for the algorithm below + raise RuntimeError("Refusing to create an inheritance cycle") + cls._abc_registry.add(subclass) + ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache + + def _dump_registry(cls, file=None): + """Debug helper to print the ABC registry.""" + print >> file, "Class: %s.%s" % (cls.__module__, cls.__name__) + print >> file, "Inv.counter: %s" % ABCMeta._abc_invalidation_counter + for name in sorted(cls.__dict__.keys()): + if name.startswith("_abc_"): + value = getattr(cls, name) + print >> file, "%s: %r" % (name, value) + + def __instancecheck__(cls, instance): + """Override for isinstance(instance, cls).""" + # Inline the cache checking when it's simple. + subclass = getattr(instance, '__class__', None) + if subclass is not None and subclass in cls._abc_cache: + return True + subtype = type(instance) + # Old-style instances + if subtype is _InstanceType: + subtype = subclass + if subtype is subclass or subclass is None: + if (cls._abc_negative_cache_version == + ABCMeta._abc_invalidation_counter and + subtype in cls._abc_negative_cache): + return False + # Fall back to the subclass check. + return cls.__subclasscheck__(subtype) + return (cls.__subclasscheck__(subclass) or + cls.__subclasscheck__(subtype)) + + def __subclasscheck__(cls, subclass): + """Override for issubclass(subclass, cls).""" + # Check cache + if subclass in cls._abc_cache: + return True + # Check negative cache; may have to invalidate + if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter: + # Invalidate the negative cache + cls._abc_negative_cache = WeakSet() + cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter + elif subclass in cls._abc_negative_cache: + return False + # Check the subclass hook + ok = cls.__subclasshook__(subclass) + if ok is not NotImplemented: + assert isinstance(ok, bool) + if ok: + cls._abc_cache.add(subclass) + else: + cls._abc_negative_cache.add(subclass) + return ok + # Check if it's a direct subclass + if cls in getattr(subclass, '__mro__', ()): + cls._abc_cache.add(subclass) + return True + # Check if it's a subclass of a registered class (recursive) + for rcls in cls._abc_registry: + if issubclass(subclass, rcls): + cls._abc_cache.add(subclass) + return True + # Check if it's a subclass of a subclass (recursive) + for scls in cls.__subclasses__(): + if issubclass(subclass, scls): + cls._abc_cache.add(subclass) + return True + # No dice; update negative cache + cls._abc_negative_cache.add(subclass) + return False
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/codecs.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,1113 @@ +""" codecs -- Python Codec Registry, API and helpers. + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +"""#" + +import __builtin__, sys + +### Registry and builtin stateless codec functions + +try: + from _codecs import * +except ImportError, why: + raise SystemError('Failed to load the builtin codecs: %s' % why) + +__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE", + "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", + "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE", + "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE", + "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder", + "StreamReader", "StreamWriter", + "StreamReaderWriter", "StreamRecoder", + "getencoder", "getdecoder", "getincrementalencoder", + "getincrementaldecoder", "getreader", "getwriter", + "encode", "decode", "iterencode", "iterdecode", + "strict_errors", "ignore_errors", "replace_errors", + "xmlcharrefreplace_errors", "backslashreplace_errors", + "register_error", "lookup_error"] + +### Constants + +# +# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF) +# and its possible byte string values +# for UTF8/UTF16/UTF32 output and little/big endian machines +# + +# UTF-8 +BOM_UTF8 = '\xef\xbb\xbf' + +# UTF-16, little endian +BOM_LE = BOM_UTF16_LE = '\xff\xfe' + +# UTF-16, big endian +BOM_BE = BOM_UTF16_BE = '\xfe\xff' + +# UTF-32, little endian +BOM_UTF32_LE = '\xff\xfe\x00\x00' + +# UTF-32, big endian +BOM_UTF32_BE = '\x00\x00\xfe\xff' + +if sys.byteorder == 'little': + + # UTF-16, native endianness + BOM = BOM_UTF16 = BOM_UTF16_LE + + # UTF-32, native endianness + BOM_UTF32 = BOM_UTF32_LE + +else: + + # UTF-16, native endianness + BOM = BOM_UTF16 = BOM_UTF16_BE + + # UTF-32, native endianness + BOM_UTF32 = BOM_UTF32_BE + +# Old broken names (don't use in new code) +BOM32_LE = BOM_UTF16_LE +BOM32_BE = BOM_UTF16_BE +BOM64_LE = BOM_UTF32_LE +BOM64_BE = BOM_UTF32_BE + + +### Codec base classes (defining the API) + +class CodecInfo(tuple): + """Codec details when looking up the codec registry""" + + # Private API to allow Python to blacklist the known non-Unicode + # codecs in the standard library. A more general mechanism to + # reliably distinguish test encodings from other codecs will hopefully + # be defined for Python 3.5 + # + # See http://bugs.python.org/issue19619 + _is_text_encoding = True # Assume codecs are text encodings by default + + def __new__(cls, encode, decode, streamreader=None, streamwriter=None, + incrementalencoder=None, incrementaldecoder=None, name=None, + _is_text_encoding=None): + self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) + self.name = name + self.encode = encode + self.decode = decode + self.incrementalencoder = incrementalencoder + self.incrementaldecoder = incrementaldecoder + self.streamwriter = streamwriter + self.streamreader = streamreader + if _is_text_encoding is not None: + self._is_text_encoding = _is_text_encoding + return self + + def __repr__(self): + return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self)) + +class Codec: + + """ Defines the interface for stateless encoders/decoders. + + The .encode()/.decode() methods may use different error + handling schemes by providing the errors argument. These + string values are predefined: + + 'strict' - raise a ValueError error (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace' - replace with a suitable replacement character; + Python will use the official U+FFFD REPLACEMENT + CHARACTER for the builtin Unicode codecs on + decoding and '?' on encoding. + 'xmlcharrefreplace' - Replace with the appropriate XML + character reference (only for encoding). + 'backslashreplace' - Replace with backslashed escape sequences + (only for encoding). + + The set of allowed values can be extended via register_error. + + """ + def encode(self, input, errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling. + + The method may not store state in the Codec instance. Use + StreamWriter for codecs which have to keep state in order to + make encoding efficient. + + The encoder must be able to handle zero length input and + return an empty object of the output object type in this + situation. + + """ + raise NotImplementedError + + def decode(self, input, errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling. + + The method may not store state in the Codec instance. Use + StreamReader for codecs which have to keep state in order to + make decoding efficient. + + The decoder must be able to handle zero length input and + return an empty object of the output object type in this + situation. + + """ + raise NotImplementedError + +class IncrementalEncoder(object): + """ + An IncrementalEncoder encodes an input in multiple steps. The input can be + passed piece by piece to the encode() method. The IncrementalEncoder remembers + the state of the Encoding process between calls to encode(). + """ + def __init__(self, errors='strict'): + """ + Creates an IncrementalEncoder instance. + + The IncrementalEncoder may use different error handling schemes by + providing the errors keyword argument. See the module docstring + for a list of possible values. + """ + self.errors = errors + self.buffer = "" + + def encode(self, input, final=False): + """ + Encodes input and returns the resulting object. + """ + raise NotImplementedError + + def reset(self): + """ + Resets the encoder to the initial state. + """ + + def getstate(self): + """ + Return the current state of the encoder. + """ + return 0 + + def setstate(self, state): + """ + Set the current state of the encoder. state must have been + returned by getstate(). + """ + +class BufferedIncrementalEncoder(IncrementalEncoder): + """ + This subclass of IncrementalEncoder can be used as the baseclass for an + incremental encoder if the encoder must keep some of the output in a + buffer between calls to encode(). + """ + def __init__(self, errors='strict'): + IncrementalEncoder.__init__(self, errors) + self.buffer = "" # unencoded input that is kept between calls to encode() + + def _buffer_encode(self, input, errors, final): + # Overwrite this method in subclasses: It must encode input + # and return an (output, length consumed) tuple + raise NotImplementedError + + def encode(self, input, final=False): + # encode input (taking the buffer into account) + data = self.buffer + input + (result, consumed) = self._buffer_encode(data, self.errors, final) + # keep unencoded input until the next call + self.buffer = data[consumed:] + return result + + def reset(self): + IncrementalEncoder.reset(self) + self.buffer = "" + + def getstate(self): + return self.buffer or 0 + + def setstate(self, state): + self.buffer = state or "" + +class IncrementalDecoder(object): + """ + An IncrementalDecoder decodes an input in multiple steps. The input can be + passed piece by piece to the decode() method. The IncrementalDecoder + remembers the state of the decoding process between calls to decode(). + """ + def __init__(self, errors='strict'): + """ + Creates a IncrementalDecoder instance. + + The IncrementalDecoder may use different error handling schemes by + providing the errors keyword argument. See the module docstring + for a list of possible values. + """ + self.errors = errors + + def decode(self, input, final=False): + """ + Decodes input and returns the resulting object. + """ + raise NotImplementedError + + def reset(self): + """ + Resets the decoder to the initial state. + """ + + def getstate(self): + """ + Return the current state of the decoder. + + This must be a (buffered_input, additional_state_info) tuple. + buffered_input must be a bytes object containing bytes that + were passed to decode() that have not yet been converted. + additional_state_info must be a non-negative integer + representing the state of the decoder WITHOUT yet having + processed the contents of buffered_input. In the initial state + and after reset(), getstate() must return (b"", 0). + """ + return (b"", 0) + + def setstate(self, state): + """ + Set the current state of the decoder. + + state must have been returned by getstate(). The effect of + setstate((b"", 0)) must be equivalent to reset(). + """ + +class BufferedIncrementalDecoder(IncrementalDecoder): + """ + This subclass of IncrementalDecoder can be used as the baseclass for an + incremental decoder if the decoder must be able to handle incomplete byte + sequences. + """ + def __init__(self, errors='strict'): + IncrementalDecoder.__init__(self, errors) + self.buffer = "" # undecoded input that is kept between calls to decode() + + def _buffer_decode(self, input, errors, final): + # Overwrite this method in subclasses: It must decode input + # and return an (output, length consumed) tuple + raise NotImplementedError + + def decode(self, input, final=False): + # decode input (taking the buffer into account) + data = self.buffer + input + (result, consumed) = self._buffer_decode(data, self.errors, final) + # keep undecoded input until the next call + self.buffer = data[consumed:] + return result + + def reset(self): + IncrementalDecoder.reset(self) + self.buffer = "" + + def getstate(self): + # additional state info is always 0 + return (self.buffer, 0) + + def setstate(self, state): + # ignore additional state info + self.buffer = state[0] + +# +# The StreamWriter and StreamReader class provide generic working +# interfaces which can be used to implement new encoding submodules +# very easily. See encodings/utf_8.py for an example on how this is +# done. +# + +class StreamWriter(Codec): + + def __init__(self, stream, errors='strict'): + + """ Creates a StreamWriter instance. + + stream must be a file-like object open for writing + (binary) data. + + The StreamWriter may use different error handling + schemes by providing the errors keyword argument. These + parameters are predefined: + + 'strict' - raise a ValueError (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace'- replace with a suitable replacement character + 'xmlcharrefreplace' - Replace with the appropriate XML + character reference. + 'backslashreplace' - Replace with backslashed escape + sequences (only for encoding). + + The set of allowed parameter values can be extended via + register_error. + """ + self.stream = stream + self.errors = errors + + def write(self, object): + + """ Writes the object's contents encoded to self.stream. + """ + data, consumed = self.encode(object, self.errors) + self.stream.write(data) + + def writelines(self, list): + + """ Writes the concatenated list of strings to the stream + using .write(). + """ + self.write(''.join(list)) + + def reset(self): + + """ Flushes and resets the codec buffers used for keeping state. + + Calling this method should ensure that the data on the + output is put into a clean state, that allows appending + of new fresh data without having to rescan the whole + stream to recover state. + + """ + pass + + def seek(self, offset, whence=0): + self.stream.seek(offset, whence) + if whence == 0 and offset == 0: + self.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamReader(Codec): + + def __init__(self, stream, errors='strict'): + + """ Creates a StreamReader instance. + + stream must be a file-like object open for reading + (binary) data. + + The StreamReader may use different error handling + schemes by providing the errors keyword argument. These + parameters are predefined: + + 'strict' - raise a ValueError (or a subclass) + 'ignore' - ignore the character and continue with the next + 'replace'- replace with a suitable replacement character; + + The set of allowed parameter values can be extended via + register_error. + """ + self.stream = stream + self.errors = errors + self.bytebuffer = "" + # For str->str decoding this will stay a str + # For str->unicode decoding the first read will promote it to unicode + self.charbuffer = "" + self.linebuffer = None + + def decode(self, input, errors='strict'): + raise NotImplementedError + + def read(self, size=-1, chars=-1, firstline=False): + + """ Decodes data from the stream self.stream and returns the + resulting object. + + chars indicates the number of characters to read from the + stream. read() will never return more than chars + characters, but it might return less, if there are not enough + characters available. + + size indicates the approximate maximum number of bytes to + read from the stream for decoding purposes. The decoder + can modify this setting as appropriate. The default value + -1 indicates to read and decode as much as possible. size + is intended to prevent having to decode huge files in one + step. + + If firstline is true, and a UnicodeDecodeError happens + after the first line terminator in the input only the first line + will be returned, the rest of the input will be kept until the + next call to read(). + + The method should use a greedy read strategy meaning that + it should read as much data as is allowed within the + definition of the encoding and the given size, e.g. if + optional encoding endings or state markers are available + on the stream, these should be read too. + """ + # If we have lines cached, first merge them back into characters + if self.linebuffer: + self.charbuffer = "".join(self.linebuffer) + self.linebuffer = None + + # read until we get the required number of characters (if available) + while True: + # can the request be satisfied from the character buffer? + if chars >= 0: + if len(self.charbuffer) >= chars: + break + elif size >= 0: + if len(self.charbuffer) >= size: + break + # we need more data + if size < 0: + newdata = self.stream.read() + else: + newdata = self.stream.read(size) + # decode bytes (those remaining from the last call included) + data = self.bytebuffer + newdata + try: + newchars, decodedbytes = self.decode(data, self.errors) + except UnicodeDecodeError, exc: + if firstline: + newchars, decodedbytes = self.decode(data[:exc.start], self.errors) + lines = newchars.splitlines(True) + if len(lines)<=1: + raise + else: + raise + # keep undecoded bytes until the next call + self.bytebuffer = data[decodedbytes:] + # put new characters in the character buffer + self.charbuffer += newchars + # there was no data available + if not newdata: + break + if chars < 0: + # Return everything we've got + result = self.charbuffer + self.charbuffer = "" + else: + # Return the first chars characters + result = self.charbuffer[:chars] + self.charbuffer = self.charbuffer[chars:] + return result + + def readline(self, size=None, keepends=True): + + """ Read one line from the input stream and return the + decoded data. + + size, if given, is passed as size argument to the + read() method. + + """ + # If we have lines cached from an earlier read, return + # them unconditionally + if self.linebuffer: + line = self.linebuffer[0] + del self.linebuffer[0] + if len(self.linebuffer) == 1: + # revert to charbuffer mode; we might need more data + # next time + self.charbuffer = self.linebuffer[0] + self.linebuffer = None + if not keepends: + line = line.splitlines(False)[0] + return line + + readsize = size or 72 + line = "" + # If size is given, we call read() only once + while True: + data = self.read(readsize, firstline=True) + if data: + # If we're at a "\r" read one extra character (which might + # be a "\n") to get a proper line ending. If the stream is + # temporarily exhausted we return the wrong line ending. + if data.endswith("\r"): + data += self.read(size=1, chars=1) + + line += data + lines = line.splitlines(True) + if lines: + if len(lines) > 1: + # More than one line result; the first line is a full line + # to return + line = lines[0] + del lines[0] + if len(lines) > 1: + # cache the remaining lines + lines[-1] += self.charbuffer + self.linebuffer = lines + self.charbuffer = None + else: + # only one remaining line, put it back into charbuffer + self.charbuffer = lines[0] + self.charbuffer + if not keepends: + line = line.splitlines(False)[0] + break + line0withend = lines[0] + line0withoutend = lines[0].splitlines(False)[0] + if line0withend != line0withoutend: # We really have a line end + # Put the rest back together and keep it until the next call + self.charbuffer = "".join(lines[1:]) + self.charbuffer + if keepends: + line = line0withend + else: + line = line0withoutend + break + # we didn't get anything or this was our only try + if not data or size is not None: + if line and not keepends: + line = line.splitlines(False)[0] + break + if readsize<8000: + readsize *= 2 + return line + + def readlines(self, sizehint=None, keepends=True): + + """ Read all lines available on the input stream + and return them as list of lines. + + Line breaks are implemented using the codec's decoder + method and are included in the list entries. + + sizehint, if given, is ignored since there is no efficient + way to finding the true end-of-line. + + """ + data = self.read() + return data.splitlines(keepends) + + def reset(self): + + """ Resets the codec buffers used for keeping state. + + Note that no stream repositioning should take place. + This method is primarily intended to be able to recover + from decoding errors. + + """ + self.bytebuffer = "" + self.charbuffer = u"" + self.linebuffer = None + + def seek(self, offset, whence=0): + """ Set the input stream's current position. + + Resets the codec buffers used for keeping state. + """ + self.stream.seek(offset, whence) + self.reset() + + def next(self): + + """ Return the next decoded line from the input stream.""" + line = self.readline() + if line: + return line + raise StopIteration + + def __iter__(self): + return self + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamReaderWriter: + + """ StreamReaderWriter instances allow wrapping streams which + work in both read and write modes. + + The design is such that one can use the factory functions + returned by the codec.lookup() function to construct the + instance. + + """ + # Optional attributes set by the file wrappers below + encoding = 'unknown' + + def __init__(self, stream, Reader, Writer, errors='strict'): + + """ Creates a StreamReaderWriter instance. + + stream must be a Stream-like object. + + Reader, Writer must be factory functions or classes + providing the StreamReader, StreamWriter interface resp. + + Error handling is done in the same way as defined for the + StreamWriter/Readers. + + """ + self.stream = stream + self.reader = Reader(stream, errors) + self.writer = Writer(stream, errors) + self.errors = errors + + def read(self, size=-1): + + return self.reader.read(size) + + def readline(self, size=None): + + return self.reader.readline(size) + + def readlines(self, sizehint=None): + + return self.reader.readlines(sizehint) + + def next(self): + + """ Return the next decoded line from the input stream.""" + return self.reader.next() + + def __iter__(self): + return self + + def write(self, data): + + return self.writer.write(data) + + def writelines(self, list): + + return self.writer.writelines(list) + + def reset(self): + + self.reader.reset() + self.writer.reset() + + def seek(self, offset, whence=0): + self.stream.seek(offset, whence) + self.reader.reset() + if whence == 0 and offset == 0: + self.writer.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + # these are needed to make "with codecs.open(...)" work properly + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### + +class StreamRecoder: + + """ StreamRecoder instances provide a frontend - backend + view of encoding data. + + They use the complete set of APIs returned by the + codecs.lookup() function to implement their task. + + Data written to the stream is first decoded into an + intermediate format (which is dependent on the given codec + combination) and then written to the stream using an instance + of the provided Writer class. + + In the other direction, data is read from the stream using a + Reader instance and then return encoded data to the caller. + + """ + # Optional attributes set by the file wrappers below + data_encoding = 'unknown' + file_encoding = 'unknown' + + def __init__(self, stream, encode, decode, Reader, Writer, + errors='strict'): + + """ Creates a StreamRecoder instance which implements a two-way + conversion: encode and decode work on the frontend (the + input to .read() and output of .write()) while + Reader and Writer work on the backend (reading and + writing to the stream). + + You can use these objects to do transparent direct + recodings from e.g. latin-1 to utf-8 and back. + + stream must be a file-like object. + + encode, decode must adhere to the Codec interface, Reader, + Writer must be factory functions or classes providing the + StreamReader, StreamWriter interface resp. + + encode and decode are needed for the frontend translation, + Reader and Writer for the backend translation. Unicode is + used as intermediate encoding. + + Error handling is done in the same way as defined for the + StreamWriter/Readers. + + """ + self.stream = stream + self.encode = encode + self.decode = decode + self.reader = Reader(stream, errors) + self.writer = Writer(stream, errors) + self.errors = errors + + def read(self, size=-1): + + data = self.reader.read(size) + data, bytesencoded = self.encode(data, self.errors) + return data + + def readline(self, size=None): + + if size is None: + data = self.reader.readline() + else: + data = self.reader.readline(size) + data, bytesencoded = self.encode(data, self.errors) + return data + + def readlines(self, sizehint=None): + + data = self.reader.read() + data, bytesencoded = self.encode(data, self.errors) + return data.splitlines(1) + + def next(self): + + """ Return the next decoded line from the input stream.""" + data = self.reader.next() + data, bytesencoded = self.encode(data, self.errors) + return data + + def __iter__(self): + return self + + def write(self, data): + + data, bytesdecoded = self.decode(data, self.errors) + return self.writer.write(data) + + def writelines(self, list): + + data = ''.join(list) + data, bytesdecoded = self.decode(data, self.errors) + return self.writer.write(data) + + def reset(self): + + self.reader.reset() + self.writer.reset() + + def __getattr__(self, name, + getattr=getattr): + + """ Inherit all other methods from the underlying stream. + """ + return getattr(self.stream, name) + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.stream.close() + +### Shortcuts + +def open(filename, mode='rb', encoding=None, errors='strict', buffering=1): + + """ Open an encoded file using the given mode and return + a wrapped version providing transparent encoding/decoding. + + Note: The wrapped version will only accept the object format + defined by the codecs, i.e. Unicode objects for most builtin + codecs. Output is also codec dependent and will usually be + Unicode as well. + + Files are always opened in binary mode, even if no binary mode + was specified. This is done to avoid data loss due to encodings + using 8-bit values. The default file mode is 'rb' meaning to + open the file in binary read mode. + + encoding specifies the encoding which is to be used for the + file. + + errors may be given to define the error handling. It defaults + to 'strict' which causes ValueErrors to be raised in case an + encoding error occurs. + + buffering has the same meaning as for the builtin open() API. + It defaults to line buffered. + + The returned wrapped file object provides an extra attribute + .encoding which allows querying the used encoding. This + attribute is only available if an encoding was specified as + parameter. + + """ + if encoding is not None: + if 'U' in mode: + # No automatic conversion of '\n' is done on reading and writing + mode = mode.strip().replace('U', '') + if mode[:1] not in set('rwa'): + mode = 'r' + mode + if 'b' not in mode: + # Force opening of the file in binary mode + mode = mode + 'b' + file = __builtin__.open(filename, mode, buffering) + if encoding is None: + return file + info = lookup(encoding) + srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors) + # Add attributes to simplify introspection + srw.encoding = encoding + return srw + +def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): + + """ Return a wrapped version of file which provides transparent + encoding translation. + + Strings written to the wrapped file are interpreted according + to the given data_encoding and then written to the original + file as string using file_encoding. The intermediate encoding + will usually be Unicode but depends on the specified codecs. + + Strings are read from the file using file_encoding and then + passed back to the caller as string using data_encoding. + + If file_encoding is not given, it defaults to data_encoding. + + errors may be given to define the error handling. It defaults + to 'strict' which causes ValueErrors to be raised in case an + encoding error occurs. + + The returned wrapped file object provides two extra attributes + .data_encoding and .file_encoding which reflect the given + parameters of the same name. The attributes can be used for + introspection by Python programs. + + """ + if file_encoding is None: + file_encoding = data_encoding + data_info = lookup(data_encoding) + file_info = lookup(file_encoding) + sr = StreamRecoder(file, data_info.encode, data_info.decode, + file_info.streamreader, file_info.streamwriter, errors) + # Add attributes to simplify introspection + sr.data_encoding = data_encoding + sr.file_encoding = file_encoding + return sr + +### Helpers for codec lookup + +def getencoder(encoding): + + """ Lookup up the codec for the given encoding and return + its encoder function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).encode + +def getdecoder(encoding): + + """ Lookup up the codec for the given encoding and return + its decoder function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).decode + +def getincrementalencoder(encoding): + + """ Lookup up the codec for the given encoding and return + its IncrementalEncoder class or factory function. + + Raises a LookupError in case the encoding cannot be found + or the codecs doesn't provide an incremental encoder. + + """ + encoder = lookup(encoding).incrementalencoder + if encoder is None: + raise LookupError(encoding) + return encoder + +def getincrementaldecoder(encoding): + + """ Lookup up the codec for the given encoding and return + its IncrementalDecoder class or factory function. + + Raises a LookupError in case the encoding cannot be found + or the codecs doesn't provide an incremental decoder. + + """ + decoder = lookup(encoding).incrementaldecoder + if decoder is None: + raise LookupError(encoding) + return decoder + +def getreader(encoding): + + """ Lookup up the codec for the given encoding and return + its StreamReader class or factory function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).streamreader + +def getwriter(encoding): + + """ Lookup up the codec for the given encoding and return + its StreamWriter class or factory function. + + Raises a LookupError in case the encoding cannot be found. + + """ + return lookup(encoding).streamwriter + +def iterencode(iterator, encoding, errors='strict', **kwargs): + """ + Encoding iterator. + + Encodes the input strings from the iterator using a IncrementalEncoder. + + errors and kwargs are passed through to the IncrementalEncoder + constructor. + """ + encoder = getincrementalencoder(encoding)(errors, **kwargs) + for input in iterator: + output = encoder.encode(input) + if output: + yield output + output = encoder.encode("", True) + if output: + yield output + +def iterdecode(iterator, encoding, errors='strict', **kwargs): + """ + Decoding iterator. + + Decodes the input strings from the iterator using a IncrementalDecoder. + + errors and kwargs are passed through to the IncrementalDecoder + constructor. + """ + decoder = getincrementaldecoder(encoding)(errors, **kwargs) + for input in iterator: + output = decoder.decode(input) + if output: + yield output + output = decoder.decode("", True) + if output: + yield output + +### Helpers for charmap-based codecs + +def make_identity_dict(rng): + + """ make_identity_dict(rng) -> dict + + Return a dictionary where elements of the rng sequence are + mapped to themselves. + + """ + res = {} + for i in rng: + res[i]=i + return res + +def make_encoding_map(decoding_map): + + """ Creates an encoding map from a decoding map. + + If a target mapping in the decoding map occurs multiple + times, then that target is mapped to None (undefined mapping), + causing an exception when encountered by the charmap codec + during translation. + + One example where this happens is cp875.py which decodes + multiple character to \\u001a. + + """ + m = {} + for k,v in decoding_map.items(): + if not v in m: + m[v] = k + else: + m[v] = None + return m + +### error handlers + +try: + strict_errors = lookup_error("strict") + ignore_errors = lookup_error("ignore") + replace_errors = lookup_error("replace") + xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") + backslashreplace_errors = lookup_error("backslashreplace") +except LookupError: + # In --disable-unicode builds, these error handler are missing + strict_errors = None + ignore_errors = None + replace_errors = None + xmlcharrefreplace_errors = None + backslashreplace_errors = None + +# Tell modulefinder that using codecs probably needs the encodings +# package +_false = 0 +if _false: + import encodings + +### Tests + +if __name__ == '__main__': + + # Make stdout translate Latin-1 output into UTF-8 output + sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8') + + # Have stdin translate Latin-1 input into UTF-8 input + sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/copy_reg.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,201 @@ +"""Helper to provide extensibility for pickle/cPickle. + +This is only useful to add pickle support for extension types defined in +C, not for instances of user-defined classes. +""" + +from types import ClassType as _ClassType + +__all__ = ["pickle", "constructor", + "add_extension", "remove_extension", "clear_extension_cache"] + +dispatch_table = {} + +def pickle(ob_type, pickle_function, constructor_ob=None): + if type(ob_type) is _ClassType: + raise TypeError("copy_reg is not intended for use with classes") + + if not hasattr(pickle_function, '__call__'): + raise TypeError("reduction functions must be callable") + dispatch_table[ob_type] = pickle_function + + # The constructor_ob function is a vestige of safe for unpickling. + # There is no reason for the caller to pass it anymore. + if constructor_ob is not None: + constructor(constructor_ob) + +def constructor(object): + if not hasattr(object, '__call__'): + raise TypeError("constructors must be callable") + +# Example: provide pickling support for complex numbers. + +try: + complex +except NameError: + pass +else: + + def pickle_complex(c): + return complex, (c.real, c.imag) + + pickle(complex, pickle_complex, complex) + +# Support for pickling new-style objects + +def _reconstructor(cls, base, state): + if base is object: + obj = object.__new__(cls) + else: + obj = base.__new__(cls, state) + if base.__init__ != object.__init__: + base.__init__(obj, state) + return obj + +_HEAPTYPE = 1<<9 + +# Python code for object.__reduce_ex__ for protocols 0 and 1 + +def _reduce_ex(self, proto): + assert proto < 2 + for base in self.__class__.__mro__: + if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE: + break + else: + base = object # not really reachable + if base is object: + state = None + else: + if base is self.__class__: + raise TypeError, "can't pickle %s objects" % base.__name__ + state = base(self) + args = (self.__class__, base, state) + try: + getstate = self.__getstate__ + except AttributeError: + if getattr(self, "__slots__", None): + raise TypeError("a class that defines __slots__ without " + "defining __getstate__ cannot be pickled") + try: + dict = self.__dict__ + except AttributeError: + dict = None + else: + dict = getstate() + if dict: + return _reconstructor, args, dict + else: + return _reconstructor, args + +# Helper for __reduce_ex__ protocol 2 + +def __newobj__(cls, *args): + return cls.__new__(cls, *args) + +def _slotnames(cls): + """Return a list of slot names for a given class. + + This needs to find slots defined by the class and its bases, so we + can't simply return the __slots__ attribute. We must walk down + the Method Resolution Order and concatenate the __slots__ of each + class found there. (This assumes classes don't modify their + __slots__ attribute to misrepresent their slots after the class is + defined.) + """ + + # Get the value from a cache in the class if possible + names = cls.__dict__.get("__slotnames__") + if names is not None: + return names + + # Not cached -- calculate the value + names = [] + if not hasattr(cls, "__slots__"): + # This class has no slots + pass + else: + # Slots found -- gather slot names from all base classes + for c in cls.__mro__: + if "__slots__" in c.__dict__: + slots = c.__dict__['__slots__'] + # if class has a single slot, it can be given as a string + if isinstance(slots, basestring): + slots = (slots,) + for name in slots: + # special descriptors + if name in ("__dict__", "__weakref__"): + continue + # mangled names + elif name.startswith('__') and not name.endswith('__'): + names.append('_%s%s' % (c.__name__, name)) + else: + names.append(name) + + # Cache the outcome in the class if at all possible + try: + cls.__slotnames__ = names + except: + pass # But don't die if we can't + + return names + +# A registry of extension codes. This is an ad-hoc compression +# mechanism. Whenever a global reference to <module>, <name> is about +# to be pickled, the (<module>, <name>) tuple is looked up here to see +# if it is a registered extension code for it. Extension codes are +# universal, so that the meaning of a pickle does not depend on +# context. (There are also some codes reserved for local use that +# don't have this restriction.) Codes are positive ints; 0 is +# reserved. + +_extension_registry = {} # key -> code +_inverted_registry = {} # code -> key +_extension_cache = {} # code -> object +# Don't ever rebind those names: cPickle grabs a reference to them when +# it's initialized, and won't see a rebinding. + +def add_extension(module, name, code): + """Register an extension code.""" + code = int(code) + if not 1 <= code <= 0x7fffffff: + raise ValueError, "code out of range" + key = (module, name) + if (_extension_registry.get(key) == code and + _inverted_registry.get(code) == key): + return # Redundant registrations are benign + if key in _extension_registry: + raise ValueError("key %s is already registered with code %s" % + (key, _extension_registry[key])) + if code in _inverted_registry: + raise ValueError("code %s is already in use for key %s" % + (code, _inverted_registry[code])) + _extension_registry[key] = code + _inverted_registry[code] = key + +def remove_extension(module, name, code): + """Unregister an extension code. For testing only.""" + key = (module, name) + if (_extension_registry.get(key) != code or + _inverted_registry.get(code) != key): + raise ValueError("key %s is not registered with code %s" % + (key, code)) + del _extension_registry[key] + del _inverted_registry[code] + if code in _extension_cache: + del _extension_cache[code] + +def clear_extension_cache(): + _extension_cache.clear() + +# Standard extension code assignments + +# Reserved ranges + +# First Last Count Purpose +# 1 127 127 Reserved for Python standard library +# 128 191 64 Reserved for Zope +# 192 239 48 Reserved for 3rd parties +# 240 255 16 Reserved for private use (will never be assigned) +# 256 Inf Inf Reserved for future assignment + +# Extension codes are assigned by the Python Software Foundation.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/distutils/__init__.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,101 @@ +import os +import sys +import warnings +import imp +import opcode # opcode is not a virtualenv module, so we can use it to find the stdlib + # Important! To work on pypy, this must be a module that resides in the + # lib-python/modified-x.y.z directory + +dirname = os.path.dirname + +distutils_path = os.path.join(os.path.dirname(opcode.__file__), 'distutils') +if os.path.normpath(distutils_path) == os.path.dirname(os.path.normpath(__file__)): + warnings.warn( + "The virtualenv distutils package at %s appears to be in the same location as the system distutils?") +else: + __path__.insert(0, distutils_path) + real_distutils = imp.load_module("_virtualenv_distutils", None, distutils_path, ('', '', imp.PKG_DIRECTORY)) + # Copy the relevant attributes + try: + __revision__ = real_distutils.__revision__ + except AttributeError: + pass + __version__ = real_distutils.__version__ + +from distutils import dist, sysconfig + +try: + basestring +except NameError: + basestring = str + +## patch build_ext (distutils doesn't know how to get the libs directory +## path on windows - it hardcodes the paths around the patched sys.prefix) + +if sys.platform == 'win32': + from distutils.command.build_ext import build_ext as old_build_ext + class build_ext(old_build_ext): + def finalize_options (self): + if self.library_dirs is None: + self.library_dirs = [] + elif isinstance(self.library_dirs, basestring): + self.library_dirs = self.library_dirs.split(os.pathsep) + + self.library_dirs.insert(0, os.path.join(sys.real_prefix, "Libs")) + old_build_ext.finalize_options(self) + + from distutils.command import build_ext as build_ext_module + build_ext_module.build_ext = build_ext + +## distutils.dist patches: + +old_find_config_files = dist.Distribution.find_config_files +def find_config_files(self): + found = old_find_config_files(self) + system_distutils = os.path.join(distutils_path, 'distutils.cfg') + #if os.path.exists(system_distutils): + # found.insert(0, system_distutils) + # What to call the per-user config file + if os.name == 'posix': + user_filename = ".pydistutils.cfg" + else: + user_filename = "pydistutils.cfg" + user_filename = os.path.join(sys.prefix, user_filename) + if os.path.isfile(user_filename): + for item in list(found): + if item.endswith('pydistutils.cfg'): + found.remove(item) + found.append(user_filename) + return found +dist.Distribution.find_config_files = find_config_files + +## distutils.sysconfig patches: + +old_get_python_inc = sysconfig.get_python_inc +def sysconfig_get_python_inc(plat_specific=0, prefix=None): + if prefix is None: + prefix = sys.real_prefix + return old_get_python_inc(plat_specific, prefix) +sysconfig_get_python_inc.__doc__ = old_get_python_inc.__doc__ +sysconfig.get_python_inc = sysconfig_get_python_inc + +old_get_python_lib = sysconfig.get_python_lib +def sysconfig_get_python_lib(plat_specific=0, standard_lib=0, prefix=None): + if standard_lib and prefix is None: + prefix = sys.real_prefix + return old_get_python_lib(plat_specific, standard_lib, prefix) +sysconfig_get_python_lib.__doc__ = old_get_python_lib.__doc__ +sysconfig.get_python_lib = sysconfig_get_python_lib + +old_get_config_vars = sysconfig.get_config_vars +def sysconfig_get_config_vars(*args): + real_vars = old_get_config_vars(*args) + if sys.platform == 'win32': + lib_dir = os.path.join(sys.real_prefix, "libs") + if isinstance(real_vars, dict) and 'LIBDIR' not in real_vars: + real_vars['LIBDIR'] = lib_dir # asked for all + elif isinstance(real_vars, list) and 'LIBDIR' in args: + real_vars = real_vars + [lib_dir] # asked for list + return real_vars +sysconfig_get_config_vars.__doc__ = old_get_config_vars.__doc__ +sysconfig.get_config_vars = sysconfig_get_config_vars
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/distutils/distutils.cfg Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,6 @@ +# This is a config file local to this virtualenv installation +# You may include options that will be used by all distutils commands, +# and by easy_install. For instance: +# +# [easy_install] +# find_links = http://mylocalsite
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/__init__.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,157 @@ +""" Standard "encodings" Package + + Standard Python encoding modules are stored in this package + directory. + + Codec modules must have names corresponding to normalized encoding + names as defined in the normalize_encoding() function below, e.g. + 'utf-8' must be implemented by the module 'utf_8.py'. + + Each codec module must export the following interface: + + * getregentry() -> codecs.CodecInfo object + The getregentry() API must a CodecInfo object with encoder, decoder, + incrementalencoder, incrementaldecoder, streamwriter and streamreader + atttributes which adhere to the Python Codec Interface Standard. + + In addition, a module may optionally also define the following + APIs which are then used by the package's codec search function: + + * getaliases() -> sequence of encoding name strings to use as aliases + + Alias names returned by getaliases() must be normalized encoding + names as defined by normalize_encoding(). + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +"""#" + +import codecs +from encodings import aliases +import __builtin__ + +_cache = {} +_unknown = '--unknown--' +_import_tail = ['*'] +_norm_encoding_map = (' . ' + '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ ' + ' abcdefghijklmnopqrstuvwxyz ' + ' ' + ' ' + ' ') +_aliases = aliases.aliases + +class CodecRegistryError(LookupError, SystemError): + pass + +def normalize_encoding(encoding): + + """ Normalize an encoding name. + + Normalization works as follows: all non-alphanumeric + characters except the dot used for Python package names are + collapsed and replaced with a single underscore, e.g. ' -;#' + becomes '_'. Leading and trailing underscores are removed. + + Note that encoding names should be ASCII only; if they do use + non-ASCII characters, these must be Latin-1 compatible. + + """ + # Make sure we have an 8-bit string, because .translate() works + # differently for Unicode strings. + if hasattr(__builtin__, "unicode") and isinstance(encoding, unicode): + # Note that .encode('latin-1') does *not* use the codec + # registry, so this call doesn't recurse. (See unicodeobject.c + # PyUnicode_AsEncodedString() for details) + encoding = encoding.encode('latin-1') + return '_'.join(encoding.translate(_norm_encoding_map).split()) + +def search_function(encoding): + + # Cache lookup + entry = _cache.get(encoding, _unknown) + if entry is not _unknown: + return entry + + # Import the module: + # + # First try to find an alias for the normalized encoding + # name and lookup the module using the aliased name, then try to + # lookup the module using the standard import scheme, i.e. first + # try in the encodings package, then at top-level. + # + norm_encoding = normalize_encoding(encoding) + aliased_encoding = _aliases.get(norm_encoding) or \ + _aliases.get(norm_encoding.replace('.', '_')) + if aliased_encoding is not None: + modnames = [aliased_encoding, + norm_encoding] + else: + modnames = [norm_encoding] + for modname in modnames: + if not modname or '.' in modname: + continue + try: + # Import is absolute to prevent the possibly malicious import of a + # module with side-effects that is not in the 'encodings' package. + mod = __import__('encodings.' + modname, fromlist=_import_tail, + level=0) + except ImportError: + pass + else: + break + else: + mod = None + + try: + getregentry = mod.getregentry + except AttributeError: + # Not a codec module + mod = None + + if mod is None: + # Cache misses + _cache[encoding] = None + return None + + # Now ask the module for the registry entry + entry = getregentry() + if not isinstance(entry, codecs.CodecInfo): + if not 4 <= len(entry) <= 7: + raise CodecRegistryError,\ + 'module "%s" (%s) failed to register' % \ + (mod.__name__, mod.__file__) + if not hasattr(entry[0], '__call__') or \ + not hasattr(entry[1], '__call__') or \ + (entry[2] is not None and not hasattr(entry[2], '__call__')) or \ + (entry[3] is not None and not hasattr(entry[3], '__call__')) or \ + (len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \ + (len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')): + raise CodecRegistryError,\ + 'incompatible codecs in module "%s" (%s)' % \ + (mod.__name__, mod.__file__) + if len(entry)<7 or entry[6] is None: + entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],) + entry = codecs.CodecInfo(*entry) + + # Cache the codec registry entry + _cache[encoding] = entry + + # Register its aliases (without overwriting previously registered + # aliases) + try: + codecaliases = mod.getaliases() + except AttributeError: + pass + else: + for alias in codecaliases: + if alias not in _aliases: + _aliases[alias] = modname + + # Return the registry entry + return entry + +# Register the search_function in the Python codec registry +codecs.register(search_function)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/aliases.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,527 @@ +""" Encoding Aliases Support + + This module is used by the encodings package search function to + map encodings names to module names. + + Note that the search function normalizes the encoding names before + doing the lookup, so the mapping will have to map normalized + encoding names to module names. + + Contents: + + The following aliases dictionary contains mappings of all IANA + character set names for which the Python core library provides + codecs. In addition to these, a few Python specific codec + aliases have also been added. + +""" +aliases = { + + # Please keep this list sorted alphabetically by value ! + + # ascii codec + '646' : 'ascii', + 'ansi_x3.4_1968' : 'ascii', + 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name + 'ansi_x3.4_1986' : 'ascii', + 'cp367' : 'ascii', + 'csascii' : 'ascii', + 'ibm367' : 'ascii', + 'iso646_us' : 'ascii', + 'iso_646.irv_1991' : 'ascii', + 'iso_ir_6' : 'ascii', + 'us' : 'ascii', + 'us_ascii' : 'ascii', + + # base64_codec codec + 'base64' : 'base64_codec', + 'base_64' : 'base64_codec', + + # big5 codec + 'big5_tw' : 'big5', + 'csbig5' : 'big5', + + # big5hkscs codec + 'big5_hkscs' : 'big5hkscs', + 'hkscs' : 'big5hkscs', + + # bz2_codec codec + 'bz2' : 'bz2_codec', + + # cp037 codec + '037' : 'cp037', + 'csibm037' : 'cp037', + 'ebcdic_cp_ca' : 'cp037', + 'ebcdic_cp_nl' : 'cp037', + 'ebcdic_cp_us' : 'cp037', + 'ebcdic_cp_wt' : 'cp037', + 'ibm037' : 'cp037', + 'ibm039' : 'cp037', + + # cp1026 codec + '1026' : 'cp1026', + 'csibm1026' : 'cp1026', + 'ibm1026' : 'cp1026', + + # cp1140 codec + '1140' : 'cp1140', + 'ibm1140' : 'cp1140', + + # cp1250 codec + '1250' : 'cp1250', + 'windows_1250' : 'cp1250', + + # cp1251 codec + '1251' : 'cp1251', + 'windows_1251' : 'cp1251', + + # cp1252 codec + '1252' : 'cp1252', + 'windows_1252' : 'cp1252', + + # cp1253 codec + '1253' : 'cp1253', + 'windows_1253' : 'cp1253', + + # cp1254 codec + '1254' : 'cp1254', + 'windows_1254' : 'cp1254', + + # cp1255 codec + '1255' : 'cp1255', + 'windows_1255' : 'cp1255', + + # cp1256 codec + '1256' : 'cp1256', + 'windows_1256' : 'cp1256', + + # cp1257 codec + '1257' : 'cp1257', + 'windows_1257' : 'cp1257', + + # cp1258 codec + '1258' : 'cp1258', + 'windows_1258' : 'cp1258', + + # cp424 codec + '424' : 'cp424', + 'csibm424' : 'cp424', + 'ebcdic_cp_he' : 'cp424', + 'ibm424' : 'cp424', + + # cp437 codec + '437' : 'cp437', + 'cspc8codepage437' : 'cp437', + 'ibm437' : 'cp437', + + # cp500 codec + '500' : 'cp500', + 'csibm500' : 'cp500', + 'ebcdic_cp_be' : 'cp500', + 'ebcdic_cp_ch' : 'cp500', + 'ibm500' : 'cp500', + + # cp775 codec + '775' : 'cp775', + 'cspc775baltic' : 'cp775', + 'ibm775' : 'cp775', + + # cp850 codec + '850' : 'cp850', + 'cspc850multilingual' : 'cp850', + 'ibm850' : 'cp850', + + # cp852 codec + '852' : 'cp852', + 'cspcp852' : 'cp852', + 'ibm852' : 'cp852', + + # cp855 codec + '855' : 'cp855', + 'csibm855' : 'cp855', + 'ibm855' : 'cp855', + + # cp857 codec + '857' : 'cp857', + 'csibm857' : 'cp857', + 'ibm857' : 'cp857', + + # cp858 codec + '858' : 'cp858', + 'csibm858' : 'cp858', + 'ibm858' : 'cp858', + + # cp860 codec + '860' : 'cp860', + 'csibm860' : 'cp860', + 'ibm860' : 'cp860', + + # cp861 codec + '861' : 'cp861', + 'cp_is' : 'cp861', + 'csibm861' : 'cp861', + 'ibm861' : 'cp861', + + # cp862 codec + '862' : 'cp862', + 'cspc862latinhebrew' : 'cp862', + 'ibm862' : 'cp862', + + # cp863 codec + '863' : 'cp863', + 'csibm863' : 'cp863', + 'ibm863' : 'cp863', + + # cp864 codec + '864' : 'cp864', + 'csibm864' : 'cp864', + 'ibm864' : 'cp864', + + # cp865 codec + '865' : 'cp865', + 'csibm865' : 'cp865', + 'ibm865' : 'cp865', + + # cp866 codec + '866' : 'cp866', + 'csibm866' : 'cp866', + 'ibm866' : 'cp866', + + # cp869 codec + '869' : 'cp869', + 'cp_gr' : 'cp869', + 'csibm869' : 'cp869', + 'ibm869' : 'cp869', + + # cp932 codec + '932' : 'cp932', + 'ms932' : 'cp932', + 'mskanji' : 'cp932', + 'ms_kanji' : 'cp932', + + # cp949 codec + '949' : 'cp949', + 'ms949' : 'cp949', + 'uhc' : 'cp949', + + # cp950 codec + '950' : 'cp950', + 'ms950' : 'cp950', + + # euc_jis_2004 codec + 'jisx0213' : 'euc_jis_2004', + 'eucjis2004' : 'euc_jis_2004', + 'euc_jis2004' : 'euc_jis_2004', + + # euc_jisx0213 codec + 'eucjisx0213' : 'euc_jisx0213', + + # euc_jp codec + 'eucjp' : 'euc_jp', + 'ujis' : 'euc_jp', + 'u_jis' : 'euc_jp', + + # euc_kr codec + 'euckr' : 'euc_kr', + 'korean' : 'euc_kr', + 'ksc5601' : 'euc_kr', + 'ks_c_5601' : 'euc_kr', + 'ks_c_5601_1987' : 'euc_kr', + 'ksx1001' : 'euc_kr', + 'ks_x_1001' : 'euc_kr', + + # gb18030 codec + 'gb18030_2000' : 'gb18030', + + # gb2312 codec + 'chinese' : 'gb2312', + 'csiso58gb231280' : 'gb2312', + 'euc_cn' : 'gb2312', + 'euccn' : 'gb2312', + 'eucgb2312_cn' : 'gb2312', + 'gb2312_1980' : 'gb2312', + 'gb2312_80' : 'gb2312', + 'iso_ir_58' : 'gb2312', + + # gbk codec + '936' : 'gbk', + 'cp936' : 'gbk', + 'ms936' : 'gbk', + + # hex_codec codec + 'hex' : 'hex_codec', + + # hp_roman8 codec + 'roman8' : 'hp_roman8', + 'r8' : 'hp_roman8', + 'csHPRoman8' : 'hp_roman8', + + # hz codec + 'hzgb' : 'hz', + 'hz_gb' : 'hz', + 'hz_gb_2312' : 'hz', + + # iso2022_jp codec + 'csiso2022jp' : 'iso2022_jp', + 'iso2022jp' : 'iso2022_jp', + 'iso_2022_jp' : 'iso2022_jp', + + # iso2022_jp_1 codec + 'iso2022jp_1' : 'iso2022_jp_1', + 'iso_2022_jp_1' : 'iso2022_jp_1', + + # iso2022_jp_2 codec + 'iso2022jp_2' : 'iso2022_jp_2', + 'iso_2022_jp_2' : 'iso2022_jp_2', + + # iso2022_jp_2004 codec + 'iso_2022_jp_2004' : 'iso2022_jp_2004', + 'iso2022jp_2004' : 'iso2022_jp_2004', + + # iso2022_jp_3 codec + 'iso2022jp_3' : 'iso2022_jp_3', + 'iso_2022_jp_3' : 'iso2022_jp_3', + + # iso2022_jp_ext codec + 'iso2022jp_ext' : 'iso2022_jp_ext', + 'iso_2022_jp_ext' : 'iso2022_jp_ext', + + # iso2022_kr codec + 'csiso2022kr' : 'iso2022_kr', + 'iso2022kr' : 'iso2022_kr', + 'iso_2022_kr' : 'iso2022_kr', + + # iso8859_10 codec + 'csisolatin6' : 'iso8859_10', + 'iso_8859_10' : 'iso8859_10', + 'iso_8859_10_1992' : 'iso8859_10', + 'iso_ir_157' : 'iso8859_10', + 'l6' : 'iso8859_10', + 'latin6' : 'iso8859_10', + + # iso8859_11 codec + 'thai' : 'iso8859_11', + 'iso_8859_11' : 'iso8859_11', + 'iso_8859_11_2001' : 'iso8859_11', + + # iso8859_13 codec + 'iso_8859_13' : 'iso8859_13', + 'l7' : 'iso8859_13', + 'latin7' : 'iso8859_13', + + # iso8859_14 codec + 'iso_8859_14' : 'iso8859_14', + 'iso_8859_14_1998' : 'iso8859_14', + 'iso_celtic' : 'iso8859_14', + 'iso_ir_199' : 'iso8859_14', + 'l8' : 'iso8859_14', + 'latin8' : 'iso8859_14', + + # iso8859_15 codec + 'iso_8859_15' : 'iso8859_15', + 'l9' : 'iso8859_15', + 'latin9' : 'iso8859_15', + + # iso8859_16 codec + 'iso_8859_16' : 'iso8859_16', + 'iso_8859_16_2001' : 'iso8859_16', + 'iso_ir_226' : 'iso8859_16', + 'l10' : 'iso8859_16', + 'latin10' : 'iso8859_16', + + # iso8859_2 codec + 'csisolatin2' : 'iso8859_2', + 'iso_8859_2' : 'iso8859_2', + 'iso_8859_2_1987' : 'iso8859_2', + 'iso_ir_101' : 'iso8859_2', + 'l2' : 'iso8859_2', + 'latin2' : 'iso8859_2', + + # iso8859_3 codec + 'csisolatin3' : 'iso8859_3', + 'iso_8859_3' : 'iso8859_3', + 'iso_8859_3_1988' : 'iso8859_3', + 'iso_ir_109' : 'iso8859_3', + 'l3' : 'iso8859_3', + 'latin3' : 'iso8859_3', + + # iso8859_4 codec + 'csisolatin4' : 'iso8859_4', + 'iso_8859_4' : 'iso8859_4', + 'iso_8859_4_1988' : 'iso8859_4', + 'iso_ir_110' : 'iso8859_4', + 'l4' : 'iso8859_4', + 'latin4' : 'iso8859_4', + + # iso8859_5 codec + 'csisolatincyrillic' : 'iso8859_5', + 'cyrillic' : 'iso8859_5', + 'iso_8859_5' : 'iso8859_5', + 'iso_8859_5_1988' : 'iso8859_5', + 'iso_ir_144' : 'iso8859_5', + + # iso8859_6 codec + 'arabic' : 'iso8859_6', + 'asmo_708' : 'iso8859_6', + 'csisolatinarabic' : 'iso8859_6', + 'ecma_114' : 'iso8859_6', + 'iso_8859_6' : 'iso8859_6', + 'iso_8859_6_1987' : 'iso8859_6', + 'iso_ir_127' : 'iso8859_6', + + # iso8859_7 codec + 'csisolatingreek' : 'iso8859_7', + 'ecma_118' : 'iso8859_7', + 'elot_928' : 'iso8859_7', + 'greek' : 'iso8859_7', + 'greek8' : 'iso8859_7', + 'iso_8859_7' : 'iso8859_7', + 'iso_8859_7_1987' : 'iso8859_7', + 'iso_ir_126' : 'iso8859_7', + + # iso8859_8 codec + 'csisolatinhebrew' : 'iso8859_8', + 'hebrew' : 'iso8859_8', + 'iso_8859_8' : 'iso8859_8', + 'iso_8859_8_1988' : 'iso8859_8', + 'iso_ir_138' : 'iso8859_8', + + # iso8859_9 codec + 'csisolatin5' : 'iso8859_9', + 'iso_8859_9' : 'iso8859_9', + 'iso_8859_9_1989' : 'iso8859_9', + 'iso_ir_148' : 'iso8859_9', + 'l5' : 'iso8859_9', + 'latin5' : 'iso8859_9', + + # johab codec + 'cp1361' : 'johab', + 'ms1361' : 'johab', + + # koi8_r codec + 'cskoi8r' : 'koi8_r', + + # latin_1 codec + # + # Note that the latin_1 codec is implemented internally in C and a + # lot faster than the charmap codec iso8859_1 which uses the same + # encoding. This is why we discourage the use of the iso8859_1 + # codec and alias it to latin_1 instead. + # + '8859' : 'latin_1', + 'cp819' : 'latin_1', + 'csisolatin1' : 'latin_1', + 'ibm819' : 'latin_1', + 'iso8859' : 'latin_1', + 'iso8859_1' : 'latin_1', + 'iso_8859_1' : 'latin_1', + 'iso_8859_1_1987' : 'latin_1', + 'iso_ir_100' : 'latin_1', + 'l1' : 'latin_1', + 'latin' : 'latin_1', + 'latin1' : 'latin_1', + + # mac_cyrillic codec + 'maccyrillic' : 'mac_cyrillic', + + # mac_greek codec + 'macgreek' : 'mac_greek', + + # mac_iceland codec + 'maciceland' : 'mac_iceland', + + # mac_latin2 codec + 'maccentraleurope' : 'mac_latin2', + 'maclatin2' : 'mac_latin2', + + # mac_roman codec + 'macroman' : 'mac_roman', + + # mac_turkish codec + 'macturkish' : 'mac_turkish', + + # mbcs codec + 'dbcs' : 'mbcs', + + # ptcp154 codec + 'csptcp154' : 'ptcp154', + 'pt154' : 'ptcp154', + 'cp154' : 'ptcp154', + 'cyrillic_asian' : 'ptcp154', + + # quopri_codec codec + 'quopri' : 'quopri_codec', + 'quoted_printable' : 'quopri_codec', + 'quotedprintable' : 'quopri_codec', + + # rot_13 codec + 'rot13' : 'rot_13', + + # shift_jis codec + 'csshiftjis' : 'shift_jis', + 'shiftjis' : 'shift_jis', + 'sjis' : 'shift_jis', + 's_jis' : 'shift_jis', + + # shift_jis_2004 codec + 'shiftjis2004' : 'shift_jis_2004', + 'sjis_2004' : 'shift_jis_2004', + 's_jis_2004' : 'shift_jis_2004', + + # shift_jisx0213 codec + 'shiftjisx0213' : 'shift_jisx0213', + 'sjisx0213' : 'shift_jisx0213', + 's_jisx0213' : 'shift_jisx0213', + + # tactis codec + 'tis260' : 'tactis', + + # tis_620 codec + 'tis620' : 'tis_620', + 'tis_620_0' : 'tis_620', + 'tis_620_2529_0' : 'tis_620', + 'tis_620_2529_1' : 'tis_620', + 'iso_ir_166' : 'tis_620', + + # utf_16 codec + 'u16' : 'utf_16', + 'utf16' : 'utf_16', + + # utf_16_be codec + 'unicodebigunmarked' : 'utf_16_be', + 'utf_16be' : 'utf_16_be', + + # utf_16_le codec + 'unicodelittleunmarked' : 'utf_16_le', + 'utf_16le' : 'utf_16_le', + + # utf_32 codec + 'u32' : 'utf_32', + 'utf32' : 'utf_32', + + # utf_32_be codec + 'utf_32be' : 'utf_32_be', + + # utf_32_le codec + 'utf_32le' : 'utf_32_le', + + # utf_7 codec + 'u7' : 'utf_7', + 'utf7' : 'utf_7', + 'unicode_1_1_utf_7' : 'utf_7', + + # utf_8 codec + 'u8' : 'utf_8', + 'utf' : 'utf_8', + 'utf8' : 'utf_8', + 'utf8_ucs2' : 'utf_8', + 'utf8_ucs4' : 'utf_8', + + # uu_codec codec + 'uu' : 'uu_codec', + + # zlib_codec codec + 'zip' : 'zlib_codec', + 'zlib' : 'zlib_codec', + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/ascii.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,50 @@ +""" Python 'ascii' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + # Note: Binding these as C functions will result in the class not + # converting them to methods. This is intended. + encode = codecs.ascii_encode + decode = codecs.ascii_decode + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.ascii_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.ascii_decode(input, self.errors)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +class StreamConverter(StreamWriter,StreamReader): + + encode = codecs.ascii_decode + decode = codecs.ascii_encode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='ascii', + encode=Codec.encode, + decode=Codec.decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/base64_codec.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,80 @@ +""" Python 'base64_codec' Codec - base64 content transfer encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +""" +import codecs, base64 + +### Codec APIs + +def base64_encode(input,errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = base64.encodestring(input) + return (output, len(input)) + +def base64_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = base64.decodestring(input) + return (output, len(input)) + +class Codec(codecs.Codec): + + def encode(self, input,errors='strict'): + return base64_encode(input,errors) + def decode(self, input,errors='strict'): + return base64_decode(input,errors) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + assert self.errors == 'strict' + return base64.encodestring(input) + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + assert self.errors == 'strict' + return base64.decodestring(input) + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='base64', + encode=base64_encode, + decode=base64_decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + _is_text_encoding=False, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/big5.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# big5.py: Python Unicode Codec for BIG5 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_tw, codecs +import _multibytecodec as mbc + +codec = _codecs_tw.getcodec('big5') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='big5', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/big5hkscs.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# big5hkscs.py: Python Unicode Codec for BIG5HKSCS +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_hk, codecs +import _multibytecodec as mbc + +codec = _codecs_hk.getcodec('big5hkscs') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='big5hkscs', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/bz2_codec.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,103 @@ +""" Python 'bz2_codec' Codec - bz2 compression encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Adapted by Raymond Hettinger from zlib_codec.py which was written + by Marc-Andre Lemburg (mal@lemburg.com). + +""" +import codecs +import bz2 # this codec needs the optional bz2 module ! + +### Codec APIs + +def bz2_encode(input,errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = bz2.compress(input) + return (output, len(input)) + +def bz2_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = bz2.decompress(input) + return (output, len(input)) + +class Codec(codecs.Codec): + + def encode(self, input, errors='strict'): + return bz2_encode(input, errors) + def decode(self, input, errors='strict'): + return bz2_decode(input, errors) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + assert errors == 'strict' + self.errors = errors + self.compressobj = bz2.BZ2Compressor() + + def encode(self, input, final=False): + if final: + c = self.compressobj.compress(input) + return c + self.compressobj.flush() + else: + return self.compressobj.compress(input) + + def reset(self): + self.compressobj = bz2.BZ2Compressor() + +class IncrementalDecoder(codecs.IncrementalDecoder): + def __init__(self, errors='strict'): + assert errors == 'strict' + self.errors = errors + self.decompressobj = bz2.BZ2Decompressor() + + def decode(self, input, final=False): + try: + return self.decompressobj.decompress(input) + except EOFError: + return '' + + def reset(self): + self.decompressobj = bz2.BZ2Decompressor() + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name="bz2", + encode=bz2_encode, + decode=bz2_decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + _is_text_encoding=False, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/charmap.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,69 @@ +""" Generic Python Character Mapping Codec. + + Use this codec directly rather than through the automatic + conversion mechanisms supplied by unicode() and .encode(). + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + # Note: Binding these as C functions will result in the class not + # converting them to methods. This is intended. + encode = codecs.charmap_encode + decode = codecs.charmap_decode + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict', mapping=None): + codecs.IncrementalEncoder.__init__(self, errors) + self.mapping = mapping + + def encode(self, input, final=False): + return codecs.charmap_encode(input, self.errors, self.mapping)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def __init__(self, errors='strict', mapping=None): + codecs.IncrementalDecoder.__init__(self, errors) + self.mapping = mapping + + def decode(self, input, final=False): + return codecs.charmap_decode(input, self.errors, self.mapping)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + + def __init__(self,stream,errors='strict',mapping=None): + codecs.StreamWriter.__init__(self,stream,errors) + self.mapping = mapping + + def encode(self,input,errors='strict'): + return Codec.encode(input,errors,self.mapping) + +class StreamReader(Codec,codecs.StreamReader): + + def __init__(self,stream,errors='strict',mapping=None): + codecs.StreamReader.__init__(self,stream,errors) + self.mapping = mapping + + def decode(self,input,errors='strict'): + return Codec.decode(input,errors,self.mapping) + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='charmap', + encode=Codec.encode, + decode=Codec.decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp037.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp037 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp037', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'\xa2' # 0x4A -> CENT SIGN + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1006.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1006 generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1006', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO + u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE + u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO + u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE + u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR + u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE + u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX + u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN + u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT + u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE + u'\u060c' # 0xAB -> ARABIC COMMA + u'\u061b' # 0xAC -> ARABIC SEMICOLON + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u061f' # 0xAE -> ARABIC QUESTION MARK + u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM + u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM + u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM + u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM + u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM + u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM + u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM + u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM + u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM + u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM + u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM + u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM + u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM + u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN + u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM + u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM + u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM + u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM + u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM + u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM + u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM + u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM + u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM + u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM + u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM + u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM + u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM + u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM + u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM + u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM + u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM + u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM + u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM + u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM + u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM + u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM + u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM + u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM + u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM + u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM + u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM + u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM + u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM + u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM + u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM + u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1026.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1026 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1026', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'{' # 0x48 -> LEFT CURLY BRACKET + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' # 0x4F -> EXCLAMATION MARK + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'[' # 0x68 -> LEFT SQUARE BRACKET + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I + u':' # 0x7A -> COLON + u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'}' # 0x8C -> RIGHT CURLY BRACKET + u'`' # 0x8D -> GRAVE ACCENT + u'\xa6' # 0x8E -> BROKEN BAR + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u']' # 0xAC -> RIGHT SQUARE BRACKET + u'$' # 0xAD -> DOLLAR SIGN + u'@' # 0xAE -> COMMERCIAL AT + u'\xae' # 0xAF -> REGISTERED SIGN + u'\xa2' # 0xB0 -> CENT SIGN + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'\xac' # 0xBA -> NOT SIGN + u'|' # 0xBB -> VERTICAL LINE + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'~' # 0xCC -> TILDE + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'\u011f' # 0xD0 -> LATIN SMALL LETTER G WITH BREVE + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\\' # 0xDC -> REVERSE SOLIDUS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'#' # 0xEC -> NUMBER SIGN + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'"' # 0xFC -> QUOTATION MARK + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1140.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1140 generated from 'python-mappings/CP1140.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1140', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'\xa2' # 0x4A -> CENT SIGN + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\u20ac' # 0x9F -> EURO SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1250.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1250 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1250', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x83 -> UNDEFINED + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\ufffe' # 0x88 -> UNDEFINED + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON + u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE + u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON + u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON + u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u02c7' # 0xA1 -> CARON + u'\u02d8' # 0xA2 -> BREVE + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u02db' # 0xB2 -> OGONEK + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON + u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT + u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0xFF -> DOT ABOVE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1251.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1251 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1251', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u20ac' # 0x88 -> EURO SIGN + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE + u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE + u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE + u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE + u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE + u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE + u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE + u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U + u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO + u'\u2116' # 0xB9 -> NUMERO SIGN + u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE + u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE + u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI + u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1252.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1252 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1252', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1253.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1253 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1253', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\ufffe' # 0x88 -> UNDEFINED + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\ufffe' # 0xAA -> UNDEFINED + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u2015' # 0xAF -> HORIZONTAL BAR + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u0384' # 0xB4 -> GREEK TONOS + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU + u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU + u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI + u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' # 0xD2 -> UNDEFINED + u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU + u'\u03bd' # 0xED -> GREEK SMALL LETTER NU + u'\u03be' # 0xEE -> GREEK SMALL LETTER XI + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' # 0xFF -> UNDEFINED +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1254.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1254 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1254', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1255.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1255 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1255', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xd7' # 0xAA -> MULTIPLICATION SIGN + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xf7' # 0xBA -> DIVISION SIGN + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA + u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL + u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH + u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS + u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ + u'\u05b5' # 0xC5 -> HEBREW POINT TSERE + u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL + u'\u05b7' # 0xC7 -> HEBREW POINT PATAH + u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS + u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM + u'\ufffe' # 0xCA -> UNDEFINED + u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS + u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ + u'\u05bd' # 0xCD -> HEBREW POINT METEG + u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF + u'\u05bf' # 0xCF -> HEBREW POINT RAFE + u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ + u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT + u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT + u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ + u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV + u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD + u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD + u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH + u'\u05f4' # 0xD8 -> HEBREW PUNCTUATION GERSHAYIM + u'\ufffe' # 0xD9 -> UNDEFINED + u'\ufffe' # 0xDA -> UNDEFINED + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\ufffe' # 0xDF -> UNDEFINED + u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF + u'\u05d1' # 0xE1 -> HEBREW LETTER BET + u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0xE3 -> HEBREW LETTER DALET + u'\u05d4' # 0xE4 -> HEBREW LETTER HE + u'\u05d5' # 0xE5 -> HEBREW LETTER VAV + u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0xE7 -> HEBREW LETTER HET + u'\u05d8' # 0xE8 -> HEBREW LETTER TET + u'\u05d9' # 0xE9 -> HEBREW LETTER YOD + u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF + u'\u05db' # 0xEB -> HEBREW LETTER KAF + u'\u05dc' # 0xEC -> HEBREW LETTER LAMED + u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM + u'\u05de' # 0xEE -> HEBREW LETTER MEM + u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0xF0 -> HEBREW LETTER NUN + u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN + u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0xF4 -> HEBREW LETTER PE + u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI + u'\u05e7' # 0xF7 -> HEBREW LETTER QOF + u'\u05e8' # 0xF8 -> HEBREW LETTER RESH + u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN + u'\u05ea' # 0xFA -> HEBREW LETTER TAV + u'\ufffe' # 0xFB -> UNDEFINED + u'\ufffe' # 0xFC -> UNDEFINED + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\ufffe' # 0xFF -> UNDEFINED +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1256.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1256 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1256', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\u067e' # 0x81 -> ARABIC LETTER PEH + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\u0679' # 0x8A -> ARABIC LETTER TTEH + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\u0686' # 0x8D -> ARABIC LETTER TCHEH + u'\u0698' # 0x8E -> ARABIC LETTER JEH + u'\u0688' # 0x8F -> ARABIC LETTER DDAL + u'\u06af' # 0x90 -> ARABIC LETTER GAF + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\u0691' # 0x9A -> ARABIC LETTER RREH + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER + u'\u200d' # 0x9E -> ZERO WIDTH JOINER + u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u060c' # 0xA1 -> ARABIC COMMA + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u061b' # 0xBA -> ARABIC SEMICOLON + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0637' # 0xD8 -> ARABIC LETTER TAH + u'\u0638' # 0xD9 -> ARABIC LETTER ZAH + u'\u0639' # 0xDA -> ARABIC LETTER AIN + u'\u063a' # 0xDB -> ARABIC LETTER GHAIN + u'\u0640' # 0xDC -> ARABIC TATWEEL + u'\u0641' # 0xDD -> ARABIC LETTER FEH + u'\u0642' # 0xDE -> ARABIC LETTER QAF + u'\u0643' # 0xDF -> ARABIC LETTER KAF + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\u0644' # 0xE1 -> ARABIC LETTER LAM + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0645' # 0xE3 -> ARABIC LETTER MEEM + u'\u0646' # 0xE4 -> ARABIC LETTER NOON + u'\u0647' # 0xE5 -> ARABIC LETTER HEH + u'\u0648' # 0xE6 -> ARABIC LETTER WAW + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xED -> ARABIC LETTER YEH + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u064b' # 0xF0 -> ARABIC FATHATAN + u'\u064c' # 0xF1 -> ARABIC DAMMATAN + u'\u064d' # 0xF2 -> ARABIC KASRATAN + u'\u064e' # 0xF3 -> ARABIC FATHA + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u064f' # 0xF5 -> ARABIC DAMMA + u'\u0650' # 0xF6 -> ARABIC KASRA + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0651' # 0xF8 -> ARABIC SHADDA + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0652' # 0xFA -> ARABIC SUKUN + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1257.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1257 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1257', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\ufffe' # 0x83 -> UNDEFINED + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\ufffe' # 0x88 -> UNDEFINED + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\xa8' # 0x8D -> DIAERESIS + u'\u02c7' # 0x8E -> CARON + u'\xb8' # 0x8F -> CEDILLA + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufffe' # 0x9C -> UNDEFINED + u'\xaf' # 0x9D -> MACRON + u'\u02db' # 0x9E -> OGONEK + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\ufffe' # 0xA1 -> UNDEFINED + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\ufffe' # 0xA5 -> UNDEFINED + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0xBF -> LATIN SMALL LETTER AE + u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON + u'\u02d9' # 0xFF -> DOT ABOVE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp1258.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp1258 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp1258', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK + u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK + u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\u2020' # 0x86 -> DAGGER + u'\u2021' # 0x87 -> DOUBLE DAGGER + u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u2030' # 0x89 -> PER MILLE SIGN + u'\ufffe' # 0x8A -> UNDEFINED + u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\u02dc' # 0x98 -> SMALL TILDE + u'\u2122' # 0x99 -> TRADE MARK SIGN + u'\ufffe' # 0x9A -> UNDEFINED + u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN + u'\u0303' # 0xDE -> COMBINING TILDE + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\u0323' # 0xF2 -> COMBINING DOT BELOW + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN + u'\u20ab' # 0xFE -> DONG SIGN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp424.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp424 generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp424', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> SELECT + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> REQUIRED NEW LINE + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> GRAPHIC ESCAPE + u'\x8d' # 0x09 -> SUPERSCRIPT + u'\x8e' # 0x0A -> REPEAT + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION + u'\x85' # 0x15 -> NEW LINE + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> UNIT BACK SPACE + u'\x8f' # 0x1B -> CUSTOMER USE ONE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> DIGIT SELECT + u'\x81' # 0x21 -> START OF SIGNIFICANCE + u'\x82' # 0x22 -> FIELD SEPARATOR + u'\x83' # 0x23 -> WORD UNDERSCORE + u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> SET ATTRIBUTE + u'\x89' # 0x29 -> START FIELD EXTENDED + u'\x8a' # 0x2A -> SET MODE OR SWITCH + u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX + u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> <reserved> + u'\x91' # 0x31 -> <reserved> + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> INDEX RETURN + u'\x94' # 0x34 -> PRESENTATION POSITION + u'\x95' # 0x35 -> TRANSPARENT + u'\x96' # 0x36 -> NUMERIC BACKSPACE + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> SUBSCRIPT + u'\x99' # 0x39 -> INDENT TABULATION + u'\x9a' # 0x3A -> REVERSE FORM FEED + u'\x9b' # 0x3B -> CUSTOMER USE THREE + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> <reserved> + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\u05d0' # 0x41 -> HEBREW LETTER ALEF + u'\u05d1' # 0x42 -> HEBREW LETTER BET + u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x44 -> HEBREW LETTER DALET + u'\u05d4' # 0x45 -> HEBREW LETTER HE + u'\u05d5' # 0x46 -> HEBREW LETTER VAV + u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x48 -> HEBREW LETTER HET + u'\u05d8' # 0x49 -> HEBREW LETTER TET + u'\xa2' # 0x4A -> CENT SIGN + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'|' # 0x4F -> VERTICAL LINE + u'&' # 0x50 -> AMPERSAND + u'\u05d9' # 0x51 -> HEBREW LETTER YOD + u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x53 -> HEBREW LETTER KAF + u'\u05dc' # 0x54 -> HEBREW LETTER LAMED + u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x56 -> HEBREW LETTER MEM + u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x58 -> HEBREW LETTER NUN + u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH + u'!' # 0x5A -> EXCLAMATION MARK + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'\xac' # 0x5F -> NOT SIGN + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\u05e2' # 0x62 -> HEBREW LETTER AYIN + u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x64 -> HEBREW LETTER PE + u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x66 -> HEBREW LETTER TSADI + u'\u05e7' # 0x67 -> HEBREW LETTER QOF + u'\u05e8' # 0x68 -> HEBREW LETTER RESH + u'\u05e9' # 0x69 -> HEBREW LETTER SHIN + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK + u'\ufffe' # 0x70 -> UNDEFINED + u'\u05ea' # 0x71 -> HEBREW LETTER TAV + u'\ufffe' # 0x72 -> UNDEFINED + u'\ufffe' # 0x73 -> UNDEFINED + u'\xa0' # 0x74 -> NO-BREAK SPACE + u'\ufffe' # 0x75 -> UNDEFINED + u'\ufffe' # 0x76 -> UNDEFINED + u'\ufffe' # 0x77 -> UNDEFINED + u'\u2017' # 0x78 -> DOUBLE LOW LINE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\ufffe' # 0x80 -> UNDEFINED + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\ufffe' # 0x9A -> UNDEFINED + u'\ufffe' # 0x9B -> UNDEFINED + u'\ufffe' # 0x9C -> UNDEFINED + u'\xb8' # 0x9D -> CEDILLA + u'\ufffe' # 0x9E -> UNDEFINED + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\ufffe' # 0xAA -> UNDEFINED + u'\ufffe' # 0xAB -> UNDEFINED + u'\ufffe' # 0xAC -> UNDEFINED + u'\ufffe' # 0xAD -> UNDEFINED + u'\ufffe' # 0xAE -> UNDEFINED + u'\xae' # 0xAF -> REGISTERED SIGN + u'^' # 0xB0 -> CIRCUMFLEX ACCENT + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'[' # 0xBA -> LEFT SQUARE BRACKET + u']' # 0xBB -> RIGHT SQUARE BRACKET + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\ufffe' # 0xCB -> UNDEFINED + u'\ufffe' # 0xCC -> UNDEFINED + u'\ufffe' # 0xCD -> UNDEFINED + u'\ufffe' # 0xCE -> UNDEFINED + u'\ufffe' # 0xCF -> UNDEFINED + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\ufffe' # 0xDF -> UNDEFINED + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\ufffe' # 0xEB -> UNDEFINED + u'\ufffe' # 0xEC -> UNDEFINED + u'\ufffe' # 0xED -> UNDEFINED + u'\ufffe' # 0xEE -> UNDEFINED + u'\ufffe' # 0xEF -> UNDEFINED + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\ufffe' # 0xFB -> UNDEFINED + u'\ufffe' # 0xFC -> UNDEFINED + u'\ufffe' # 0xFD -> UNDEFINED + u'\ufffe' # 0xFE -> UNDEFINED + u'\x9f' # 0xFF -> EIGHT ONES +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp437.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec cp437 generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp437', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp500.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp500 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp500', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\xa0' # 0x41 -> NO-BREAK SPACE + u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE + u'[' # 0x4A -> LEFT SQUARE BRACKET + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' # 0x4F -> EXCLAMATION MARK + u'&' # 0x50 -> AMPERSAND + u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE + u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE + u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) + u']' # 0x5A -> RIGHT SQUARE BRACKET + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xa6' # 0x6A -> BROKEN BAR + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK + u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE + u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) + u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) + u'\xb1' # 0x8F -> PLUS-MINUS SIGN + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR + u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE + u'\xb8' # 0x9D -> CEDILLA + u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE + u'\xa4' # 0x9F -> CURRENCY SIGN + u'\xb5' # 0xA0 -> MICRO SIGN + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK + u'\xbf' # 0xAB -> INVERTED QUESTION MARK + u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) + u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) + u'\xae' # 0xAF -> REGISTERED SIGN + u'\xa2' # 0xB0 -> CENT SIGN + u'\xa3' # 0xB1 -> POUND SIGN + u'\xa5' # 0xB2 -> YEN SIGN + u'\xb7' # 0xB3 -> MIDDLE DOT + u'\xa9' # 0xB4 -> COPYRIGHT SIGN + u'\xa7' # 0xB5 -> SECTION SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS + u'\xac' # 0xBA -> NOT SIGN + u'|' # 0xBB -> VERTICAL LINE + u'\xaf' # 0xBC -> MACRON + u'\xa8' # 0xBD -> DIAERESIS + u'\xb4' # 0xBE -> ACUTE ACCENT + u'\xd7' # 0xBF -> MULTIPLICATION SIGN + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE + u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb9' # 0xDA -> SUPERSCRIPT ONE + u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE + u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\xf7' # 0xE1 -> DIVISION SIGN + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE + u'\x9f' # 0xFF -> CONTROL +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp720.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,309 @@ +"""Python Character Mapping Codec cp720 generated on Windows: +Vista 6.0.6002 SP2 Multiprocessor Free with the command: + python Tools/unicode/genwincodec.py 720 +"""#" + + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp720', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\x80' + u'\x81' + u'\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\x84' + u'\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE + u'\x86' + u'\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\x8d' + u'\x8e' + u'\x8f' + u'\x90' + u'\u0651' # 0x91 -> ARABIC SHADDA + u'\u0652' # 0x92 -> ARABIC SUKUN + u'\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xa4' # 0x94 -> CURRENCY SIGN + u'\u0640' # 0x95 -> ARABIC TATWEEL + u'\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0621' # 0x98 -> ARABIC LETTER HAMZA + u'\u0622' # 0x99 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0x9A -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0x9B -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\xa3' # 0x9C -> POUND SIGN + u'\u0625' # 0x9D -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0x9E -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0x9F -> ARABIC LETTER ALEF + u'\u0628' # 0xA0 -> ARABIC LETTER BEH + u'\u0629' # 0xA1 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xA2 -> ARABIC LETTER TEH + u'\u062b' # 0xA3 -> ARABIC LETTER THEH + u'\u062c' # 0xA4 -> ARABIC LETTER JEEM + u'\u062d' # 0xA5 -> ARABIC LETTER HAH + u'\u062e' # 0xA6 -> ARABIC LETTER KHAH + u'\u062f' # 0xA7 -> ARABIC LETTER DAL + u'\u0630' # 0xA8 -> ARABIC LETTER THAL + u'\u0631' # 0xA9 -> ARABIC LETTER REH + u'\u0632' # 0xAA -> ARABIC LETTER ZAIN + u'\u0633' # 0xAB -> ARABIC LETTER SEEN + u'\u0634' # 0xAC -> ARABIC LETTER SHEEN + u'\u0635' # 0xAD -> ARABIC LETTER SAD + u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0xB0 -> LIGHT SHADE + u'\u2592' # 0xB1 -> MEDIUM SHADE + u'\u2593' # 0xB2 -> DARK SHADE + u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0xB5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0xB6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0xB8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0xBD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0xBE -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0xC6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xC7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0xCF -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xD0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0xD1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0xD2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0xD3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0xD4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0xD5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0xD6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0xD7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0xD8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0xDB -> FULL BLOCK + u'\u2584' # 0xDC -> LOWER HALF BLOCK + u'\u258c' # 0xDD -> LEFT HALF BLOCK + u'\u2590' # 0xDE -> RIGHT HALF BLOCK + u'\u2580' # 0xDF -> UPPER HALF BLOCK + u'\u0636' # 0xE0 -> ARABIC LETTER DAD + u'\u0637' # 0xE1 -> ARABIC LETTER TAH + u'\u0638' # 0xE2 -> ARABIC LETTER ZAH + u'\u0639' # 0xE3 -> ARABIC LETTER AIN + u'\u063a' # 0xE4 -> ARABIC LETTER GHAIN + u'\u0641' # 0xE5 -> ARABIC LETTER FEH + u'\xb5' # 0xE6 -> MICRO SIGN + u'\u0642' # 0xE7 -> ARABIC LETTER QAF + u'\u0643' # 0xE8 -> ARABIC LETTER KAF + u'\u0644' # 0xE9 -> ARABIC LETTER LAM + u'\u0645' # 0xEA -> ARABIC LETTER MEEM + u'\u0646' # 0xEB -> ARABIC LETTER NOON + u'\u0647' # 0xEC -> ARABIC LETTER HEH + u'\u0648' # 0xED -> ARABIC LETTER WAW + u'\u0649' # 0xEE -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xEF -> ARABIC LETTER YEH + u'\u2261' # 0xF0 -> IDENTICAL TO + u'\u064b' # 0xF1 -> ARABIC FATHATAN + u'\u064c' # 0xF2 -> ARABIC DAMMATAN + u'\u064d' # 0xF3 -> ARABIC KASRATAN + u'\u064e' # 0xF4 -> ARABIC FATHA + u'\u064f' # 0xF5 -> ARABIC DAMMA + u'\u0650' # 0xF6 -> ARABIC KASRA + u'\u2248' # 0xF7 -> ALMOST EQUAL TO + u'\xb0' # 0xF8 -> DEGREE SIGN + u'\u2219' # 0xF9 -> BULLET OPERATOR + u'\xb7' # 0xFA -> MIDDLE DOT + u'\u221a' # 0xFB -> SQUARE ROOT + u'\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0xFD -> SUPERSCRIPT TWO + u'\u25a0' # 0xFE -> BLACK SQUARE + u'\xa0' # 0xFF -> NO-BREAK SPACE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp737.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec cp737 generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp737', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA + 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA + 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA + 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x008b: 0x039c, # GREEK CAPITAL LETTER MU + 0x008c: 0x039d, # GREEK CAPITAL LETTER NU + 0x008d: 0x039e, # GREEK CAPITAL LETTER XI + 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI + 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x0099: 0x03b2, # GREEK SMALL LETTER BETA + 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA + 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA + 0x009e: 0x03b7, # GREEK SMALL LETTER ETA + 0x009f: 0x03b8, # GREEK SMALL LETTER THETA + 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00a3: 0x03bc, # GREEK SMALL LETTER MU + 0x00a4: 0x03bd, # GREEK SMALL LETTER NU + 0x00a5: 0x03be, # GREEK SMALL LETTER XI + 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00a7: 0x03c0, # GREEK SMALL LETTER PI + 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO + 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI + 0x00af: 0x03c8, # GREEK SMALL LETTER PSI + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO + u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x009c -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU + u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI + u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA + u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00f7: 0x00f6, # DIVISION SIGN + 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA + 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA + 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA + 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x008b, # GREEK CAPITAL LETTER MU + 0x039d: 0x008c, # GREEK CAPITAL LETTER NU + 0x039e: 0x008d, # GREEK CAPITAL LETTER XI + 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI + 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x0099, # GREEK SMALL LETTER BETA + 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA + 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA + 0x03b7: 0x009e, # GREEK SMALL LETTER ETA + 0x03b8: 0x009f, # GREEK SMALL LETTER THETA + 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00a3, # GREEK SMALL LETTER MU + 0x03bd: 0x00a4, # GREEK SMALL LETTER NU + 0x03be: 0x00a5, # GREEK SMALL LETTER XI + 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00a7, # GREEK SMALL LETTER PI + 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO + 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU + 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI + 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI + 0x03c8: 0x00af, # GREEK SMALL LETTER PSI + 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp775.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,697 @@ +""" Python Character Mapping Codec cp775 generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp775', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0096: 0x00a2, # CENT SIGN + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x00a4, # CURRENCY SIGN + 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00a7: 0x00a6, # BROKEN BAR + 0x00a8: 0x00a9, # COPYRIGHT SIGN + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON + u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA + u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\xa2' # 0x0096 -> CENT SIGN + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\xa4' # 0x009f -> CURRENCY SIGN + u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE + u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK + u'\xa6' # 0x00a7 -> BROKEN BAR + u'\xa9' # 0x00a8 -> COPYRIGHT SIGN + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON + u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON + u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON + u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK + u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON + u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA + u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA + u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x0096, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x009f, # CURRENCY SIGN + 0x00a6: 0x00a7, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a9: 0x00a8, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON + 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON + 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON + 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON + 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON + 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK + 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA + 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON + 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON + 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK + 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK + 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA + 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA + 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE + 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA + 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON + 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON + 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA + 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON + 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON + 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON + 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK + 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON + 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK + 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK + 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK + 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK + 0x2219: 0x00f9, # BULLET OPERATOR + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp850.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp850', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH + u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN + u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2017' # 0x00f2 -> DOUBLE LOW LINE + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x2017: 0x00f2, # DOUBLE LOW LINE + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp852.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp852', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE + 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE + 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE + 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT + 0x00f2: 0x02db, # OGONEK + 0x00f3: 0x02c7, # CARON + 0x00f4: 0x02d8, # BREVE + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x02d9, # DOT ABOVE + 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON + u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON + u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON + u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON + u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK + u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON + u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK + u'\xac' # 0x00aa -> NOT SIGN + u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE + u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON + u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON + u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE + u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON + u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON + u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON + u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE + u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT + u'\u02db' # 0x00f2 -> OGONEK + u'\u02c7' # 0x00f3 -> CARON + u'\u02d8' # 0x00f4 -> BREVE + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u02d9' # 0x00fa -> DOT ABOVE + u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON + u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b8: 0x00f7, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE + 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE + 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK + 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE + 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE + 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON + 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON + 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON + 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON + 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE + 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE + 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK + 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK + 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON + 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON + 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE + 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE + 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON + 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON + 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE + 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE + 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE + 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE + 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON + 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON + 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE + 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE + 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON + 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON + 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE + 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE + 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA + 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON + 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON + 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA + 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA + 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON + 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON + 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE + 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE + 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON + 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON + 0x02c7: 0x00f3, # CARON + 0x02d8: 0x00f4, # BREVE + 0x02d9: 0x00fa, # DOT ABOVE + 0x02db: 0x00f2, # OGONEK + 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp855.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp855', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE + 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE + 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE + 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE + 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO + 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE + 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE + 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI + 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE + 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE + 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE + 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE + 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE + 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE + 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE + 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE + 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE + 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE + 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE + 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU + 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I + 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O + 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00ef: 0x2116, # NUMERO SIGN + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E + 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00fd: 0x00a7, # SECTION SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE + u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE + u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE + u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE + u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO + u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO + u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE + u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE + u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI + u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI + u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE + u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE + u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE + u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE + u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE + u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE + u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE + u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE + u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE + u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE + u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U + u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE + u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE + u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU + u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU + u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A + u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE + u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE + u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE + u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE + u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE + u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE + u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE + u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE + u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF + u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF + u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE + u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA + u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA + u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I + u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I + u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA + u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL + u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL + u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM + u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM + u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN + u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN + u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O + u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O + u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE + u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA + u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER + u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER + u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES + u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES + u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE + u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE + u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U + u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U + u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE + u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE + u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE + u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE + u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u2116' # 0x00ef -> NUMERO SIGN + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU + u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU + u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE + u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE + u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA + u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA + u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E + u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E + u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA + u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE + u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE + u'\xa7' # 0x00fd -> SECTION SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a7: 0x00fd, # SECTION SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO + 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE + 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE + 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE + 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI + 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE + 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE + 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE + 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE + 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE + 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U + 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE + 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I + 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O + 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U + 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E + 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA + 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO + 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE + 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE + 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE + 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI + 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE + 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE + 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE + 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE + 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE + 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U + 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE + 0x2116: 0x00ef, # NUMERO SIGN + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp856.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp856 generated from 'MAPPINGS/VENDORS/MISC/CP856.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp856', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u05d0' # 0x80 -> HEBREW LETTER ALEF + u'\u05d1' # 0x81 -> HEBREW LETTER BET + u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x83 -> HEBREW LETTER DALET + u'\u05d4' # 0x84 -> HEBREW LETTER HE + u'\u05d5' # 0x85 -> HEBREW LETTER VAV + u'\u05d6' # 0x86 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x87 -> HEBREW LETTER HET + u'\u05d8' # 0x88 -> HEBREW LETTER TET + u'\u05d9' # 0x89 -> HEBREW LETTER YOD + u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x8B -> HEBREW LETTER KAF + u'\u05dc' # 0x8C -> HEBREW LETTER LAMED + u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x8E -> HEBREW LETTER MEM + u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x90 -> HEBREW LETTER NUN + u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x92 -> HEBREW LETTER AYIN + u'\u05e3' # 0x93 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x94 -> HEBREW LETTER PE + u'\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x96 -> HEBREW LETTER TSADI + u'\u05e7' # 0x97 -> HEBREW LETTER QOF + u'\u05e8' # 0x98 -> HEBREW LETTER RESH + u'\u05e9' # 0x99 -> HEBREW LETTER SHIN + u'\u05ea' # 0x9A -> HEBREW LETTER TAV + u'\ufffe' # 0x9B -> UNDEFINED + u'\xa3' # 0x9C -> POUND SIGN + u'\ufffe' # 0x9D -> UNDEFINED + u'\xd7' # 0x9E -> MULTIPLICATION SIGN + u'\ufffe' # 0x9F -> UNDEFINED + u'\ufffe' # 0xA0 -> UNDEFINED + u'\ufffe' # 0xA1 -> UNDEFINED + u'\ufffe' # 0xA2 -> UNDEFINED + u'\ufffe' # 0xA3 -> UNDEFINED + u'\ufffe' # 0xA4 -> UNDEFINED + u'\ufffe' # 0xA5 -> UNDEFINED + u'\ufffe' # 0xA6 -> UNDEFINED + u'\ufffe' # 0xA7 -> UNDEFINED + u'\ufffe' # 0xA8 -> UNDEFINED + u'\xae' # 0xA9 -> REGISTERED SIGN + u'\xac' # 0xAA -> NOT SIGN + u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF + u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER + u'\ufffe' # 0xAD -> UNDEFINED + u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0xB0 -> LIGHT SHADE + u'\u2592' # 0xB1 -> MEDIUM SHADE + u'\u2593' # 0xB2 -> DARK SHADE + u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\ufffe' # 0xB5 -> UNDEFINED + u'\ufffe' # 0xB6 -> UNDEFINED + u'\ufffe' # 0xB7 -> UNDEFINED + u'\xa9' # 0xB8 -> COPYRIGHT SIGN + u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0xBD -> CENT SIGN + u'\xa5' # 0xBE -> YEN SIGN + u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\ufffe' # 0xC6 -> UNDEFINED + u'\ufffe' # 0xC7 -> UNDEFINED + u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0xCF -> CURRENCY SIGN + u'\ufffe' # 0xD0 -> UNDEFINED + u'\ufffe' # 0xD1 -> UNDEFINED + u'\ufffe' # 0xD2 -> UNDEFINED + u'\ufffe' # 0xD3 -> UNDEFINEDS + u'\ufffe' # 0xD4 -> UNDEFINED + u'\ufffe' # 0xD5 -> UNDEFINED + u'\ufffe' # 0xD6 -> UNDEFINEDE + u'\ufffe' # 0xD7 -> UNDEFINED + u'\ufffe' # 0xD8 -> UNDEFINED + u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0xDB -> FULL BLOCK + u'\u2584' # 0xDC -> LOWER HALF BLOCK + u'\xa6' # 0xDD -> BROKEN BAR + u'\ufffe' # 0xDE -> UNDEFINED + u'\u2580' # 0xDF -> UPPER HALF BLOCK + u'\ufffe' # 0xE0 -> UNDEFINED + u'\ufffe' # 0xE1 -> UNDEFINED + u'\ufffe' # 0xE2 -> UNDEFINED + u'\ufffe' # 0xE3 -> UNDEFINED + u'\ufffe' # 0xE4 -> UNDEFINED + u'\ufffe' # 0xE5 -> UNDEFINED + u'\xb5' # 0xE6 -> MICRO SIGN + u'\ufffe' # 0xE7 -> UNDEFINED + u'\ufffe' # 0xE8 -> UNDEFINED + u'\ufffe' # 0xE9 -> UNDEFINED + u'\ufffe' # 0xEA -> UNDEFINED + u'\ufffe' # 0xEB -> UNDEFINED + u'\ufffe' # 0xEC -> UNDEFINED + u'\ufffe' # 0xED -> UNDEFINED + u'\xaf' # 0xEE -> MACRON + u'\xb4' # 0xEF -> ACUTE ACCENT + u'\xad' # 0xF0 -> SOFT HYPHEN + u'\xb1' # 0xF1 -> PLUS-MINUS SIGN + u'\u2017' # 0xF2 -> DOUBLE LOW LINE + u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0xF4 -> PILCROW SIGN + u'\xa7' # 0xF5 -> SECTION SIGN + u'\xf7' # 0xF6 -> DIVISION SIGN + u'\xb8' # 0xF7 -> CEDILLA + u'\xb0' # 0xF8 -> DEGREE SIGN + u'\xa8' # 0xF9 -> DIAERESIS + u'\xb7' # 0xFA -> MIDDLE DOT + u'\xb9' # 0xFB -> SUPERSCRIPT ONE + u'\xb3' # 0xFC -> SUPERSCRIPT THREE + u'\xb2' # 0xFD -> SUPERSCRIPT TWO + u'\u25a0' # 0xFE -> BLACK SQUARE + u'\xa0' # 0xFF -> NO-BREAK SPACE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp857.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,694 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp857', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE + 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: None, # UNDEFINED + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: None, # UNDEFINED + 0x00e8: 0x00d7, # MULTIPLICATION SIGN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: None, # UNDEFINED + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR + u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\ufffe' # 0x00d5 -> UNDEFINED + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\ufffe' # 0x00e7 -> UNDEFINED + u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE + u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\ufffe' # 0x00f2 -> UNDEFINED + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x00e8, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE + 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE + 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I + 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA + 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp858.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec for CP858, modified from cp850. + +""" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp858', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x00d7, # MULTIPLICATION SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00b8: 0x00a9, # COPYRIGHT SIGN + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x00a2, # CENT SIGN + 0x00be: 0x00a5, # YEN SIGN + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x00a4, # CURRENCY SIGN + 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH + 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00d5: 0x20ac, # EURO SIGN + 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x00a6, # BROKEN BAR + 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN + 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN + 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00ee: 0x00af, # MACRON + 0x00ef: 0x00b4, # ACUTE ACCENT + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2017, # DOUBLE LOW LINE + 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x00b8, # CEDILLA + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x00b9, # SUPERSCRIPT ONE + 0x00fc: 0x00b3, # SUPERSCRIPT THREE + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd7' # 0x009e -> MULTIPLICATION SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xae' # 0x00a9 -> REGISTERED SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa9' # 0x00b8 -> COPYRIGHT SIGN + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\xa2' # 0x00bd -> CENT SIGN + u'\xa5' # 0x00be -> YEN SIGN + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE + u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa4' # 0x00cf -> CURRENCY SIGN + u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH + u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH + u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\u20ac' # 0x00d5 -> EURO SIGN + u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\xa6' # 0x00dd -> BROKEN BAR + u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE + u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN + u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN + u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE + u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xaf' # 0x00ee -> MACRON + u'\xb4' # 0x00ef -> ACUTE ACCENT + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2017' # 0x00f2 -> DOUBLE LOW LINE + u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS + u'\xb6' # 0x00f4 -> PILCROW SIGN + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\xb8' # 0x00f7 -> CEDILLA + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\xb9' # 0x00fb -> SUPERSCRIPT ONE + u'\xb3' # 0x00fc -> SUPERSCRIPT THREE + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x00bd, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00cf, # CURRENCY SIGN + 0x00a5: 0x00be, # YEN SIGN + 0x00a6: 0x00dd, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x00b8, # COPYRIGHT SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00ae: 0x00a9, # REGISTERED SIGN + 0x00af: 0x00ee, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00fc, # SUPERSCRIPT THREE + 0x00b4: 0x00ef, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x00f4, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00f7, # CEDILLA + 0x00b9: 0x00fb, # SUPERSCRIPT ONE + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d7: 0x009e, # MULTIPLICATION SIGN + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x20ac: 0x00d5, # EURO SIGN + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x2017: 0x00f2, # DOUBLE LOW LINE + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp860.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp860', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp861.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp861', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH + 0x008c: 0x00f0, # LATIN SMALL LETTER ETH + 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00fe, # LATIN SMALL LETTER THORN + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH + u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH + 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f0: 0x008c, # LATIN SMALL LETTER ETH + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fe: 0x0095, # LATIN SMALL LETTER THORN + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp862.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp862', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x05d0, # HEBREW LETTER ALEF + 0x0081: 0x05d1, # HEBREW LETTER BET + 0x0082: 0x05d2, # HEBREW LETTER GIMEL + 0x0083: 0x05d3, # HEBREW LETTER DALET + 0x0084: 0x05d4, # HEBREW LETTER HE + 0x0085: 0x05d5, # HEBREW LETTER VAV + 0x0086: 0x05d6, # HEBREW LETTER ZAYIN + 0x0087: 0x05d7, # HEBREW LETTER HET + 0x0088: 0x05d8, # HEBREW LETTER TET + 0x0089: 0x05d9, # HEBREW LETTER YOD + 0x008a: 0x05da, # HEBREW LETTER FINAL KAF + 0x008b: 0x05db, # HEBREW LETTER KAF + 0x008c: 0x05dc, # HEBREW LETTER LAMED + 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM + 0x008e: 0x05de, # HEBREW LETTER MEM + 0x008f: 0x05df, # HEBREW LETTER FINAL NUN + 0x0090: 0x05e0, # HEBREW LETTER NUN + 0x0091: 0x05e1, # HEBREW LETTER SAMEKH + 0x0092: 0x05e2, # HEBREW LETTER AYIN + 0x0093: 0x05e3, # HEBREW LETTER FINAL PE + 0x0094: 0x05e4, # HEBREW LETTER PE + 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI + 0x0096: 0x05e6, # HEBREW LETTER TSADI + 0x0097: 0x05e7, # HEBREW LETTER QOF + 0x0098: 0x05e8, # HEBREW LETTER RESH + 0x0099: 0x05e9, # HEBREW LETTER SHIN + 0x009a: 0x05ea, # HEBREW LETTER TAV + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00a5, # YEN SIGN + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF + u'\u05d1' # 0x0081 -> HEBREW LETTER BET + u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL + u'\u05d3' # 0x0083 -> HEBREW LETTER DALET + u'\u05d4' # 0x0084 -> HEBREW LETTER HE + u'\u05d5' # 0x0085 -> HEBREW LETTER VAV + u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0x0087 -> HEBREW LETTER HET + u'\u05d8' # 0x0088 -> HEBREW LETTER TET + u'\u05d9' # 0x0089 -> HEBREW LETTER YOD + u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF + u'\u05db' # 0x008b -> HEBREW LETTER KAF + u'\u05dc' # 0x008c -> HEBREW LETTER LAMED + u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM + u'\u05de' # 0x008e -> HEBREW LETTER MEM + u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0x0090 -> HEBREW LETTER NUN + u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN + u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0x0094 -> HEBREW LETTER PE + u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI + u'\u05e7' # 0x0097 -> HEBREW LETTER QOF + u'\u05e8' # 0x0098 -> HEBREW LETTER RESH + u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN + u'\u05ea' # 0x009a -> HEBREW LETTER TAV + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xa5' # 0x009d -> YEN SIGN + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a5: 0x009d, # YEN SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x05d0: 0x0080, # HEBREW LETTER ALEF + 0x05d1: 0x0081, # HEBREW LETTER BET + 0x05d2: 0x0082, # HEBREW LETTER GIMEL + 0x05d3: 0x0083, # HEBREW LETTER DALET + 0x05d4: 0x0084, # HEBREW LETTER HE + 0x05d5: 0x0085, # HEBREW LETTER VAV + 0x05d6: 0x0086, # HEBREW LETTER ZAYIN + 0x05d7: 0x0087, # HEBREW LETTER HET + 0x05d8: 0x0088, # HEBREW LETTER TET + 0x05d9: 0x0089, # HEBREW LETTER YOD + 0x05da: 0x008a, # HEBREW LETTER FINAL KAF + 0x05db: 0x008b, # HEBREW LETTER KAF + 0x05dc: 0x008c, # HEBREW LETTER LAMED + 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM + 0x05de: 0x008e, # HEBREW LETTER MEM + 0x05df: 0x008f, # HEBREW LETTER FINAL NUN + 0x05e0: 0x0090, # HEBREW LETTER NUN + 0x05e1: 0x0091, # HEBREW LETTER SAMEKH + 0x05e2: 0x0092, # HEBREW LETTER AYIN + 0x05e3: 0x0093, # HEBREW LETTER FINAL PE + 0x05e4: 0x0094, # HEBREW LETTER PE + 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI + 0x05e6: 0x0096, # HEBREW LETTER TSADI + 0x05e7: 0x0097, # HEBREW LETTER QOF + 0x05e8: 0x0098, # HEBREW LETTER RESH + 0x05e9: 0x0099, # HEBREW LETTER SHIN + 0x05ea: 0x009a, # HEBREW LETTER TAV + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp863.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp863', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00b6, # PILCROW SIGN + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x2017, # DOUBLE LOW LINE + 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x008f: 0x00a7, # SECTION SIGN + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00a4, # CURRENCY SIGN + 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00a2, # CENT SIGN + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00a6, # BROKEN BAR + 0x00a1: 0x00b4, # ACUTE ACCENT + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00a8, # DIAERESIS + 0x00a5: 0x00b8, # CEDILLA + 0x00a6: 0x00b3, # SUPERSCRIPT THREE + 0x00a7: 0x00af, # MACRON + 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xb6' # 0x0086 -> PILCROW SIGN + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u2017' # 0x008d -> DOUBLE LOW LINE + u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xa7' # 0x008f -> SECTION SIGN + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xa4' # 0x0098 -> CURRENCY SIGN + u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xa2' # 0x009b -> CENT SIGN + u'\xa3' # 0x009c -> POUND SIGN + u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xa6' # 0x00a0 -> BROKEN BAR + u'\xb4' # 0x00a1 -> ACUTE ACCENT + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xa8' # 0x00a4 -> DIAERESIS + u'\xb8' # 0x00a5 -> CEDILLA + u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE + u'\xaf' # 0x00a7 -> MACRON + u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a2: 0x009b, # CENT SIGN + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x0098, # CURRENCY SIGN + 0x00a6: 0x00a0, # BROKEN BAR + 0x00a7: 0x008f, # SECTION SIGN + 0x00a8: 0x00a4, # DIAERESIS + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00af: 0x00a7, # MACRON + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b3: 0x00a6, # SUPERSCRIPT THREE + 0x00b4: 0x00a1, # ACUTE ACCENT + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b6: 0x0086, # PILCROW SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00b8: 0x00a5, # CEDILLA + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS + 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x2017: 0x008d, # DOUBLE LOW LINE + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp864.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,690 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp864', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0025: 0x066a, # ARABIC PERCENT SIGN + 0x0080: 0x00b0, # DEGREE SIGN + 0x0081: 0x00b7, # MIDDLE DOT + 0x0082: 0x2219, # BULLET OPERATOR + 0x0083: 0x221a, # SQUARE ROOT + 0x0084: 0x2592, # MEDIUM SHADE + 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL + 0x0086: 0x2502, # FORMS LIGHT VERTICAL + 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT + 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL + 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT + 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL + 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT + 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT + 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT + 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT + 0x0090: 0x03b2, # GREEK SMALL BETA + 0x0091: 0x221e, # INFINITY + 0x0092: 0x03c6, # GREEK SMALL PHI + 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN + 0x0094: 0x00bd, # FRACTION 1/2 + 0x0095: 0x00bc, # FRACTION 1/4 + 0x0096: 0x2248, # ALMOST EQUAL TO + 0x0097: 0x00ab, # LEFT POINTING GUILLEMET + 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET + 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0x009b: None, # UNDEFINED + 0x009c: None, # UNDEFINED + 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM + 0x009f: None, # UNDEFINED + 0x00a1: 0x00ad, # SOFT HYPHEN + 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0x00a6: None, # UNDEFINED + 0x00a7: None, # UNDEFINED + 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM + 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM + 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM + 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM + 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM + 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM + 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO + 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE + 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO + 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE + 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR + 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE + 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX + 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN + 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT + 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE + 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM + 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM + 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x00a2, # CENT SIGN + 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM + 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM + 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM + 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM + 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM + 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM + 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM + 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM + 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM + 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM + 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM + 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM + 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM + 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM + 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM + 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM + 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM + 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM + 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM + 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM + 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM + 0x00db: 0x00a6, # BROKEN VERTICAL BAR + 0x00dc: 0x00ac, # NOT SIGN + 0x00dd: 0x00f7, # DIVISION SIGN + 0x00de: 0x00d7, # MULTIPLICATION SIGN + 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM + 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM + 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM + 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM + 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM + 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM + 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM + 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM + 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM + 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM + 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM + 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM + 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM + 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM + 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM + 0x00f1: 0x0651, # ARABIC SHADDAH + 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM + 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM + 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM + 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM + 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM + 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM + 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM + 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM + 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: None, # UNDEFINED +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xb0' # 0x0080 -> DEGREE SIGN + u'\xb7' # 0x0081 -> MIDDLE DOT + u'\u2219' # 0x0082 -> BULLET OPERATOR + u'\u221a' # 0x0083 -> SQUARE ROOT + u'\u2592' # 0x0084 -> MEDIUM SHADE + u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL + u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL + u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL + u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT + u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL + u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT + u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT + u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT + u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT + u'\u03b2' # 0x0090 -> GREEK SMALL BETA + u'\u221e' # 0x0091 -> INFINITY + u'\u03c6' # 0x0092 -> GREEK SMALL PHI + u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN + u'\xbd' # 0x0094 -> FRACTION 1/2 + u'\xbc' # 0x0095 -> FRACTION 1/4 + u'\u2248' # 0x0096 -> ALMOST EQUAL TO + u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET + u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET + u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x009b -> UNDEFINED + u'\ufffe' # 0x009c -> UNDEFINED + u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + u'\ufefc' # 0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM + u'\ufffe' # 0x009f -> UNDEFINED + u'\xa0' # 0x00a0 -> NON-BREAKING SPACE + u'\xad' # 0x00a1 -> SOFT HYPHEN + u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + u'\xa3' # 0x00a3 -> POUND SIGN + u'\xa4' # 0x00a4 -> CURRENCY SIGN + u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + u'\ufffe' # 0x00a6 -> UNDEFINED + u'\ufffe' # 0x00a7 -> UNDEFINED + u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM + u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM + u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM + u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM + u'\u060c' # 0x00ac -> ARABIC COMMA + u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM + u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM + u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM + u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO + u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE + u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO + u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE + u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR + u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE + u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX + u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN + u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT + u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE + u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM + u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM + u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\xa2' # 0x00c0 -> CENT SIGN + u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM + u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + u'\ufeca' # 0x00c5 -> ARABIC LETTER AIN FINAL FORM + u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM + u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM + u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM + u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM + u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM + u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM + u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM + u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM + u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM + u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM + u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM + u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM + u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM + u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM + u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM + u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM + u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM + u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM + u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM + u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM + u'\xa6' # 0x00db -> BROKEN VERTICAL BAR + u'\xac' # 0x00dc -> NOT SIGN + u'\xf7' # 0x00dd -> DIVISION SIGN + u'\xd7' # 0x00de -> MULTIPLICATION SIGN + u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM + u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM + u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM + u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM + u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM + u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM + u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM + u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM + u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM + u'\ufef3' # 0x00ea -> ARABIC LETTER YEH INITIAL FORM + u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM + u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM + u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM + u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM + u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM + u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM + u'\u0651' # 0x00f1 -> ARABIC SHADDAH + u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM + u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM + u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM + u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM + u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM + u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM + u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM + u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM + u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM + u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\ufffe' # 0x00ff -> UNDEFINED +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00a0, # NON-BREAKING SPACE + 0x00a2: 0x00c0, # CENT SIGN + 0x00a3: 0x00a3, # POUND SIGN + 0x00a4: 0x00a4, # CURRENCY SIGN + 0x00a6: 0x00db, # BROKEN VERTICAL BAR + 0x00ab: 0x0097, # LEFT POINTING GUILLEMET + 0x00ac: 0x00dc, # NOT SIGN + 0x00ad: 0x00a1, # SOFT HYPHEN + 0x00b0: 0x0080, # DEGREE SIGN + 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN + 0x00b7: 0x0081, # MIDDLE DOT + 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET + 0x00bc: 0x0095, # FRACTION 1/4 + 0x00bd: 0x0094, # FRACTION 1/2 + 0x00d7: 0x00de, # MULTIPLICATION SIGN + 0x00f7: 0x00dd, # DIVISION SIGN + 0x03b2: 0x0090, # GREEK SMALL BETA + 0x03c6: 0x0092, # GREEK SMALL PHI + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0651: 0x00f1, # ARABIC SHADDAH + 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO + 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE + 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO + 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE + 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR + 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE + 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX + 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN + 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT + 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE + 0x066a: 0x0025, # ARABIC PERCENT SIGN + 0x2219: 0x0082, # BULLET OPERATOR + 0x221a: 0x0083, # SQUARE ROOT + 0x221e: 0x0091, # INFINITY + 0x2248: 0x0096, # ALMOST EQUAL TO + 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL + 0x2502: 0x0086, # FORMS LIGHT VERTICAL + 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT + 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT + 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT + 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT + 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT + 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT + 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL + 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL + 0x2592: 0x0084, # MEDIUM SHADE + 0x25a0: 0x00fe, # BLACK SQUARE + 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM + 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM + 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM + 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM + 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM + 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM + 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM + 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM + 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM + 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM + 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM + 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM + 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM + 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM + 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM + 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM + 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM + 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM + 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM + 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM + 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM + 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM + 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM + 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM + 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM + 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM + 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM + 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM + 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM + 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM + 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM + 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM + 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM + 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM + 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM + 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM + 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM + 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM + 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM + 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM + 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM + 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM + 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM + 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM + 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM + 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM + 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM + 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM + 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM + 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM + 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM + 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM + 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM + 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM + 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM + 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM + 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM + 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM + 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM + 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM + 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM + 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM + 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM + 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM + 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM + 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM + 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM + 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM + 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM + 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp865.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp865', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE + 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE + 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x009e: 0x20a7, # PESETA SIGN + 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00a8: 0x00bf, # INVERTED QUESTION MARK + 0x00a9: 0x2310, # REVERSED NOT SIGN + 0x00aa: 0x00ac, # NOT SIGN + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00a4, # CURRENCY SIGN + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00e3: 0x03c0, # GREEK SMALL LETTER PI + 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00e6: 0x00b5, # MICRO SIGN + 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU + 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00ec: 0x221e, # INFINITY + 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI + 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00ef: 0x2229, # INTERSECTION + 0x00f0: 0x2261, # IDENTICAL TO + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO + 0x00f4: 0x2320, # TOP HALF INTEGRAL + 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL + 0x00f6: 0x00f7, # DIVISION SIGN + 0x00f7: 0x2248, # ALMOST EQUAL TO + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N + 0x00fd: 0x00b2, # SUPERSCRIPT TWO + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE + u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE + u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE + u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE + u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE + u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE + u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE + u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE + u'\xa3' # 0x009c -> POUND SIGN + u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE + u'\u20a7' # 0x009e -> PESETA SIGN + u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK + u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE + u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE + u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE + u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE + u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE + u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR + u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR + u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK + u'\u2310' # 0x00a9 -> REVERSED NOT SIGN + u'\xac' # 0x00aa -> NOT SIGN + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER + u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xa4' # 0x00af -> CURRENCY SIGN + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA + u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S + u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA + u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI + u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA + u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA + u'\xb5' # 0x00e6 -> MICRO SIGN + u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU + u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI + u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA + u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA + u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA + u'\u221e' # 0x00ec -> INFINITY + u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI + u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON + u'\u2229' # 0x00ef -> INTERSECTION + u'\u2261' # 0x00f0 -> IDENTICAL TO + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO + u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO + u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL + u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL + u'\xf7' # 0x00f6 -> DIVISION SIGN + u'\u2248' # 0x00f7 -> ALMOST EQUAL TO + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N + u'\xb2' # 0x00fd -> SUPERSCRIPT TWO + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK + 0x00a3: 0x009c, # POUND SIGN + 0x00a4: 0x00af, # CURRENCY SIGN + 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x00aa, # NOT SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x00fd, # SUPERSCRIPT TWO + 0x00b5: 0x00e6, # MICRO SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR + 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x00bf: 0x00a8, # INVERTED QUESTION MARK + 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE + 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE + 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S + 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE + 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE + 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE + 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE + 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x00f6, # DIVISION SIGN + 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE + 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK + 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA + 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA + 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA + 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI + 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA + 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA + 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON + 0x03c0: 0x00e3, # GREEK SMALL LETTER PI + 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU + 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI + 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N + 0x20a7: 0x009e, # PESETA SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x221e: 0x00ec, # INFINITY + 0x2229: 0x00ef, # INTERSECTION + 0x2248: 0x00f7, # ALMOST EQUAL TO + 0x2261: 0x00f0, # IDENTICAL TO + 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO + 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO + 0x2310: 0x00a9, # REVERSED NOT SIGN + 0x2320: 0x00f4, # TOP HALF INTEGRAL + 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp866.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp866', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O + 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x258c, # LEFT HALF BLOCK + 0x00de: 0x2590, # RIGHT HALF BLOCK + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E + 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA + 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI + 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U + 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x2219, # BULLET OPERATOR + 0x00fa: 0x00b7, # MIDDLE DOT + 0x00fb: 0x221a, # SQUARE ROOT + 0x00fc: 0x2116, # NUMERO SIGN + 0x00fd: 0x00a4, # CURRENCY SIGN + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O + u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u258c' # 0x00dd -> LEFT HALF BLOCK + u'\u2590' # 0x00de -> RIGHT HALF BLOCK + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E + u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA + u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO + u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI + u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\u2219' # 0x00f9 -> BULLET OPERATOR + u'\xb7' # 0x00fa -> MIDDLE DOT + u'\u221a' # 0x00fb -> SQUARE ROOT + u'\u2116' # 0x00fc -> NUMERO SIGN + u'\xa4' # 0x00fd -> CURRENCY SIGN + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a4: 0x00fd, # CURRENCY SIGN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b7: 0x00fa, # MIDDLE DOT + 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO + 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI + 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U + 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A + 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE + 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE + 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE + 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE + 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE + 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE + 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE + 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I + 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I + 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA + 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL + 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM + 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN + 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O + 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE + 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER + 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES + 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE + 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U + 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF + 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA + 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE + 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE + 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA + 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA + 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU + 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E + 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU + 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA + 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A + 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE + 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE + 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE + 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE + 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE + 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE + 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE + 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I + 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I + 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA + 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL + 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM + 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN + 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O + 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE + 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER + 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES + 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE + 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U + 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF + 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA + 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE + 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE + 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA + 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA + 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN + 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU + 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN + 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E + 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU + 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA + 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO + 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI + 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U + 0x2116: 0x00fc, # NUMERO SIGN + 0x2219: 0x00f9, # BULLET OPERATOR + 0x221a: 0x00fb, # SQUARE ROOT + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x258c: 0x00dd, # LEFT HALF BLOCK + 0x2590: 0x00de, # RIGHT HALF BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp869.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,689 @@ +""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp869', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: None, # UNDEFINED + 0x0081: None, # UNDEFINED + 0x0082: None, # UNDEFINED + 0x0083: None, # UNDEFINED + 0x0084: None, # UNDEFINED + 0x0085: None, # UNDEFINED + 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0087: None, # UNDEFINED + 0x0088: 0x00b7, # MIDDLE DOT + 0x0089: 0x00ac, # NOT SIGN + 0x008a: 0x00a6, # BROKEN BAR + 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x008e: 0x2015, # HORIZONTAL BAR + 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x0093: None, # UNDEFINED + 0x0094: None, # UNDEFINED + 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x0097: 0x00a9, # COPYRIGHT SIGN + 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0099: 0x00b2, # SUPERSCRIPT TWO + 0x009a: 0x00b3, # SUPERSCRIPT THREE + 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x009c: 0x00a3, # POUND SIGN + 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS + 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS + 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA + 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA + 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA + 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA + 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON + 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA + 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA + 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA + 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA + 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00b0: 0x2591, # LIGHT SHADE + 0x00b1: 0x2592, # MEDIUM SHADE + 0x00b2: 0x2593, # DARK SHADE + 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL + 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA + 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA + 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU + 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU + 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL + 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI + 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON + 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL + 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI + 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA + 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU + 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON + 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI + 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI + 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI + 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA + 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA + 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA + 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA + 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT + 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x00db: 0x2588, # FULL BLOCK + 0x00dc: 0x2584, # LOWER HALF BLOCK + 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA + 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON + 0x00df: 0x2580, # UPPER HALF BLOCK + 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA + 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA + 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA + 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA + 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA + 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA + 0x00e6: 0x03bc, # GREEK SMALL LETTER MU + 0x00e7: 0x03bd, # GREEK SMALL LETTER NU + 0x00e8: 0x03be, # GREEK SMALL LETTER XI + 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON + 0x00ea: 0x03c0, # GREEK SMALL LETTER PI + 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO + 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA + 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA + 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU + 0x00ef: 0x0384, # GREEK TONOS + 0x00f0: 0x00ad, # SOFT HYPHEN + 0x00f1: 0x00b1, # PLUS-MINUS SIGN + 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON + 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI + 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI + 0x00f5: 0x00a7, # SECTION SIGN + 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI + 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS + 0x00f8: 0x00b0, # DEGREE SIGN + 0x00f9: 0x00a8, # DIAERESIS + 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA + 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x00fe: 0x25a0, # BLACK SQUARE + 0x00ff: 0x00a0, # NO-BREAK SPACE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> NULL + u'\x01' # 0x0001 -> START OF HEADING + u'\x02' # 0x0002 -> START OF TEXT + u'\x03' # 0x0003 -> END OF TEXT + u'\x04' # 0x0004 -> END OF TRANSMISSION + u'\x05' # 0x0005 -> ENQUIRY + u'\x06' # 0x0006 -> ACKNOWLEDGE + u'\x07' # 0x0007 -> BELL + u'\x08' # 0x0008 -> BACKSPACE + u'\t' # 0x0009 -> HORIZONTAL TABULATION + u'\n' # 0x000a -> LINE FEED + u'\x0b' # 0x000b -> VERTICAL TABULATION + u'\x0c' # 0x000c -> FORM FEED + u'\r' # 0x000d -> CARRIAGE RETURN + u'\x0e' # 0x000e -> SHIFT OUT + u'\x0f' # 0x000f -> SHIFT IN + u'\x10' # 0x0010 -> DATA LINK ESCAPE + u'\x11' # 0x0011 -> DEVICE CONTROL ONE + u'\x12' # 0x0012 -> DEVICE CONTROL TWO + u'\x13' # 0x0013 -> DEVICE CONTROL THREE + u'\x14' # 0x0014 -> DEVICE CONTROL FOUR + u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x0016 -> SYNCHRONOUS IDLE + u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x0018 -> CANCEL + u'\x19' # 0x0019 -> END OF MEDIUM + u'\x1a' # 0x001a -> SUBSTITUTE + u'\x1b' # 0x001b -> ESCAPE + u'\x1c' # 0x001c -> FILE SEPARATOR + u'\x1d' # 0x001d -> GROUP SEPARATOR + u'\x1e' # 0x001e -> RECORD SEPARATOR + u'\x1f' # 0x001f -> UNIT SEPARATOR + u' ' # 0x0020 -> SPACE + u'!' # 0x0021 -> EXCLAMATION MARK + u'"' # 0x0022 -> QUOTATION MARK + u'#' # 0x0023 -> NUMBER SIGN + u'$' # 0x0024 -> DOLLAR SIGN + u'%' # 0x0025 -> PERCENT SIGN + u'&' # 0x0026 -> AMPERSAND + u"'" # 0x0027 -> APOSTROPHE + u'(' # 0x0028 -> LEFT PARENTHESIS + u')' # 0x0029 -> RIGHT PARENTHESIS + u'*' # 0x002a -> ASTERISK + u'+' # 0x002b -> PLUS SIGN + u',' # 0x002c -> COMMA + u'-' # 0x002d -> HYPHEN-MINUS + u'.' # 0x002e -> FULL STOP + u'/' # 0x002f -> SOLIDUS + u'0' # 0x0030 -> DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE + u':' # 0x003a -> COLON + u';' # 0x003b -> SEMICOLON + u'<' # 0x003c -> LESS-THAN SIGN + u'=' # 0x003d -> EQUALS SIGN + u'>' # 0x003e -> GREATER-THAN SIGN + u'?' # 0x003f -> QUESTION MARK + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET + u'\\' # 0x005c -> REVERSE SOLIDUS + u']' # 0x005d -> RIGHT SQUARE BRACKET + u'^' # 0x005e -> CIRCUMFLEX ACCENT + u'_' # 0x005f -> LOW LINE + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET + u'|' # 0x007c -> VERTICAL LINE + u'}' # 0x007d -> RIGHT CURLY BRACKET + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> DELETE + u'\ufffe' # 0x0080 -> UNDEFINED + u'\ufffe' # 0x0081 -> UNDEFINED + u'\ufffe' # 0x0082 -> UNDEFINED + u'\ufffe' # 0x0083 -> UNDEFINED + u'\ufffe' # 0x0084 -> UNDEFINED + u'\ufffe' # 0x0085 -> UNDEFINED + u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\ufffe' # 0x0087 -> UNDEFINED + u'\xb7' # 0x0088 -> MIDDLE DOT + u'\xac' # 0x0089 -> NOT SIGN + u'\xa6' # 0x008a -> BROKEN BAR + u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK + u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u2015' # 0x008e -> HORIZONTAL BAR + u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\ufffe' # 0x0093 -> UNDEFINED + u'\ufffe' # 0x0094 -> UNDEFINED + u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\xa9' # 0x0097 -> COPYRIGHT SIGN + u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\xb2' # 0x0099 -> SUPERSCRIPT TWO + u'\xb3' # 0x009a -> SUPERSCRIPT THREE + u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\xa3' # 0x009c -> POUND SIGN + u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA + u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF + u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA + u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2591' # 0x00b0 -> LIGHT SHADE + u'\u2592' # 0x00b1 -> MEDIUM SHADE + u'\u2593' # 0x00b2 -> DARK SHADE + u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL + u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU + u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON + u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO + u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA + u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA + u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2588' # 0x00db -> FULL BLOCK + u'\u2584' # 0x00dc -> LOWER HALF BLOCK + u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON + u'\u2580' # 0x00df -> UPPER HALF BLOCK + u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU + u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU + u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI + u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI + u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO + u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA + u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU + u'\u0384' # 0x00ef -> GREEK TONOS + u'\xad' # 0x00f0 -> SOFT HYPHEN + u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN + u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI + u'\xa7' # 0x00f5 -> SECTION SIGN + u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI + u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS + u'\xb0' # 0x00f8 -> DEGREE SIGN + u'\xa8' # 0x00f9 -> DIAERESIS + u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA + u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u25a0' # 0x00fe -> BLACK SQUARE + u'\xa0' # 0x00ff -> NO-BREAK SPACE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # NULL + 0x0001: 0x0001, # START OF HEADING + 0x0002: 0x0002, # START OF TEXT + 0x0003: 0x0003, # END OF TEXT + 0x0004: 0x0004, # END OF TRANSMISSION + 0x0005: 0x0005, # ENQUIRY + 0x0006: 0x0006, # ACKNOWLEDGE + 0x0007: 0x0007, # BELL + 0x0008: 0x0008, # BACKSPACE + 0x0009: 0x0009, # HORIZONTAL TABULATION + 0x000a: 0x000a, # LINE FEED + 0x000b: 0x000b, # VERTICAL TABULATION + 0x000c: 0x000c, # FORM FEED + 0x000d: 0x000d, # CARRIAGE RETURN + 0x000e: 0x000e, # SHIFT OUT + 0x000f: 0x000f, # SHIFT IN + 0x0010: 0x0010, # DATA LINK ESCAPE + 0x0011: 0x0011, # DEVICE CONTROL ONE + 0x0012: 0x0012, # DEVICE CONTROL TWO + 0x0013: 0x0013, # DEVICE CONTROL THREE + 0x0014: 0x0014, # DEVICE CONTROL FOUR + 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE + 0x0016: 0x0016, # SYNCHRONOUS IDLE + 0x0017: 0x0017, # END OF TRANSMISSION BLOCK + 0x0018: 0x0018, # CANCEL + 0x0019: 0x0019, # END OF MEDIUM + 0x001a: 0x001a, # SUBSTITUTE + 0x001b: 0x001b, # ESCAPE + 0x001c: 0x001c, # FILE SEPARATOR + 0x001d: 0x001d, # GROUP SEPARATOR + 0x001e: 0x001e, # RECORD SEPARATOR + 0x001f: 0x001f, # UNIT SEPARATOR + 0x0020: 0x0020, # SPACE + 0x0021: 0x0021, # EXCLAMATION MARK + 0x0022: 0x0022, # QUOTATION MARK + 0x0023: 0x0023, # NUMBER SIGN + 0x0024: 0x0024, # DOLLAR SIGN + 0x0025: 0x0025, # PERCENT SIGN + 0x0026: 0x0026, # AMPERSAND + 0x0027: 0x0027, # APOSTROPHE + 0x0028: 0x0028, # LEFT PARENTHESIS + 0x0029: 0x0029, # RIGHT PARENTHESIS + 0x002a: 0x002a, # ASTERISK + 0x002b: 0x002b, # PLUS SIGN + 0x002c: 0x002c, # COMMA + 0x002d: 0x002d, # HYPHEN-MINUS + 0x002e: 0x002e, # FULL STOP + 0x002f: 0x002f, # SOLIDUS + 0x0030: 0x0030, # DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE + 0x003a: 0x003a, # COLON + 0x003b: 0x003b, # SEMICOLON + 0x003c: 0x003c, # LESS-THAN SIGN + 0x003d: 0x003d, # EQUALS SIGN + 0x003e: 0x003e, # GREATER-THAN SIGN + 0x003f: 0x003f, # QUESTION MARK + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET + 0x005c: 0x005c, # REVERSE SOLIDUS + 0x005d: 0x005d, # RIGHT SQUARE BRACKET + 0x005e: 0x005e, # CIRCUMFLEX ACCENT + 0x005f: 0x005f, # LOW LINE + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET + 0x007c: 0x007c, # VERTICAL LINE + 0x007d: 0x007d, # RIGHT CURLY BRACKET + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # DELETE + 0x00a0: 0x00ff, # NO-BREAK SPACE + 0x00a3: 0x009c, # POUND SIGN + 0x00a6: 0x008a, # BROKEN BAR + 0x00a7: 0x00f5, # SECTION SIGN + 0x00a8: 0x00f9, # DIAERESIS + 0x00a9: 0x0097, # COPYRIGHT SIGN + 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00ac: 0x0089, # NOT SIGN + 0x00ad: 0x00f0, # SOFT HYPHEN + 0x00b0: 0x00f8, # DEGREE SIGN + 0x00b1: 0x00f1, # PLUS-MINUS SIGN + 0x00b2: 0x0099, # SUPERSCRIPT TWO + 0x00b3: 0x009a, # SUPERSCRIPT THREE + 0x00b7: 0x0088, # MIDDLE DOT + 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF + 0x0384: 0x00ef, # GREEK TONOS + 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS + 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS + 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA + 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA + 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA + 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA + 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON + 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA + 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA + 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA + 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA + 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA + 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA + 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU + 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU + 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI + 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON + 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI + 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO + 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA + 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU + 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON + 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI + 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI + 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI + 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA + 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS + 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS + 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS + 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS + 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA + 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA + 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA + 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA + 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON + 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA + 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA + 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA + 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA + 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA + 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA + 0x03bc: 0x00e6, # GREEK SMALL LETTER MU + 0x03bd: 0x00e7, # GREEK SMALL LETTER NU + 0x03be: 0x00e8, # GREEK SMALL LETTER XI + 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON + 0x03c0: 0x00ea, # GREEK SMALL LETTER PI + 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO + 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA + 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA + 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU + 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON + 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI + 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI + 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI + 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA + 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA + 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA + 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS + 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS + 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS + 0x2015: 0x008e, # HORIZONTAL BAR + 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK + 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK + 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL + 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL + 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT + 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT + 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT + 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT + 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT + 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL + 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL + 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL + 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT + 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT + 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT + 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT + 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + 0x2580: 0x00df, # UPPER HALF BLOCK + 0x2584: 0x00dc, # LOWER HALF BLOCK + 0x2588: 0x00db, # FULL BLOCK + 0x2591: 0x00b0, # LIGHT SHADE + 0x2592: 0x00b1, # MEDIUM SHADE + 0x2593: 0x00b2, # DARK SHADE + 0x25a0: 0x00fe, # BLACK SQUARE +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp874.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp874 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp874', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u20ac' # 0x80 -> EURO SIGN + u'\ufffe' # 0x81 -> UNDEFINED + u'\ufffe' # 0x82 -> UNDEFINED + u'\ufffe' # 0x83 -> UNDEFINED + u'\ufffe' # 0x84 -> UNDEFINED + u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS + u'\ufffe' # 0x86 -> UNDEFINED + u'\ufffe' # 0x87 -> UNDEFINED + u'\ufffe' # 0x88 -> UNDEFINED + u'\ufffe' # 0x89 -> UNDEFINED + u'\ufffe' # 0x8A -> UNDEFINED + u'\ufffe' # 0x8B -> UNDEFINED + u'\ufffe' # 0x8C -> UNDEFINED + u'\ufffe' # 0x8D -> UNDEFINED + u'\ufffe' # 0x8E -> UNDEFINED + u'\ufffe' # 0x8F -> UNDEFINED + u'\ufffe' # 0x90 -> UNDEFINED + u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK + u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK + u'\u2022' # 0x95 -> BULLET + u'\u2013' # 0x96 -> EN DASH + u'\u2014' # 0x97 -> EM DASH + u'\ufffe' # 0x98 -> UNDEFINED + u'\ufffe' # 0x99 -> UNDEFINED + u'\ufffe' # 0x9A -> UNDEFINED + u'\ufffe' # 0x9B -> UNDEFINED + u'\ufffe' # 0x9C -> UNDEFINED + u'\ufffe' # 0x9D -> UNDEFINED + u'\ufffe' # 0x9E -> UNDEFINED + u'\ufffe' # 0x9F -> UNDEFINED + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU + u'\ufffe' # 0xDB -> UNDEFINED + u'\ufffe' # 0xDC -> UNDEFINED + u'\ufffe' # 0xDD -> UNDEFINED + u'\ufffe' # 0xDE -> UNDEFINED + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT + u'\ufffe' # 0xFC -> UNDEFINED + u'\ufffe' # 0xFD -> UNDEFINED + u'\ufffe' # 0xFE -> UNDEFINED + u'\ufffe' # 0xFF -> UNDEFINED +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp875.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec cp875 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='cp875', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x9c' # 0x04 -> CONTROL + u'\t' # 0x05 -> HORIZONTAL TABULATION + u'\x86' # 0x06 -> CONTROL + u'\x7f' # 0x07 -> DELETE + u'\x97' # 0x08 -> CONTROL + u'\x8d' # 0x09 -> CONTROL + u'\x8e' # 0x0A -> CONTROL + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x9d' # 0x14 -> CONTROL + u'\x85' # 0x15 -> CONTROL + u'\x08' # 0x16 -> BACKSPACE + u'\x87' # 0x17 -> CONTROL + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x92' # 0x1A -> CONTROL + u'\x8f' # 0x1B -> CONTROL + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u'\x80' # 0x20 -> CONTROL + u'\x81' # 0x21 -> CONTROL + u'\x82' # 0x22 -> CONTROL + u'\x83' # 0x23 -> CONTROL + u'\x84' # 0x24 -> CONTROL + u'\n' # 0x25 -> LINE FEED + u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK + u'\x1b' # 0x27 -> ESCAPE + u'\x88' # 0x28 -> CONTROL + u'\x89' # 0x29 -> CONTROL + u'\x8a' # 0x2A -> CONTROL + u'\x8b' # 0x2B -> CONTROL + u'\x8c' # 0x2C -> CONTROL + u'\x05' # 0x2D -> ENQUIRY + u'\x06' # 0x2E -> ACKNOWLEDGE + u'\x07' # 0x2F -> BELL + u'\x90' # 0x30 -> CONTROL + u'\x91' # 0x31 -> CONTROL + u'\x16' # 0x32 -> SYNCHRONOUS IDLE + u'\x93' # 0x33 -> CONTROL + u'\x94' # 0x34 -> CONTROL + u'\x95' # 0x35 -> CONTROL + u'\x96' # 0x36 -> CONTROL + u'\x04' # 0x37 -> END OF TRANSMISSION + u'\x98' # 0x38 -> CONTROL + u'\x99' # 0x39 -> CONTROL + u'\x9a' # 0x3A -> CONTROL + u'\x9b' # 0x3B -> CONTROL + u'\x14' # 0x3C -> DEVICE CONTROL FOUR + u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE + u'\x9e' # 0x3E -> CONTROL + u'\x1a' # 0x3F -> SUBSTITUTE + u' ' # 0x40 -> SPACE + u'\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA + u'[' # 0x4A -> LEFT SQUARE BRACKET + u'.' # 0x4B -> FULL STOP + u'<' # 0x4C -> LESS-THAN SIGN + u'(' # 0x4D -> LEFT PARENTHESIS + u'+' # 0x4E -> PLUS SIGN + u'!' # 0x4F -> EXCLAMATION MARK + u'&' # 0x50 -> AMPERSAND + u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0x53 -> GREEK CAPITAL LETTER MU + u'\u039d' # 0x54 -> GREEK CAPITAL LETTER NU + u'\u039e' # 0x55 -> GREEK CAPITAL LETTER XI + u'\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO + u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA + u']' # 0x5A -> RIGHT SQUARE BRACKET + u'$' # 0x5B -> DOLLAR SIGN + u'*' # 0x5C -> ASTERISK + u')' # 0x5D -> RIGHT PARENTHESIS + u';' # 0x5E -> SEMICOLON + u'^' # 0x5F -> CIRCUMFLEX ACCENT + u'-' # 0x60 -> HYPHEN-MINUS + u'/' # 0x61 -> SOLIDUS + u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'|' # 0x6A -> VERTICAL LINE + u',' # 0x6B -> COMMA + u'%' # 0x6C -> PERCENT SIGN + u'_' # 0x6D -> LOW LINE + u'>' # 0x6E -> GREATER-THAN SIGN + u'?' # 0x6F -> QUESTION MARK + u'\xa8' # 0x70 -> DIAERESIS + u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\xa0' # 0x74 -> NO-BREAK SPACE + u'\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'`' # 0x79 -> GRAVE ACCENT + u':' # 0x7A -> COLON + u'#' # 0x7B -> NUMBER SIGN + u'@' # 0x7C -> COMMERCIAL AT + u"'" # 0x7D -> APOSTROPHE + u'=' # 0x7E -> EQUALS SIGN + u'"' # 0x7F -> QUOTATION MARK + u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS + u'a' # 0x81 -> LATIN SMALL LETTER A + u'b' # 0x82 -> LATIN SMALL LETTER B + u'c' # 0x83 -> LATIN SMALL LETTER C + u'd' # 0x84 -> LATIN SMALL LETTER D + u'e' # 0x85 -> LATIN SMALL LETTER E + u'f' # 0x86 -> LATIN SMALL LETTER F + u'g' # 0x87 -> LATIN SMALL LETTER G + u'h' # 0x88 -> LATIN SMALL LETTER H + u'i' # 0x89 -> LATIN SMALL LETTER I + u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA + u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA + u'\xb0' # 0x90 -> DEGREE SIGN + u'j' # 0x91 -> LATIN SMALL LETTER J + u'k' # 0x92 -> LATIN SMALL LETTER K + u'l' # 0x93 -> LATIN SMALL LETTER L + u'm' # 0x94 -> LATIN SMALL LETTER M + u'n' # 0x95 -> LATIN SMALL LETTER N + u'o' # 0x96 -> LATIN SMALL LETTER O + u'p' # 0x97 -> LATIN SMALL LETTER P + u'q' # 0x98 -> LATIN SMALL LETTER Q + u'r' # 0x99 -> LATIN SMALL LETTER R + u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA + u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA + u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0x9F -> GREEK SMALL LETTER MU + u'\xb4' # 0xA0 -> ACUTE ACCENT + u'~' # 0xA1 -> TILDE + u's' # 0xA2 -> LATIN SMALL LETTER S + u't' # 0xA3 -> LATIN SMALL LETTER T + u'u' # 0xA4 -> LATIN SMALL LETTER U + u'v' # 0xA5 -> LATIN SMALL LETTER V + u'w' # 0xA6 -> LATIN SMALL LETTER W + u'x' # 0xA7 -> LATIN SMALL LETTER X + u'y' # 0xA8 -> LATIN SMALL LETTER Y + u'z' # 0xA9 -> LATIN SMALL LETTER Z + u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU + u'\u03be' # 0xAB -> GREEK SMALL LETTER XI + u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI + u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO + u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA + u'\xa3' # 0xB0 -> POUND SIGN + u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI + u'{' # 0xC0 -> LEFT CURLY BRACKET + u'A' # 0xC1 -> LATIN CAPITAL LETTER A + u'B' # 0xC2 -> LATIN CAPITAL LETTER B + u'C' # 0xC3 -> LATIN CAPITAL LETTER C + u'D' # 0xC4 -> LATIN CAPITAL LETTER D + u'E' # 0xC5 -> LATIN CAPITAL LETTER E + u'F' # 0xC6 -> LATIN CAPITAL LETTER F + u'G' # 0xC7 -> LATIN CAPITAL LETTER G + u'H' # 0xC8 -> LATIN CAPITAL LETTER H + u'I' # 0xC9 -> LATIN CAPITAL LETTER I + u'\xad' # 0xCA -> SOFT HYPHEN + u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA + u'\u0390' # 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK + u'\u2015' # 0xCF -> HORIZONTAL BAR + u'}' # 0xD0 -> RIGHT CURLY BRACKET + u'J' # 0xD1 -> LATIN CAPITAL LETTER J + u'K' # 0xD2 -> LATIN CAPITAL LETTER K + u'L' # 0xD3 -> LATIN CAPITAL LETTER L + u'M' # 0xD4 -> LATIN CAPITAL LETTER M + u'N' # 0xD5 -> LATIN CAPITAL LETTER N + u'O' # 0xD6 -> LATIN CAPITAL LETTER O + u'P' # 0xD7 -> LATIN CAPITAL LETTER P + u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q + u'R' # 0xD9 -> LATIN CAPITAL LETTER R + u'\xb1' # 0xDA -> PLUS-MINUS SIGN + u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF + u'\x1a' # 0xDC -> SUBSTITUTE + u'\u0387' # 0xDD -> GREEK ANO TELEIA + u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK + u'\xa6' # 0xDF -> BROKEN BAR + u'\\' # 0xE0 -> REVERSE SOLIDUS + u'\x1a' # 0xE1 -> SUBSTITUTE + u'S' # 0xE2 -> LATIN CAPITAL LETTER S + u'T' # 0xE3 -> LATIN CAPITAL LETTER T + u'U' # 0xE4 -> LATIN CAPITAL LETTER U + u'V' # 0xE5 -> LATIN CAPITAL LETTER V + u'W' # 0xE6 -> LATIN CAPITAL LETTER W + u'X' # 0xE7 -> LATIN CAPITAL LETTER X + u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y + u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z + u'\xb2' # 0xEA -> SUPERSCRIPT TWO + u'\xa7' # 0xEB -> SECTION SIGN + u'\x1a' # 0xEC -> SUBSTITUTE + u'\x1a' # 0xED -> SUBSTITUTE + u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xEF -> NOT SIGN + u'0' # 0xF0 -> DIGIT ZERO + u'1' # 0xF1 -> DIGIT ONE + u'2' # 0xF2 -> DIGIT TWO + u'3' # 0xF3 -> DIGIT THREE + u'4' # 0xF4 -> DIGIT FOUR + u'5' # 0xF5 -> DIGIT FIVE + u'6' # 0xF6 -> DIGIT SIX + u'7' # 0xF7 -> DIGIT SEVEN + u'8' # 0xF8 -> DIGIT EIGHT + u'9' # 0xF9 -> DIGIT NINE + u'\xb3' # 0xFA -> SUPERSCRIPT THREE + u'\xa9' # 0xFB -> COPYRIGHT SIGN + u'\x1a' # 0xFC -> SUBSTITUTE + u'\x1a' # 0xFD -> SUBSTITUTE + u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\x9f' # 0xFF -> CONTROL +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp932.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# cp932.py: Python Unicode Codec for CP932 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +codec = _codecs_jp.getcodec('cp932') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='cp932', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp949.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# cp949.py: Python Unicode Codec for CP949 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_kr, codecs +import _multibytecodec as mbc + +codec = _codecs_kr.getcodec('cp949') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='cp949', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/cp950.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# cp950.py: Python Unicode Codec for CP950 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_tw, codecs +import _multibytecodec as mbc + +codec = _codecs_tw.getcodec('cp950') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='cp950', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/euc_jis_2004.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +codec = _codecs_jp.getcodec('euc_jis_2004') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='euc_jis_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/euc_jisx0213.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +codec = _codecs_jp.getcodec('euc_jisx0213') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='euc_jisx0213', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/euc_jp.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# euc_jp.py: Python Unicode Codec for EUC_JP +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +codec = _codecs_jp.getcodec('euc_jp') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='euc_jp', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/euc_kr.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# euc_kr.py: Python Unicode Codec for EUC_KR +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_kr, codecs +import _multibytecodec as mbc + +codec = _codecs_kr.getcodec('euc_kr') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='euc_kr', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/gb18030.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# gb18030.py: Python Unicode Codec for GB18030 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_cn, codecs +import _multibytecodec as mbc + +codec = _codecs_cn.getcodec('gb18030') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='gb18030', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/gb2312.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# gb2312.py: Python Unicode Codec for GB2312 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_cn, codecs +import _multibytecodec as mbc + +codec = _codecs_cn.getcodec('gb2312') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='gb2312', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/gbk.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# gbk.py: Python Unicode Codec for GBK +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_cn, codecs +import _multibytecodec as mbc + +codec = _codecs_cn.getcodec('gbk') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='gbk', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/hex_codec.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,80 @@ +""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +""" +import codecs, binascii + +### Codec APIs + +def hex_encode(input,errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = binascii.b2a_hex(input) + return (output, len(input)) + +def hex_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = binascii.a2b_hex(input) + return (output, len(input)) + +class Codec(codecs.Codec): + + def encode(self, input,errors='strict'): + return hex_encode(input,errors) + def decode(self, input,errors='strict'): + return hex_decode(input,errors) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + assert self.errors == 'strict' + return binascii.b2a_hex(input) + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + assert self.errors == 'strict' + return binascii.a2b_hex(input) + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='hex', + encode=hex_encode, + decode=hex_decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + _is_text_encoding=False, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/hp_roman8.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,152 @@ +""" Python Character Mapping Codec generated from 'hp_roman8.txt' with gencodec.py. + + Based on data from ftp://dkuug.dk/i18n/charmaps/HP-ROMAN8 (Keld Simonsen) + + Original source: LaserJet IIP Printer User's Manual HP part no + 33471-90901, Hewlet-Packard, June 1989. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_map) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_map)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='hp-roman8', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x00a1: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x00a2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00a3: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE + 0x00a4: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00a5: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00a6: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00a7: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00a8: 0x00b4, # ACUTE ACCENT + 0x00a9: 0x02cb, # MODIFIER LETTER GRAVE ACCENT (Mandarin Chinese fourth tone) + 0x00aa: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x00ab: 0x00a8, # DIAERESIS + 0x00ac: 0x02dc, # SMALL TILDE + 0x00ad: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE + 0x00ae: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00af: 0x20a4, # LIRA SIGN + 0x00b0: 0x00af, # MACRON + 0x00b1: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00b2: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00b3: 0x00b0, # DEGREE SIGN + 0x00b4: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00b5: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x00b6: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x00b7: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x00b8: 0x00a1, # INVERTED EXCLAMATION MARK + 0x00b9: 0x00bf, # INVERTED QUESTION MARK + 0x00ba: 0x00a4, # CURRENCY SIGN + 0x00bb: 0x00a3, # POUND SIGN + 0x00bc: 0x00a5, # YEN SIGN + 0x00bd: 0x00a7, # SECTION SIGN + 0x00be: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x00bf: 0x00a2, # CENT SIGN + 0x00c0: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00c1: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00c2: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00c3: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00c4: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x00c5: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x00c6: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x00c7: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x00c8: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x00c9: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x00ca: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE + 0x00cb: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x00cc: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00cd: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ce: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00cf: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00d0: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00d1: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00d2: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE + 0x00d3: 0x00c6, # LATIN CAPITAL LETTER AE + 0x00d4: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE + 0x00d5: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x00d6: 0x00f8, # LATIN SMALL LETTER O WITH STROKE + 0x00d7: 0x00e6, # LATIN SMALL LETTER AE + 0x00d8: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00d9: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE + 0x00da: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00db: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00dc: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00dd: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00de: 0x00df, # LATIN SMALL LETTER SHARP S (German) + 0x00df: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00e0: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e1: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE + 0x00e2: 0x00e3, # LATIN SMALL LETTER A WITH TILDE + 0x00e3: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) + 0x00e4: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) + 0x00e5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00e6: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE + 0x00e7: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00e8: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE + 0x00e9: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ea: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x00eb: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00ec: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00ed: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00ee: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x00ef: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS + 0x00f0: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) + 0x00f1: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) + 0x00f2: 0x00b7, # MIDDLE DOT + 0x00f3: 0x00b5, # MICRO SIGN + 0x00f4: 0x00b6, # PILCROW SIGN + 0x00f5: 0x00be, # VULGAR FRACTION THREE QUARTERS + 0x00f6: 0x2014, # EM DASH + 0x00f7: 0x00bc, # VULGAR FRACTION ONE QUARTER + 0x00f8: 0x00bd, # VULGAR FRACTION ONE HALF + 0x00f9: 0x00aa, # FEMININE ORDINAL INDICATOR + 0x00fa: 0x00ba, # MASCULINE ORDINAL INDICATOR + 0x00fb: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00fc: 0x25a0, # BLACK SQUARE + 0x00fd: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00fe: 0x00b1, # PLUS-MINUS SIGN + 0x00ff: None, +}) + +### Encoding Map + +encoding_map = codecs.make_encoding_map(decoding_map)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/hz.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# hz.py: Python Unicode Codec for HZ +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_cn, codecs +import _multibytecodec as mbc + +codec = _codecs_cn.getcodec('hz') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='hz', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/idna.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,288 @@ +# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) + +import stringprep, re, codecs +from unicodedata import ucd_3_2_0 as unicodedata + +# IDNA section 3.1 +dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]") + +# IDNA section 5 +ace_prefix = "xn--" +uace_prefix = unicode(ace_prefix, "ascii") + +# This assumes query strings, so AllowUnassigned is true +def nameprep(label): + # Map + newlabel = [] + for c in label: + if stringprep.in_table_b1(c): + # Map to nothing + continue + newlabel.append(stringprep.map_table_b2(c)) + label = u"".join(newlabel) + + # Normalize + label = unicodedata.normalize("NFKC", label) + + # Prohibit + for c in label: + if stringprep.in_table_c12(c) or \ + stringprep.in_table_c22(c) or \ + stringprep.in_table_c3(c) or \ + stringprep.in_table_c4(c) or \ + stringprep.in_table_c5(c) or \ + stringprep.in_table_c6(c) or \ + stringprep.in_table_c7(c) or \ + stringprep.in_table_c8(c) or \ + stringprep.in_table_c9(c): + raise UnicodeError("Invalid character %r" % c) + + # Check bidi + RandAL = map(stringprep.in_table_d1, label) + for c in RandAL: + if c: + # There is a RandAL char in the string. Must perform further + # tests: + # 1) The characters in section 5.8 MUST be prohibited. + # This is table C.8, which was already checked + # 2) If a string contains any RandALCat character, the string + # MUST NOT contain any LCat character. + if filter(stringprep.in_table_d2, label): + raise UnicodeError("Violation of BIDI requirement 2") + + # 3) If a string contains any RandALCat character, a + # RandALCat character MUST be the first character of the + # string, and a RandALCat character MUST be the last + # character of the string. + if not RandAL[0] or not RandAL[-1]: + raise UnicodeError("Violation of BIDI requirement 3") + + return label + +def ToASCII(label): + try: + # Step 1: try ASCII + label = label.encode("ascii") + except UnicodeError: + pass + else: + # Skip to step 3: UseSTD3ASCIIRules is false, so + # Skip to step 8. + if 0 < len(label) < 64: + return label + raise UnicodeError("label empty or too long") + + # Step 2: nameprep + label = nameprep(label) + + # Step 3: UseSTD3ASCIIRules is false + # Step 4: try ASCII + try: + label = label.encode("ascii") + except UnicodeError: + pass + else: + # Skip to step 8. + if 0 < len(label) < 64: + return label + raise UnicodeError("label empty or too long") + + # Step 5: Check ACE prefix + if label.startswith(uace_prefix): + raise UnicodeError("Label starts with ACE prefix") + + # Step 6: Encode with PUNYCODE + label = label.encode("punycode") + + # Step 7: Prepend ACE prefix + label = ace_prefix + label + + # Step 8: Check size + if 0 < len(label) < 64: + return label + raise UnicodeError("label empty or too long") + +def ToUnicode(label): + # Step 1: Check for ASCII + if isinstance(label, str): + pure_ascii = True + else: + try: + label = label.encode("ascii") + pure_ascii = True + except UnicodeError: + pure_ascii = False + if not pure_ascii: + # Step 2: Perform nameprep + label = nameprep(label) + # It doesn't say this, but apparently, it should be ASCII now + try: + label = label.encode("ascii") + except UnicodeError: + raise UnicodeError("Invalid character in IDN label") + # Step 3: Check for ACE prefix + if not label.startswith(ace_prefix): + return unicode(label, "ascii") + + # Step 4: Remove ACE prefix + label1 = label[len(ace_prefix):] + + # Step 5: Decode using PUNYCODE + result = label1.decode("punycode") + + # Step 6: Apply ToASCII + label2 = ToASCII(result) + + # Step 7: Compare the result of step 6 with the one of step 3 + # label2 will already be in lower case. + if label.lower() != label2: + raise UnicodeError("IDNA does not round-trip", label, label2) + + # Step 8: return the result of step 5 + return result + +### Codec APIs + +class Codec(codecs.Codec): + def encode(self,input,errors='strict'): + + if errors != 'strict': + # IDNA is quite clear that implementations must be strict + raise UnicodeError("unsupported error handling "+errors) + + if not input: + return "", 0 + + result = [] + labels = dots.split(input) + if labels and len(labels[-1])==0: + trailing_dot = '.' + del labels[-1] + else: + trailing_dot = '' + for label in labels: + result.append(ToASCII(label)) + # Join with U+002E + return ".".join(result)+trailing_dot, len(input) + + def decode(self,input,errors='strict'): + + if errors != 'strict': + raise UnicodeError("Unsupported error handling "+errors) + + if not input: + return u"", 0 + + # IDNA allows decoding to operate on Unicode strings, too. + if isinstance(input, unicode): + labels = dots.split(input) + else: + # Must be ASCII string + input = str(input) + unicode(input, "ascii") + labels = input.split(".") + + if labels and len(labels[-1]) == 0: + trailing_dot = u'.' + del labels[-1] + else: + trailing_dot = u'' + + result = [] + for label in labels: + result.append(ToUnicode(label)) + + return u".".join(result)+trailing_dot, len(input) + +class IncrementalEncoder(codecs.BufferedIncrementalEncoder): + def _buffer_encode(self, input, errors, final): + if errors != 'strict': + # IDNA is quite clear that implementations must be strict + raise UnicodeError("unsupported error handling "+errors) + + if not input: + return ("", 0) + + labels = dots.split(input) + trailing_dot = u'' + if labels: + if not labels[-1]: + trailing_dot = '.' + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = '.' + + result = [] + size = 0 + for label in labels: + result.append(ToASCII(label)) + if size: + size += 1 + size += len(label) + + # Join with U+002E + result = ".".join(result) + trailing_dot + size += len(trailing_dot) + return (result, size) + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def _buffer_decode(self, input, errors, final): + if errors != 'strict': + raise UnicodeError("Unsupported error handling "+errors) + + if not input: + return (u"", 0) + + # IDNA allows decoding to operate on Unicode strings, too. + if isinstance(input, unicode): + labels = dots.split(input) + else: + # Must be ASCII string + input = str(input) + unicode(input, "ascii") + labels = input.split(".") + + trailing_dot = u'' + if labels: + if not labels[-1]: + trailing_dot = u'.' + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = u'.' + + result = [] + size = 0 + for label in labels: + result.append(ToUnicode(label)) + if size: + size += 1 + size += len(label) + + result = u".".join(result) + trailing_dot + size += len(trailing_dot) + return (result, size) + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='idna', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso2022_jp.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# iso2022_jp.py: Python Unicode Codec for ISO2022_JP +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_iso2022, codecs +import _multibytecodec as mbc + +codec = _codecs_iso2022.getcodec('iso2022_jp') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='iso2022_jp', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso2022_jp_1.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_iso2022, codecs +import _multibytecodec as mbc + +codec = _codecs_iso2022.getcodec('iso2022_jp_1') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='iso2022_jp_1', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso2022_jp_2.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_iso2022, codecs +import _multibytecodec as mbc + +codec = _codecs_iso2022.getcodec('iso2022_jp_2') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='iso2022_jp_2', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso2022_jp_2004.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_iso2022, codecs +import _multibytecodec as mbc + +codec = _codecs_iso2022.getcodec('iso2022_jp_2004') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='iso2022_jp_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso2022_jp_3.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_iso2022, codecs +import _multibytecodec as mbc + +codec = _codecs_iso2022.getcodec('iso2022_jp_3') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='iso2022_jp_3', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso2022_jp_ext.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_iso2022, codecs +import _multibytecodec as mbc + +codec = _codecs_iso2022.getcodec('iso2022_jp_ext') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='iso2022_jp_ext', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso2022_kr.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# iso2022_kr.py: Python Unicode Codec for ISO2022_KR +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_iso2022, codecs +import _multibytecodec as mbc + +codec = _codecs_iso2022.getcodec('iso2022_kr') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='iso2022_kr', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_1.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_1 generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-1', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_10.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_10 generated from 'MAPPINGS/ISO8859/8859-10.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-10', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0112' # 0xA2 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u012a' # 0xA4 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u0136' # 0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u013b' # 0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0110' # 0xA9 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0160' # 0xAA -> LATIN CAPITAL LETTER S WITH CARON + u'\u0166' # 0xAB -> LATIN CAPITAL LETTER T WITH STROKE + u'\u017d' # 0xAC -> LATIN CAPITAL LETTER Z WITH CARON + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u016a' # 0xAE -> LATIN CAPITAL LETTER U WITH MACRON + u'\u014a' # 0xAF -> LATIN CAPITAL LETTER ENG + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0113' # 0xB2 -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0xB3 -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE + u'\u0137' # 0xB6 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u013c' # 0xB8 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0111' # 0xB9 -> LATIN SMALL LETTER D WITH STROKE + u'\u0161' # 0xBA -> LATIN SMALL LETTER S WITH CARON + u'\u0167' # 0xBB -> LATIN SMALL LETTER T WITH STROKE + u'\u017e' # 0xBC -> LATIN SMALL LETTER Z WITH CARON + u'\u2015' # 0xBD -> HORIZONTAL BAR + u'\u016b' # 0xBE -> LATIN SMALL LETTER U WITH MACRON + u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG + u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) + u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u0168' # 0xD7 -> LATIN CAPITAL LETTER U WITH TILDE + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) + u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u0169' # 0xF7 -> LATIN SMALL LETTER U WITH TILDE + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) + u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_11.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_11 generated from 'MAPPINGS/ISO8859/8859-11.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-11', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_13.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_13 generated from 'MAPPINGS/ISO8859/8859-13.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-13', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xe6' # 0xBF -> LATIN SMALL LETTER AE + u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON + u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) + u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK + u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK + u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON + u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK + u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE + u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA + u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON + u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA + u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK + u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE + u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE + u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON + u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_14.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_14 generated from 'MAPPINGS/ISO8859/8859-14.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-14', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u1e02' # 0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE + u'\u1e03' # 0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE + u'\xa3' # 0xA3 -> POUND SIGN + u'\u010a' # 0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u010b' # 0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u1e0a' # 0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u1e80' # 0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u1e82' # 0xAA -> LATIN CAPITAL LETTER W WITH ACUTE + u'\u1e0b' # 0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE + u'\u1ef2' # 0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\u0178' # 0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u1e1e' # 0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE + u'\u1e1f' # 0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE + u'\u0120' # 0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\u0121' # 0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\u1e40' # 0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE + u'\u1e41' # 0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\u1e56' # 0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE + u'\u1e81' # 0xB8 -> LATIN SMALL LETTER W WITH GRAVE + u'\u1e57' # 0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE + u'\u1e83' # 0xBA -> LATIN SMALL LETTER W WITH ACUTE + u'\u1e60' # 0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE + u'\u1ef3' # 0xBC -> LATIN SMALL LETTER Y WITH GRAVE + u'\u1e84' # 0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS + u'\u1e85' # 0xBE -> LATIN SMALL LETTER W WITH DIAERESIS + u'\u1e61' # 0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0174' # 0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u1e6a' # 0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0176' # 0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0175' # 0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u1e6b' # 0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0177' # 0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_15.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_15 generated from 'MAPPINGS/ISO8859/8859-15.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-15', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20ac' # 0xA4 -> EURO SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE + u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_16.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_16 generated from 'MAPPINGS/ISO8859/8859-16.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-16', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\u20ac' # 0xA4 -> EURO SIGN + u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON + u'\xa7' # 0xA7 -> SECTION SIGN + u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017a' # 0xAE -> LATIN SMALL LETTER Z WITH ACUTE + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON + u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON + u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON + u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE + u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK + u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE + u'\u0171' # 0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK + u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_2.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_2 generated from 'MAPPINGS/ISO8859/8859-2.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-2', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u02d8' # 0xA2 -> BREVE + u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u013d' # 0xA5 -> LATIN CAPITAL LETTER L WITH CARON + u'\u015a' # 0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u0164' # 0xAB -> LATIN CAPITAL LETTER T WITH CARON + u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0xB2 -> OGONEK + u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\u013e' # 0xB5 -> LATIN SMALL LETTER L WITH CARON + u'\u015b' # 0xB6 -> LATIN SMALL LETTER S WITH ACUTE + u'\u02c7' # 0xB7 -> CARON + u'\xb8' # 0xB8 -> CEDILLA + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\u0165' # 0xBB -> LATIN SMALL LETTER T WITH CARON + u'\u017a' # 0xBC -> LATIN SMALL LETTER Z WITH ACUTE + u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON + u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON + u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA + u'\u02d9' # 0xFF -> DOT ABOVE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_3.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_3 generated from 'MAPPINGS/ISO8859/8859-3.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-3', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0126' # 0xA1 -> LATIN CAPITAL LETTER H WITH STROKE + u'\u02d8' # 0xA2 -> BREVE + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\ufffe' + u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE + u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX + u'\xad' # 0xAD -> SOFT HYPHEN + u'\ufffe' + u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u0125' # 0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA + u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE + u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\ufffe' + u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\ufffe' + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE + u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\ufffe' + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u016c' # 0xDD -> LATIN CAPITAL LETTER U WITH BREVE + u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\ufffe' + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE + u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\ufffe' + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE + u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX + u'\u02d9' # 0xFF -> DOT ABOVE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_4.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_4 generated from 'MAPPINGS/ISO8859/8859-4.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-4', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\u0138' # 0xA2 -> LATIN SMALL LETTER KRA + u'\u0156' # 0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE + u'\u013b' # 0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u0112' # 0xAA -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0122' # 0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u0166' # 0xAC -> LATIN CAPITAL LETTER T WITH STROKE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK + u'\u02db' # 0xB2 -> OGONEK + u'\u0157' # 0xB3 -> LATIN SMALL LETTER R WITH CEDILLA + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE + u'\u013c' # 0xB6 -> LATIN SMALL LETTER L WITH CEDILLA + u'\u02c7' # 0xB7 -> CARON + u'\xb8' # 0xB8 -> CEDILLA + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u0113' # 0xBA -> LATIN SMALL LETTER E WITH MACRON + u'\u0123' # 0xBB -> LATIN SMALL LETTER G WITH CEDILLA + u'\u0167' # 0xBC -> LATIN SMALL LETTER T WITH STROKE + u'\u014a' # 0xBD -> LATIN CAPITAL LETTER ENG + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG + u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\u012a' # 0xCF -> LATIN CAPITAL LETTER I WITH MACRON + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON + u'\u0136' # 0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0168' # 0xDD -> LATIN CAPITAL LETTER U WITH TILDE + u'\u016a' # 0xDE -> LATIN CAPITAL LETTER U WITH MACRON + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\u012b' # 0xEF -> LATIN SMALL LETTER I WITH MACRON + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON + u'\u0137' # 0xF3 -> LATIN SMALL LETTER K WITH CEDILLA + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0169' # 0xFD -> LATIN SMALL LETTER U WITH TILDE + u'\u016b' # 0xFE -> LATIN SMALL LETTER U WITH MACRON + u'\u02d9' # 0xFF -> DOT ABOVE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_5.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_5 generated from 'MAPPINGS/ISO8859/8859-5.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-5', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u0401' # 0xA1 -> CYRILLIC CAPITAL LETTER IO + u'\u0402' # 0xA2 -> CYRILLIC CAPITAL LETTER DJE + u'\u0403' # 0xA3 -> CYRILLIC CAPITAL LETTER GJE + u'\u0404' # 0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0405' # 0xA5 -> CYRILLIC CAPITAL LETTER DZE + u'\u0406' # 0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0xA7 -> CYRILLIC CAPITAL LETTER YI + u'\u0408' # 0xA8 -> CYRILLIC CAPITAL LETTER JE + u'\u0409' # 0xA9 -> CYRILLIC CAPITAL LETTER LJE + u'\u040a' # 0xAA -> CYRILLIC CAPITAL LETTER NJE + u'\u040b' # 0xAB -> CYRILLIC CAPITAL LETTER TSHE + u'\u040c' # 0xAC -> CYRILLIC CAPITAL LETTER KJE + u'\xad' # 0xAD -> SOFT HYPHEN + u'\u040e' # 0xAE -> CYRILLIC CAPITAL LETTER SHORT U + u'\u040f' # 0xAF -> CYRILLIC CAPITAL LETTER DZHE + u'\u0410' # 0xB0 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xB1 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0xB2 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0xB3 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0xB4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xB5 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0xB6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0xB7 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0xB8 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xB9 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xBA -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xBB -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xBC -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xBD -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xBE -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xBF -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0xC0 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xC1 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xC2 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xC3 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0xC4 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0xC5 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0xC6 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0xC7 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0xC8 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0xC9 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0xCB -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0xCD -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0xCE -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0xCF -> CYRILLIC CAPITAL LETTER YA + u'\u0430' # 0xD0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xD1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xD2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xD3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xD4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xD5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xD7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xD8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xD9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xDA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xDB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xDC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xDD -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xDE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xDF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xE0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xE1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xE2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xE3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xE4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xE5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xE6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xE7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xE8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xE9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xEA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xEB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xEC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xED -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xEE -> CYRILLIC SMALL LETTER YU + u'\u044f' # 0xEF -> CYRILLIC SMALL LETTER YA + u'\u2116' # 0xF0 -> NUMERO SIGN + u'\u0451' # 0xF1 -> CYRILLIC SMALL LETTER IO + u'\u0452' # 0xF2 -> CYRILLIC SMALL LETTER DJE + u'\u0453' # 0xF3 -> CYRILLIC SMALL LETTER GJE + u'\u0454' # 0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0455' # 0xF5 -> CYRILLIC SMALL LETTER DZE + u'\u0456' # 0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0xF7 -> CYRILLIC SMALL LETTER YI + u'\u0458' # 0xF8 -> CYRILLIC SMALL LETTER JE + u'\u0459' # 0xF9 -> CYRILLIC SMALL LETTER LJE + u'\u045a' # 0xFA -> CYRILLIC SMALL LETTER NJE + u'\u045b' # 0xFB -> CYRILLIC SMALL LETTER TSHE + u'\u045c' # 0xFC -> CYRILLIC SMALL LETTER KJE + u'\xa7' # 0xFD -> SECTION SIGN + u'\u045e' # 0xFE -> CYRILLIC SMALL LETTER SHORT U + u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_6.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_6 generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-6', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u060c' # 0xAC -> ARABIC COMMA + u'\xad' # 0xAD -> SOFT HYPHEN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u061b' # 0xBB -> ARABIC SEMICOLON + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\ufffe' + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\u0637' # 0xD7 -> ARABIC LETTER TAH + u'\u0638' # 0xD8 -> ARABIC LETTER ZAH + u'\u0639' # 0xD9 -> ARABIC LETTER AIN + u'\u063a' # 0xDA -> ARABIC LETTER GHAIN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u0640' # 0xE0 -> ARABIC TATWEEL + u'\u0641' # 0xE1 -> ARABIC LETTER FEH + u'\u0642' # 0xE2 -> ARABIC LETTER QAF + u'\u0643' # 0xE3 -> ARABIC LETTER KAF + u'\u0644' # 0xE4 -> ARABIC LETTER LAM + u'\u0645' # 0xE5 -> ARABIC LETTER MEEM + u'\u0646' # 0xE6 -> ARABIC LETTER NOON + u'\u0647' # 0xE7 -> ARABIC LETTER HEH + u'\u0648' # 0xE8 -> ARABIC LETTER WAW + u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xEA -> ARABIC LETTER YEH + u'\u064b' # 0xEB -> ARABIC FATHATAN + u'\u064c' # 0xEC -> ARABIC DAMMATAN + u'\u064d' # 0xED -> ARABIC KASRATAN + u'\u064e' # 0xEE -> ARABIC FATHA + u'\u064f' # 0xEF -> ARABIC DAMMA + u'\u0650' # 0xF0 -> ARABIC KASRA + u'\u0651' # 0xF1 -> ARABIC SHADDA + u'\u0652' # 0xF2 -> ARABIC SUKUN + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_7.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_7 generated from 'MAPPINGS/ISO8859/8859-7.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-7', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\u2018' # 0xA1 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xA2 -> RIGHT SINGLE QUOTATION MARK + u'\xa3' # 0xA3 -> POUND SIGN + u'\u20ac' # 0xA4 -> EURO SIGN + u'\u20af' # 0xA5 -> DRACHMA SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u037a' # 0xAA -> GREEK YPOGEGRAMMENI + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\ufffe' + u'\u2015' # 0xAF -> HORIZONTAL BAR + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\u0384' # 0xB4 -> GREEK TONOS + u'\u0385' # 0xB5 -> GREEK DIALYTIKA TONOS + u'\u0386' # 0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA + u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA + u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA + u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA + u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA + u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA + u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA + u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU + u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU + u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI + u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON + u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI + u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO + u'\ufffe' + u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA + u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU + u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON + u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI + u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI + u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA + u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA + u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA + u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU + u'\u03bd' # 0xED -> GREEK SMALL LETTER NU + u'\u03be' # 0xEE -> GREEK SMALL LETTER XI + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO + u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON + u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI + u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI + u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI + u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA + u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\ufffe' +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_8.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_8 generated from 'MAPPINGS/ISO8859/8859-8.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-8', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\ufffe' + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xd7' # 0xAA -> MULTIPLICATION SIGN + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xf7' # 0xBA -> DIVISION SIGN + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u2017' # 0xDF -> DOUBLE LOW LINE + u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF + u'\u05d1' # 0xE1 -> HEBREW LETTER BET + u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL + u'\u05d3' # 0xE3 -> HEBREW LETTER DALET + u'\u05d4' # 0xE4 -> HEBREW LETTER HE + u'\u05d5' # 0xE5 -> HEBREW LETTER VAV + u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN + u'\u05d7' # 0xE7 -> HEBREW LETTER HET + u'\u05d8' # 0xE8 -> HEBREW LETTER TET + u'\u05d9' # 0xE9 -> HEBREW LETTER YOD + u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF + u'\u05db' # 0xEB -> HEBREW LETTER KAF + u'\u05dc' # 0xEC -> HEBREW LETTER LAMED + u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM + u'\u05de' # 0xEE -> HEBREW LETTER MEM + u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN + u'\u05e0' # 0xF0 -> HEBREW LETTER NUN + u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH + u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN + u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE + u'\u05e4' # 0xF4 -> HEBREW LETTER PE + u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI + u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI + u'\u05e7' # 0xF7 -> HEBREW LETTER QOF + u'\u05e8' # 0xF8 -> HEBREW LETTER RESH + u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN + u'\u05ea' # 0xFA -> HEBREW LETTER TAV + u'\ufffe' + u'\ufffe' + u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK + u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK + u'\ufffe' +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/iso8859_9.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec iso8859_9 generated from 'MAPPINGS/ISO8859/8859-9.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-9', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\xa0' # 0xA0 -> NO-BREAK SPACE + u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa4' # 0xA4 -> CURRENCY SIGN + u'\xa5' # 0xA5 -> YEN SIGN + u'\xa6' # 0xA6 -> BROKEN BAR + u'\xa7' # 0xA7 -> SECTION SIGN + u'\xa8' # 0xA8 -> DIAERESIS + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR + u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xac' # 0xAC -> NOT SIGN + u'\xad' # 0xAD -> SOFT HYPHEN + u'\xae' # 0xAE -> REGISTERED SIGN + u'\xaf' # 0xAF -> MACRON + u'\xb0' # 0xB0 -> DEGREE SIGN + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\xb2' # 0xB2 -> SUPERSCRIPT TWO + u'\xb3' # 0xB3 -> SUPERSCRIPT THREE + u'\xb4' # 0xB4 -> ACUTE ACCENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\xb6' # 0xB6 -> PILCROW SIGN + u'\xb7' # 0xB7 -> MIDDLE DOT + u'\xb8' # 0xB8 -> CEDILLA + u'\xb9' # 0xB9 -> SUPERSCRIPT ONE + u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR + u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER + u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF + u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS + u'\xbf' # 0xBF -> INVERTED QUESTION MARK + u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE + u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE + u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE + u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE + u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xd7' # 0xD7 -> MULTIPLICATION SIGN + u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE + u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S + u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE + u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE + u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE + u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE + u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE + u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE + u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE + u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE + u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE + u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0xF7 -> DIVISION SIGN + u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE + u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE + u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE + u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I + u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA + u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/johab.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# johab.py: Python Unicode Codec for JOHAB +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_kr, codecs +import _multibytecodec as mbc + +codec = _codecs_kr.getcodec('johab') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='johab', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/koi8_r.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec koi8_r generated from 'MAPPINGS/VENDORS/MISC/KOI8-R.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='koi8-r', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL + u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x8B -> UPPER HALF BLOCK + u'\u2584' # 0x8C -> LOWER HALF BLOCK + u'\u2588' # 0x8D -> FULL BLOCK + u'\u258c' # 0x8E -> LEFT HALF BLOCK + u'\u2590' # 0x8F -> RIGHT HALF BLOCK + u'\u2591' # 0x90 -> LIGHT SHADE + u'\u2592' # 0x91 -> MEDIUM SHADE + u'\u2593' # 0x92 -> DARK SHADE + u'\u2320' # 0x93 -> TOP HALF INTEGRAL + u'\u25a0' # 0x94 -> BLACK SQUARE + u'\u2219' # 0x95 -> BULLET OPERATOR + u'\u221a' # 0x96 -> SQUARE ROOT + u'\u2248' # 0x97 -> ALMOST EQUAL TO + u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO + u'\xa0' # 0x9A -> NO-BREAK SPACE + u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x9C -> DEGREE SIGN + u'\xb2' # 0x9D -> SUPERSCRIPT TWO + u'\xb7' # 0x9E -> MIDDLE DOT + u'\xf7' # 0x9F -> DIVISION SIGN + u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO + u'\u2553' # 0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u2555' # 0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + u'\u2556' # 0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u255c' # 0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO + u'\u2562' # 0xB4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u2564' # 0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + u'\u2565' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u256b' # 0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa9' # 0xBF -> COPYRIGHT SIGN + u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E + u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF + u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/koi8_u.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec koi8_u generated from 'python-mappings/KOI8-U.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='koi8-u', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL + u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL + u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT + u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT + u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT + u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT + u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT + u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT + u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL + u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + u'\u2580' # 0x8B -> UPPER HALF BLOCK + u'\u2584' # 0x8C -> LOWER HALF BLOCK + u'\u2588' # 0x8D -> FULL BLOCK + u'\u258c' # 0x8E -> LEFT HALF BLOCK + u'\u2590' # 0x8F -> RIGHT HALF BLOCK + u'\u2591' # 0x90 -> LIGHT SHADE + u'\u2592' # 0x91 -> MEDIUM SHADE + u'\u2593' # 0x92 -> DARK SHADE + u'\u2320' # 0x93 -> TOP HALF INTEGRAL + u'\u25a0' # 0x94 -> BLACK SQUARE + u'\u2219' # 0x95 -> BULLET OPERATOR + u'\u221a' # 0x96 -> SQUARE ROOT + u'\u2248' # 0x97 -> ALMOST EQUAL TO + u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO + u'\xa0' # 0x9A -> NO-BREAK SPACE + u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL + u'\xb0' # 0x9C -> DEGREE SIGN + u'\xb2' # 0x9D -> SUPERSCRIPT TWO + u'\xb7' # 0x9E -> MIDDLE DOT + u'\xf7' # 0x9F -> DIVISION SIGN + u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL + u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL + u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO + u'\u0454' # 0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT + u'\u0456' # 0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0457' # 0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) + u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT + u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT + u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + u'\u0491' # 0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN + u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT + u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO + u'\u0404' # 0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT + u'\u0406' # 0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\u0407' # 0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) + u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL + u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + u'\u0490' # 0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN + u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + u'\xa9' # 0xBF -> COPYRIGHT SIGN + u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU + u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE + u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE + u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE + u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF + u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE + u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA + u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE + u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA + u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U + u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE + u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE + u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU + u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE + u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA + u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E + u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA + u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE + u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN + u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU + u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE + u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE + u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE + u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF + u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE + u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA + u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE + u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA + u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U + u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE + u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU + u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE + u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA + u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E + u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA + u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE + u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/latin_1.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,50 @@ +""" Python 'latin-1' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + # Note: Binding these as C functions will result in the class not + # converting them to methods. This is intended. + encode = codecs.latin_1_encode + decode = codecs.latin_1_decode + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.latin_1_encode(input,self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.latin_1_decode(input,self.errors)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +class StreamConverter(StreamWriter,StreamReader): + + encode = codecs.latin_1_decode + decode = codecs.latin_1_encode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='iso8859-1', + encode=Codec.encode, + decode=Codec.decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_arabic.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,698 @@ +""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ARABIC.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-arabic', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x00a0, # NO-BREAK SPACE, right-left + 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE + 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA + 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE + 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left + 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS + 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f7, # DIVISION SIGN, right-left + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE + 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x0020, # SPACE, right-left + 0x00a1: 0x0021, # EXCLAMATION MARK, right-left + 0x00a2: 0x0022, # QUOTATION MARK, right-left + 0x00a3: 0x0023, # NUMBER SIGN, right-left + 0x00a4: 0x0024, # DOLLAR SIGN, right-left + 0x00a5: 0x066a, # ARABIC PERCENT SIGN + 0x00a6: 0x0026, # AMPERSAND, right-left + 0x00a7: 0x0027, # APOSTROPHE, right-left + 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left + 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left + 0x00aa: 0x002a, # ASTERISK, right-left + 0x00ab: 0x002b, # PLUS SIGN, right-left + 0x00ac: 0x060c, # ARABIC COMMA + 0x00ad: 0x002d, # HYPHEN-MINUS, right-left + 0x00ae: 0x002e, # FULL STOP, right-left + 0x00af: 0x002f, # SOLIDUS, right-left + 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO, right-left (need override) + 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE, right-left (need override) + 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO, right-left (need override) + 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE, right-left (need override) + 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR, right-left (need override) + 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE, right-left (need override) + 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX, right-left (need override) + 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) + 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) + 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE, right-left (need override) + 0x00ba: 0x003a, # COLON, right-left + 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: 0x003c, # LESS-THAN SIGN, right-left + 0x00bd: 0x003d, # EQUALS SIGN, right-left + 0x00be: 0x003e, # GREATER-THAN SIGN, right-left + 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left + 0x00c1: 0x0621, # ARABIC LETTER HAMZA + 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x00c7: 0x0627, # ARABIC LETTER ALEF + 0x00c8: 0x0628, # ARABIC LETTER BEH + 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA + 0x00ca: 0x062a, # ARABIC LETTER TEH + 0x00cb: 0x062b, # ARABIC LETTER THEH + 0x00cc: 0x062c, # ARABIC LETTER JEEM + 0x00cd: 0x062d, # ARABIC LETTER HAH + 0x00ce: 0x062e, # ARABIC LETTER KHAH + 0x00cf: 0x062f, # ARABIC LETTER DAL + 0x00d0: 0x0630, # ARABIC LETTER THAL + 0x00d1: 0x0631, # ARABIC LETTER REH + 0x00d2: 0x0632, # ARABIC LETTER ZAIN + 0x00d3: 0x0633, # ARABIC LETTER SEEN + 0x00d4: 0x0634, # ARABIC LETTER SHEEN + 0x00d5: 0x0635, # ARABIC LETTER SAD + 0x00d6: 0x0636, # ARABIC LETTER DAD + 0x00d7: 0x0637, # ARABIC LETTER TAH + 0x00d8: 0x0638, # ARABIC LETTER ZAH + 0x00d9: 0x0639, # ARABIC LETTER AIN + 0x00da: 0x063a, # ARABIC LETTER GHAIN + 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left + 0x00dc: 0x005c, # REVERSE SOLIDUS, right-left + 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left + 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left + 0x00df: 0x005f, # LOW LINE, right-left + 0x00e0: 0x0640, # ARABIC TATWEEL + 0x00e1: 0x0641, # ARABIC LETTER FEH + 0x00e2: 0x0642, # ARABIC LETTER QAF + 0x00e3: 0x0643, # ARABIC LETTER KAF + 0x00e4: 0x0644, # ARABIC LETTER LAM + 0x00e5: 0x0645, # ARABIC LETTER MEEM + 0x00e6: 0x0646, # ARABIC LETTER NOON + 0x00e7: 0x0647, # ARABIC LETTER HEH + 0x00e8: 0x0648, # ARABIC LETTER WAW + 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA + 0x00ea: 0x064a, # ARABIC LETTER YEH + 0x00eb: 0x064b, # ARABIC FATHATAN + 0x00ec: 0x064c, # ARABIC DAMMATAN + 0x00ed: 0x064d, # ARABIC KASRATAN + 0x00ee: 0x064e, # ARABIC FATHA + 0x00ef: 0x064f, # ARABIC DAMMA + 0x00f0: 0x0650, # ARABIC KASRA + 0x00f1: 0x0651, # ARABIC SHADDA + 0x00f2: 0x0652, # ARABIC SUKUN + 0x00f3: 0x067e, # ARABIC LETTER PEH + 0x00f4: 0x0679, # ARABIC LETTER TTEH + 0x00f5: 0x0686, # ARABIC LETTER TCHEH + 0x00f6: 0x06d5, # ARABIC LETTER AE + 0x00f7: 0x06a4, # ARABIC LETTER VEH + 0x00f8: 0x06af, # ARABIC LETTER GAF + 0x00f9: 0x0688, # ARABIC LETTER DDAL + 0x00fa: 0x0691, # ARABIC LETTER RREH + 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left + 0x00fc: 0x007c, # VERTICAL LINE, right-left + 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left + 0x00fe: 0x0698, # ARABIC LETTER JEH + 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE +}) + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x0000 -> CONTROL CHARACTER + u'\x01' # 0x0001 -> CONTROL CHARACTER + u'\x02' # 0x0002 -> CONTROL CHARACTER + u'\x03' # 0x0003 -> CONTROL CHARACTER + u'\x04' # 0x0004 -> CONTROL CHARACTER + u'\x05' # 0x0005 -> CONTROL CHARACTER + u'\x06' # 0x0006 -> CONTROL CHARACTER + u'\x07' # 0x0007 -> CONTROL CHARACTER + u'\x08' # 0x0008 -> CONTROL CHARACTER + u'\t' # 0x0009 -> CONTROL CHARACTER + u'\n' # 0x000a -> CONTROL CHARACTER + u'\x0b' # 0x000b -> CONTROL CHARACTER + u'\x0c' # 0x000c -> CONTROL CHARACTER + u'\r' # 0x000d -> CONTROL CHARACTER + u'\x0e' # 0x000e -> CONTROL CHARACTER + u'\x0f' # 0x000f -> CONTROL CHARACTER + u'\x10' # 0x0010 -> CONTROL CHARACTER + u'\x11' # 0x0011 -> CONTROL CHARACTER + u'\x12' # 0x0012 -> CONTROL CHARACTER + u'\x13' # 0x0013 -> CONTROL CHARACTER + u'\x14' # 0x0014 -> CONTROL CHARACTER + u'\x15' # 0x0015 -> CONTROL CHARACTER + u'\x16' # 0x0016 -> CONTROL CHARACTER + u'\x17' # 0x0017 -> CONTROL CHARACTER + u'\x18' # 0x0018 -> CONTROL CHARACTER + u'\x19' # 0x0019 -> CONTROL CHARACTER + u'\x1a' # 0x001a -> CONTROL CHARACTER + u'\x1b' # 0x001b -> CONTROL CHARACTER + u'\x1c' # 0x001c -> CONTROL CHARACTER + u'\x1d' # 0x001d -> CONTROL CHARACTER + u'\x1e' # 0x001e -> CONTROL CHARACTER + u'\x1f' # 0x001f -> CONTROL CHARACTER + u' ' # 0x0020 -> SPACE, left-right + u'!' # 0x0021 -> EXCLAMATION MARK, left-right + u'"' # 0x0022 -> QUOTATION MARK, left-right + u'#' # 0x0023 -> NUMBER SIGN, left-right + u'$' # 0x0024 -> DOLLAR SIGN, left-right + u'%' # 0x0025 -> PERCENT SIGN, left-right + u'&' # 0x0026 -> AMPERSAND, left-right + u"'" # 0x0027 -> APOSTROPHE, left-right + u'(' # 0x0028 -> LEFT PARENTHESIS, left-right + u')' # 0x0029 -> RIGHT PARENTHESIS, left-right + u'*' # 0x002a -> ASTERISK, left-right + u'+' # 0x002b -> PLUS SIGN, left-right + u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + u'-' # 0x002d -> HYPHEN-MINUS, left-right + u'.' # 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + u'/' # 0x002f -> SOLIDUS, left-right + u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO + u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE + u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO + u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE + u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR + u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE + u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX + u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN + u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT + u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE + u':' # 0x003a -> COLON, left-right + u';' # 0x003b -> SEMICOLON, left-right + u'<' # 0x003c -> LESS-THAN SIGN, left-right + u'=' # 0x003d -> EQUALS SIGN, left-right + u'>' # 0x003e -> GREATER-THAN SIGN, left-right + u'?' # 0x003f -> QUESTION MARK, left-right + u'@' # 0x0040 -> COMMERCIAL AT + u'A' # 0x0041 -> LATIN CAPITAL LETTER A + u'B' # 0x0042 -> LATIN CAPITAL LETTER B + u'C' # 0x0043 -> LATIN CAPITAL LETTER C + u'D' # 0x0044 -> LATIN CAPITAL LETTER D + u'E' # 0x0045 -> LATIN CAPITAL LETTER E + u'F' # 0x0046 -> LATIN CAPITAL LETTER F + u'G' # 0x0047 -> LATIN CAPITAL LETTER G + u'H' # 0x0048 -> LATIN CAPITAL LETTER H + u'I' # 0x0049 -> LATIN CAPITAL LETTER I + u'J' # 0x004a -> LATIN CAPITAL LETTER J + u'K' # 0x004b -> LATIN CAPITAL LETTER K + u'L' # 0x004c -> LATIN CAPITAL LETTER L + u'M' # 0x004d -> LATIN CAPITAL LETTER M + u'N' # 0x004e -> LATIN CAPITAL LETTER N + u'O' # 0x004f -> LATIN CAPITAL LETTER O + u'P' # 0x0050 -> LATIN CAPITAL LETTER P + u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q + u'R' # 0x0052 -> LATIN CAPITAL LETTER R + u'S' # 0x0053 -> LATIN CAPITAL LETTER S + u'T' # 0x0054 -> LATIN CAPITAL LETTER T + u'U' # 0x0055 -> LATIN CAPITAL LETTER U + u'V' # 0x0056 -> LATIN CAPITAL LETTER V + u'W' # 0x0057 -> LATIN CAPITAL LETTER W + u'X' # 0x0058 -> LATIN CAPITAL LETTER X + u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y + u'Z' # 0x005a -> LATIN CAPITAL LETTER Z + u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right + u'\\' # 0x005c -> REVERSE SOLIDUS, left-right + u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right + u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right + u'_' # 0x005f -> LOW LINE, left-right + u'`' # 0x0060 -> GRAVE ACCENT + u'a' # 0x0061 -> LATIN SMALL LETTER A + u'b' # 0x0062 -> LATIN SMALL LETTER B + u'c' # 0x0063 -> LATIN SMALL LETTER C + u'd' # 0x0064 -> LATIN SMALL LETTER D + u'e' # 0x0065 -> LATIN SMALL LETTER E + u'f' # 0x0066 -> LATIN SMALL LETTER F + u'g' # 0x0067 -> LATIN SMALL LETTER G + u'h' # 0x0068 -> LATIN SMALL LETTER H + u'i' # 0x0069 -> LATIN SMALL LETTER I + u'j' # 0x006a -> LATIN SMALL LETTER J + u'k' # 0x006b -> LATIN SMALL LETTER K + u'l' # 0x006c -> LATIN SMALL LETTER L + u'm' # 0x006d -> LATIN SMALL LETTER M + u'n' # 0x006e -> LATIN SMALL LETTER N + u'o' # 0x006f -> LATIN SMALL LETTER O + u'p' # 0x0070 -> LATIN SMALL LETTER P + u'q' # 0x0071 -> LATIN SMALL LETTER Q + u'r' # 0x0072 -> LATIN SMALL LETTER R + u's' # 0x0073 -> LATIN SMALL LETTER S + u't' # 0x0074 -> LATIN SMALL LETTER T + u'u' # 0x0075 -> LATIN SMALL LETTER U + u'v' # 0x0076 -> LATIN SMALL LETTER V + u'w' # 0x0077 -> LATIN SMALL LETTER W + u'x' # 0x0078 -> LATIN SMALL LETTER X + u'y' # 0x0079 -> LATIN SMALL LETTER Y + u'z' # 0x007a -> LATIN SMALL LETTER Z + u'{' # 0x007b -> LEFT CURLY BRACKET, left-right + u'|' # 0x007c -> VERTICAL LINE, left-right + u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right + u'~' # 0x007e -> TILDE + u'\x7f' # 0x007f -> CONTROL CHARACTER + u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left + u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA + u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE + u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left + u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE + u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x009b -> DIVISION SIGN, right-left + u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS + u' ' # 0x00a0 -> SPACE, right-left + u'!' # 0x00a1 -> EXCLAMATION MARK, right-left + u'"' # 0x00a2 -> QUOTATION MARK, right-left + u'#' # 0x00a3 -> NUMBER SIGN, right-left + u'$' # 0x00a4 -> DOLLAR SIGN, right-left + u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN + u'&' # 0x00a6 -> AMPERSAND, right-left + u"'" # 0x00a7 -> APOSTROPHE, right-left + u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left + u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left + u'*' # 0x00aa -> ASTERISK, right-left + u'+' # 0x00ab -> PLUS SIGN, right-left + u'\u060c' # 0x00ac -> ARABIC COMMA + u'-' # 0x00ad -> HYPHEN-MINUS, right-left + u'.' # 0x00ae -> FULL STOP, right-left + u'/' # 0x00af -> SOLIDUS, right-left + u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override) + u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override) + u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override) + u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override) + u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override) + u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override) + u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override) + u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override) + u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override) + u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override) + u':' # 0x00ba -> COLON, right-left + u'\u061b' # 0x00bb -> ARABIC SEMICOLON + u'<' # 0x00bc -> LESS-THAN SIGN, right-left + u'=' # 0x00bd -> EQUALS SIGN, right-left + u'>' # 0x00be -> GREATER-THAN SIGN, right-left + u'\u061f' # 0x00bf -> ARABIC QUESTION MARK + u'\u274a' # 0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left + u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA + u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF + u'\u0628' # 0x00c8 -> ARABIC LETTER BEH + u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0x00ca -> ARABIC LETTER TEH + u'\u062b' # 0x00cb -> ARABIC LETTER THEH + u'\u062c' # 0x00cc -> ARABIC LETTER JEEM + u'\u062d' # 0x00cd -> ARABIC LETTER HAH + u'\u062e' # 0x00ce -> ARABIC LETTER KHAH + u'\u062f' # 0x00cf -> ARABIC LETTER DAL + u'\u0630' # 0x00d0 -> ARABIC LETTER THAL + u'\u0631' # 0x00d1 -> ARABIC LETTER REH + u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN + u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN + u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN + u'\u0635' # 0x00d5 -> ARABIC LETTER SAD + u'\u0636' # 0x00d6 -> ARABIC LETTER DAD + u'\u0637' # 0x00d7 -> ARABIC LETTER TAH + u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH + u'\u0639' # 0x00d9 -> ARABIC LETTER AIN + u'\u063a' # 0x00da -> ARABIC LETTER GHAIN + u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left + u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left + u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left + u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left + u'_' # 0x00df -> LOW LINE, right-left + u'\u0640' # 0x00e0 -> ARABIC TATWEEL + u'\u0641' # 0x00e1 -> ARABIC LETTER FEH + u'\u0642' # 0x00e2 -> ARABIC LETTER QAF + u'\u0643' # 0x00e3 -> ARABIC LETTER KAF + u'\u0644' # 0x00e4 -> ARABIC LETTER LAM + u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM + u'\u0646' # 0x00e6 -> ARABIC LETTER NOON + u'\u0647' # 0x00e7 -> ARABIC LETTER HEH + u'\u0648' # 0x00e8 -> ARABIC LETTER WAW + u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0x00ea -> ARABIC LETTER YEH + u'\u064b' # 0x00eb -> ARABIC FATHATAN + u'\u064c' # 0x00ec -> ARABIC DAMMATAN + u'\u064d' # 0x00ed -> ARABIC KASRATAN + u'\u064e' # 0x00ee -> ARABIC FATHA + u'\u064f' # 0x00ef -> ARABIC DAMMA + u'\u0650' # 0x00f0 -> ARABIC KASRA + u'\u0651' # 0x00f1 -> ARABIC SHADDA + u'\u0652' # 0x00f2 -> ARABIC SUKUN + u'\u067e' # 0x00f3 -> ARABIC LETTER PEH + u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH + u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH + u'\u06d5' # 0x00f6 -> ARABIC LETTER AE + u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH + u'\u06af' # 0x00f8 -> ARABIC LETTER GAF + u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL + u'\u0691' # 0x00fa -> ARABIC LETTER RREH + u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left + u'|' # 0x00fc -> VERTICAL LINE, right-left + u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left + u'\u0698' # 0x00fe -> ARABIC LETTER JEH + u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE +) + +### Encoding Map + +encoding_map = { + 0x0000: 0x0000, # CONTROL CHARACTER + 0x0001: 0x0001, # CONTROL CHARACTER + 0x0002: 0x0002, # CONTROL CHARACTER + 0x0003: 0x0003, # CONTROL CHARACTER + 0x0004: 0x0004, # CONTROL CHARACTER + 0x0005: 0x0005, # CONTROL CHARACTER + 0x0006: 0x0006, # CONTROL CHARACTER + 0x0007: 0x0007, # CONTROL CHARACTER + 0x0008: 0x0008, # CONTROL CHARACTER + 0x0009: 0x0009, # CONTROL CHARACTER + 0x000a: 0x000a, # CONTROL CHARACTER + 0x000b: 0x000b, # CONTROL CHARACTER + 0x000c: 0x000c, # CONTROL CHARACTER + 0x000d: 0x000d, # CONTROL CHARACTER + 0x000e: 0x000e, # CONTROL CHARACTER + 0x000f: 0x000f, # CONTROL CHARACTER + 0x0010: 0x0010, # CONTROL CHARACTER + 0x0011: 0x0011, # CONTROL CHARACTER + 0x0012: 0x0012, # CONTROL CHARACTER + 0x0013: 0x0013, # CONTROL CHARACTER + 0x0014: 0x0014, # CONTROL CHARACTER + 0x0015: 0x0015, # CONTROL CHARACTER + 0x0016: 0x0016, # CONTROL CHARACTER + 0x0017: 0x0017, # CONTROL CHARACTER + 0x0018: 0x0018, # CONTROL CHARACTER + 0x0019: 0x0019, # CONTROL CHARACTER + 0x001a: 0x001a, # CONTROL CHARACTER + 0x001b: 0x001b, # CONTROL CHARACTER + 0x001c: 0x001c, # CONTROL CHARACTER + 0x001d: 0x001d, # CONTROL CHARACTER + 0x001e: 0x001e, # CONTROL CHARACTER + 0x001f: 0x001f, # CONTROL CHARACTER + 0x0020: 0x0020, # SPACE, left-right + 0x0020: 0x00a0, # SPACE, right-left + 0x0021: 0x0021, # EXCLAMATION MARK, left-right + 0x0021: 0x00a1, # EXCLAMATION MARK, right-left + 0x0022: 0x0022, # QUOTATION MARK, left-right + 0x0022: 0x00a2, # QUOTATION MARK, right-left + 0x0023: 0x0023, # NUMBER SIGN, left-right + 0x0023: 0x00a3, # NUMBER SIGN, right-left + 0x0024: 0x0024, # DOLLAR SIGN, left-right + 0x0024: 0x00a4, # DOLLAR SIGN, right-left + 0x0025: 0x0025, # PERCENT SIGN, left-right + 0x0026: 0x0026, # AMPERSAND, left-right + 0x0026: 0x00a6, # AMPERSAND, right-left + 0x0027: 0x0027, # APOSTROPHE, left-right + 0x0027: 0x00a7, # APOSTROPHE, right-left + 0x0028: 0x0028, # LEFT PARENTHESIS, left-right + 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left + 0x0029: 0x0029, # RIGHT PARENTHESIS, left-right + 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left + 0x002a: 0x002a, # ASTERISK, left-right + 0x002a: 0x00aa, # ASTERISK, right-left + 0x002b: 0x002b, # PLUS SIGN, left-right + 0x002b: 0x00ab, # PLUS SIGN, right-left + 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + 0x002d: 0x002d, # HYPHEN-MINUS, left-right + 0x002d: 0x00ad, # HYPHEN-MINUS, right-left + 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + 0x002e: 0x00ae, # FULL STOP, right-left + 0x002f: 0x002f, # SOLIDUS, left-right + 0x002f: 0x00af, # SOLIDUS, right-left + 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO + 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE + 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO + 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE + 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR + 0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE + 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX + 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN + 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT + 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE + 0x003a: 0x003a, # COLON, left-right + 0x003a: 0x00ba, # COLON, right-left + 0x003b: 0x003b, # SEMICOLON, left-right + 0x003c: 0x003c, # LESS-THAN SIGN, left-right + 0x003c: 0x00bc, # LESS-THAN SIGN, right-left + 0x003d: 0x003d, # EQUALS SIGN, left-right + 0x003d: 0x00bd, # EQUALS SIGN, right-left + 0x003e: 0x003e, # GREATER-THAN SIGN, left-right + 0x003e: 0x00be, # GREATER-THAN SIGN, right-left + 0x003f: 0x003f, # QUESTION MARK, left-right + 0x0040: 0x0040, # COMMERCIAL AT + 0x0041: 0x0041, # LATIN CAPITAL LETTER A + 0x0042: 0x0042, # LATIN CAPITAL LETTER B + 0x0043: 0x0043, # LATIN CAPITAL LETTER C + 0x0044: 0x0044, # LATIN CAPITAL LETTER D + 0x0045: 0x0045, # LATIN CAPITAL LETTER E + 0x0046: 0x0046, # LATIN CAPITAL LETTER F + 0x0047: 0x0047, # LATIN CAPITAL LETTER G + 0x0048: 0x0048, # LATIN CAPITAL LETTER H + 0x0049: 0x0049, # LATIN CAPITAL LETTER I + 0x004a: 0x004a, # LATIN CAPITAL LETTER J + 0x004b: 0x004b, # LATIN CAPITAL LETTER K + 0x004c: 0x004c, # LATIN CAPITAL LETTER L + 0x004d: 0x004d, # LATIN CAPITAL LETTER M + 0x004e: 0x004e, # LATIN CAPITAL LETTER N + 0x004f: 0x004f, # LATIN CAPITAL LETTER O + 0x0050: 0x0050, # LATIN CAPITAL LETTER P + 0x0051: 0x0051, # LATIN CAPITAL LETTER Q + 0x0052: 0x0052, # LATIN CAPITAL LETTER R + 0x0053: 0x0053, # LATIN CAPITAL LETTER S + 0x0054: 0x0054, # LATIN CAPITAL LETTER T + 0x0055: 0x0055, # LATIN CAPITAL LETTER U + 0x0056: 0x0056, # LATIN CAPITAL LETTER V + 0x0057: 0x0057, # LATIN CAPITAL LETTER W + 0x0058: 0x0058, # LATIN CAPITAL LETTER X + 0x0059: 0x0059, # LATIN CAPITAL LETTER Y + 0x005a: 0x005a, # LATIN CAPITAL LETTER Z + 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right + 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left + 0x005c: 0x005c, # REVERSE SOLIDUS, left-right + 0x005c: 0x00dc, # REVERSE SOLIDUS, right-left + 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right + 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left + 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right + 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left + 0x005f: 0x005f, # LOW LINE, left-right + 0x005f: 0x00df, # LOW LINE, right-left + 0x0060: 0x0060, # GRAVE ACCENT + 0x0061: 0x0061, # LATIN SMALL LETTER A + 0x0062: 0x0062, # LATIN SMALL LETTER B + 0x0063: 0x0063, # LATIN SMALL LETTER C + 0x0064: 0x0064, # LATIN SMALL LETTER D + 0x0065: 0x0065, # LATIN SMALL LETTER E + 0x0066: 0x0066, # LATIN SMALL LETTER F + 0x0067: 0x0067, # LATIN SMALL LETTER G + 0x0068: 0x0068, # LATIN SMALL LETTER H + 0x0069: 0x0069, # LATIN SMALL LETTER I + 0x006a: 0x006a, # LATIN SMALL LETTER J + 0x006b: 0x006b, # LATIN SMALL LETTER K + 0x006c: 0x006c, # LATIN SMALL LETTER L + 0x006d: 0x006d, # LATIN SMALL LETTER M + 0x006e: 0x006e, # LATIN SMALL LETTER N + 0x006f: 0x006f, # LATIN SMALL LETTER O + 0x0070: 0x0070, # LATIN SMALL LETTER P + 0x0071: 0x0071, # LATIN SMALL LETTER Q + 0x0072: 0x0072, # LATIN SMALL LETTER R + 0x0073: 0x0073, # LATIN SMALL LETTER S + 0x0074: 0x0074, # LATIN SMALL LETTER T + 0x0075: 0x0075, # LATIN SMALL LETTER U + 0x0076: 0x0076, # LATIN SMALL LETTER V + 0x0077: 0x0077, # LATIN SMALL LETTER W + 0x0078: 0x0078, # LATIN SMALL LETTER X + 0x0079: 0x0079, # LATIN SMALL LETTER Y + 0x007a: 0x007a, # LATIN SMALL LETTER Z + 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right + 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left + 0x007c: 0x007c, # VERTICAL LINE, left-right + 0x007c: 0x00fc, # VERTICAL LINE, right-left + 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right + 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left + 0x007e: 0x007e, # TILDE + 0x007f: 0x007f, # CONTROL CHARACTER + 0x00a0: 0x0081, # NO-BREAK SPACE, right-left + 0x00ab: 0x008c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA + 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE + 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE + 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE + 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE + 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX + 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS + 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA + 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE + 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE + 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX + 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS + 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE + 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX + 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS + 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE + 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE + 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS + 0x00f7: 0x009b, # DIVISION SIGN, right-left + 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE + 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE + 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX + 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS + 0x060c: 0x00ac, # ARABIC COMMA + 0x061b: 0x00bb, # ARABIC SEMICOLON + 0x061f: 0x00bf, # ARABIC QUESTION MARK + 0x0621: 0x00c1, # ARABIC LETTER HAMZA + 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE + 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE + 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE + 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW + 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE + 0x0627: 0x00c7, # ARABIC LETTER ALEF + 0x0628: 0x00c8, # ARABIC LETTER BEH + 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA + 0x062a: 0x00ca, # ARABIC LETTER TEH + 0x062b: 0x00cb, # ARABIC LETTER THEH + 0x062c: 0x00cc, # ARABIC LETTER JEEM + 0x062d: 0x00cd, # ARABIC LETTER HAH + 0x062e: 0x00ce, # ARABIC LETTER KHAH + 0x062f: 0x00cf, # ARABIC LETTER DAL + 0x0630: 0x00d0, # ARABIC LETTER THAL + 0x0631: 0x00d1, # ARABIC LETTER REH + 0x0632: 0x00d2, # ARABIC LETTER ZAIN + 0x0633: 0x00d3, # ARABIC LETTER SEEN + 0x0634: 0x00d4, # ARABIC LETTER SHEEN + 0x0635: 0x00d5, # ARABIC LETTER SAD + 0x0636: 0x00d6, # ARABIC LETTER DAD + 0x0637: 0x00d7, # ARABIC LETTER TAH + 0x0638: 0x00d8, # ARABIC LETTER ZAH + 0x0639: 0x00d9, # ARABIC LETTER AIN + 0x063a: 0x00da, # ARABIC LETTER GHAIN + 0x0640: 0x00e0, # ARABIC TATWEEL + 0x0641: 0x00e1, # ARABIC LETTER FEH + 0x0642: 0x00e2, # ARABIC LETTER QAF + 0x0643: 0x00e3, # ARABIC LETTER KAF + 0x0644: 0x00e4, # ARABIC LETTER LAM + 0x0645: 0x00e5, # ARABIC LETTER MEEM + 0x0646: 0x00e6, # ARABIC LETTER NOON + 0x0647: 0x00e7, # ARABIC LETTER HEH + 0x0648: 0x00e8, # ARABIC LETTER WAW + 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA + 0x064a: 0x00ea, # ARABIC LETTER YEH + 0x064b: 0x00eb, # ARABIC FATHATAN + 0x064c: 0x00ec, # ARABIC DAMMATAN + 0x064d: 0x00ed, # ARABIC KASRATAN + 0x064e: 0x00ee, # ARABIC FATHA + 0x064f: 0x00ef, # ARABIC DAMMA + 0x0650: 0x00f0, # ARABIC KASRA + 0x0651: 0x00f1, # ARABIC SHADDA + 0x0652: 0x00f2, # ARABIC SUKUN + 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO, right-left (need override) + 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE, right-left (need override) + 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO, right-left (need override) + 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE, right-left (need override) + 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR, right-left (need override) + 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE, right-left (need override) + 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX, right-left (need override) + 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) + 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) + 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE, right-left (need override) + 0x066a: 0x00a5, # ARABIC PERCENT SIGN + 0x0679: 0x00f4, # ARABIC LETTER TTEH + 0x067e: 0x00f3, # ARABIC LETTER PEH + 0x0686: 0x00f5, # ARABIC LETTER TCHEH + 0x0688: 0x00f9, # ARABIC LETTER DDAL + 0x0691: 0x00fa, # ARABIC LETTER RREH + 0x0698: 0x00fe, # ARABIC LETTER JEH + 0x06a4: 0x00f7, # ARABIC LETTER VEH + 0x06af: 0x00f8, # ARABIC LETTER GAF + 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA + 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE + 0x06d5: 0x00f6, # ARABIC LETTER AE + 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left + 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_centeuro.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_centeuro generated from 'MAPPINGS/VENDORS/APPLE/CENTEURO.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-centeuro', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON + u'\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\u0104' # 0x84 -> LATIN CAPITAL LETTER A WITH OGONEK + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK + u'\u010c' # 0x89 -> LATIN CAPITAL LETTER C WITH CARON + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON + u'\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE + u'\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE + u'\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE + u'\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\u010f' # 0x93 -> LATIN SMALL LETTER D WITH CARON + u'\u0112' # 0x94 -> LATIN CAPITAL LETTER E WITH MACRON + u'\u0113' # 0x95 -> LATIN SMALL LETTER E WITH MACRON + u'\u0116' # 0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\u0117' # 0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON + u'\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\u0119' # 0xAB -> LATIN SMALL LETTER E WITH OGONEK + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA + u'\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK + u'\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK + u'\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON + u'\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE + u'\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA + u'\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA + u'\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON + u'\u013e' # 0xBC -> LATIN SMALL LETTER L WITH CARON + u'\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE + u'\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE + u'\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA + u'\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA + u'\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE + u'\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON + u'\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE + u'\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON + u'\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE + u'\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE + u'\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON + u'\u0156' # 0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA + u'\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA + u'\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u0161' # 0xE4 -> LATIN SMALL LETTER S WITH CARON + u'\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE + u'\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON + u'\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON + u'\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON + u'\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON + u'\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE + u'\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + u'\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE + u'\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK + u'\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK + u'\xdd' # 0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE + u'\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA + u'\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE + u'\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE + u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE + u'\u0122' # 0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA + u'\u02c7' # 0xFF -> CARON +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_croatian.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_croatian generated from 'MAPPINGS/VENDORS/APPLE/CROATIAN.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-croatian', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u2206' # 0xB4 -> INCREMENT + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\uf8ff' # 0xD8 -> Apple logo + u'\xa9' # 0xD9 -> COPYRIGHT SIGN + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\xc6' # 0xDE -> LATIN CAPITAL LETTER AE + u'\xbb' # 0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2013' # 0xE0 -> EN DASH + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u03c0' # 0xF9 -> GREEK SMALL LETTER PI + u'\xcb' # 0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\xca' # 0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xe6' # 0xFE -> LATIN SMALL LETTER AE + u'\u02c7' # 0xFF -> CARON +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_cyrillic.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_cyrillic generated from 'MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-cyrillic', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\u0410' # 0x80 -> CYRILLIC CAPITAL LETTER A + u'\u0411' # 0x81 -> CYRILLIC CAPITAL LETTER BE + u'\u0412' # 0x82 -> CYRILLIC CAPITAL LETTER VE + u'\u0413' # 0x83 -> CYRILLIC CAPITAL LETTER GHE + u'\u0414' # 0x84 -> CYRILLIC CAPITAL LETTER DE + u'\u0415' # 0x85 -> CYRILLIC CAPITAL LETTER IE + u'\u0416' # 0x86 -> CYRILLIC CAPITAL LETTER ZHE + u'\u0417' # 0x87 -> CYRILLIC CAPITAL LETTER ZE + u'\u0418' # 0x88 -> CYRILLIC CAPITAL LETTER I + u'\u0419' # 0x89 -> CYRILLIC CAPITAL LETTER SHORT I + u'\u041a' # 0x8A -> CYRILLIC CAPITAL LETTER KA + u'\u041b' # 0x8B -> CYRILLIC CAPITAL LETTER EL + u'\u041c' # 0x8C -> CYRILLIC CAPITAL LETTER EM + u'\u041d' # 0x8D -> CYRILLIC CAPITAL LETTER EN + u'\u041e' # 0x8E -> CYRILLIC CAPITAL LETTER O + u'\u041f' # 0x8F -> CYRILLIC CAPITAL LETTER PE + u'\u0420' # 0x90 -> CYRILLIC CAPITAL LETTER ER + u'\u0421' # 0x91 -> CYRILLIC CAPITAL LETTER ES + u'\u0422' # 0x92 -> CYRILLIC CAPITAL LETTER TE + u'\u0423' # 0x93 -> CYRILLIC CAPITAL LETTER U + u'\u0424' # 0x94 -> CYRILLIC CAPITAL LETTER EF + u'\u0425' # 0x95 -> CYRILLIC CAPITAL LETTER HA + u'\u0426' # 0x96 -> CYRILLIC CAPITAL LETTER TSE + u'\u0427' # 0x97 -> CYRILLIC CAPITAL LETTER CHE + u'\u0428' # 0x98 -> CYRILLIC CAPITAL LETTER SHA + u'\u0429' # 0x99 -> CYRILLIC CAPITAL LETTER SHCHA + u'\u042a' # 0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN + u'\u042b' # 0x9B -> CYRILLIC CAPITAL LETTER YERU + u'\u042c' # 0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN + u'\u042d' # 0x9D -> CYRILLIC CAPITAL LETTER E + u'\u042e' # 0x9E -> CYRILLIC CAPITAL LETTER YU + u'\u042f' # 0x9F -> CYRILLIC CAPITAL LETTER YA + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\u0490' # 0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\u0406' # 0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\u0402' # 0xAB -> CYRILLIC CAPITAL LETTER DJE + u'\u0452' # 0xAC -> CYRILLIC SMALL LETTER DJE + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0403' # 0xAE -> CYRILLIC CAPITAL LETTER GJE + u'\u0453' # 0xAF -> CYRILLIC SMALL LETTER GJE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\u0456' # 0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u0491' # 0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + u'\u0408' # 0xB7 -> CYRILLIC CAPITAL LETTER JE + u'\u0404' # 0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + u'\u0454' # 0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE + u'\u0407' # 0xBA -> CYRILLIC CAPITAL LETTER YI + u'\u0457' # 0xBB -> CYRILLIC SMALL LETTER YI + u'\u0409' # 0xBC -> CYRILLIC CAPITAL LETTER LJE + u'\u0459' # 0xBD -> CYRILLIC SMALL LETTER LJE + u'\u040a' # 0xBE -> CYRILLIC CAPITAL LETTER NJE + u'\u045a' # 0xBF -> CYRILLIC SMALL LETTER NJE + u'\u0458' # 0xC0 -> CYRILLIC SMALL LETTER JE + u'\u0405' # 0xC1 -> CYRILLIC CAPITAL LETTER DZE + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u040b' # 0xCB -> CYRILLIC CAPITAL LETTER TSHE + u'\u045b' # 0xCC -> CYRILLIC SMALL LETTER TSHE + u'\u040c' # 0xCD -> CYRILLIC CAPITAL LETTER KJE + u'\u045c' # 0xCE -> CYRILLIC SMALL LETTER KJE + u'\u0455' # 0xCF -> CYRILLIC SMALL LETTER DZE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u201e' # 0xD7 -> DOUBLE LOW-9 QUOTATION MARK + u'\u040e' # 0xD8 -> CYRILLIC CAPITAL LETTER SHORT U + u'\u045e' # 0xD9 -> CYRILLIC SMALL LETTER SHORT U + u'\u040f' # 0xDA -> CYRILLIC CAPITAL LETTER DZHE + u'\u045f' # 0xDB -> CYRILLIC SMALL LETTER DZHE + u'\u2116' # 0xDC -> NUMERO SIGN + u'\u0401' # 0xDD -> CYRILLIC CAPITAL LETTER IO + u'\u0451' # 0xDE -> CYRILLIC SMALL LETTER IO + u'\u044f' # 0xDF -> CYRILLIC SMALL LETTER YA + u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A + u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE + u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE + u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE + u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE + u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE + u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE + u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE + u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I + u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I + u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA + u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL + u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM + u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN + u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O + u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE + u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER + u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES + u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE + u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U + u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF + u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA + u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE + u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE + u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA + u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA + u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN + u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU + u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN + u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E + u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU + u'\u20ac' # 0xFF -> EURO SIGN +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_farsi.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_farsi generated from 'MAPPINGS/VENDORS/APPLE/FARSI.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-farsi', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE, left-right + u'!' # 0x21 -> EXCLAMATION MARK, left-right + u'"' # 0x22 -> QUOTATION MARK, left-right + u'#' # 0x23 -> NUMBER SIGN, left-right + u'$' # 0x24 -> DOLLAR SIGN, left-right + u'%' # 0x25 -> PERCENT SIGN, left-right + u'&' # 0x26 -> AMPERSAND, left-right + u"'" # 0x27 -> APOSTROPHE, left-right + u'(' # 0x28 -> LEFT PARENTHESIS, left-right + u')' # 0x29 -> RIGHT PARENTHESIS, left-right + u'*' # 0x2A -> ASTERISK, left-right + u'+' # 0x2B -> PLUS SIGN, left-right + u',' # 0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + u'-' # 0x2D -> HYPHEN-MINUS, left-right + u'.' # 0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR + u'/' # 0x2F -> SOLIDUS, left-right + u'0' # 0x30 -> DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE + u'2' # 0x32 -> DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO + u'3' # 0x33 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE + u':' # 0x3A -> COLON, left-right + u';' # 0x3B -> SEMICOLON, left-right + u'<' # 0x3C -> LESS-THAN SIGN, left-right + u'=' # 0x3D -> EQUALS SIGN, left-right + u'>' # 0x3E -> GREATER-THAN SIGN, left-right + u'?' # 0x3F -> QUESTION MARK, left-right + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET, left-right + u'\\' # 0x5C -> REVERSE SOLIDUS, left-right + u']' # 0x5D -> RIGHT SQUARE BRACKET, left-right + u'^' # 0x5E -> CIRCUMFLEX ACCENT, left-right + u'_' # 0x5F -> LOW LINE, left-right + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET, left-right + u'|' # 0x7C -> VERTICAL LINE, left-right + u'}' # 0x7D -> RIGHT CURLY BRACKET, left-right + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xa0' # 0x81 -> NO-BREAK SPACE, right-left + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u06ba' # 0x8B -> ARABIC LETTER NOON GHUNNA + u'\xab' # 0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\u2026' # 0x93 -> HORIZONTAL ELLIPSIS, right-left + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xbb' # 0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf7' # 0x9B -> DIVISION SIGN, right-left + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u' ' # 0xA0 -> SPACE, right-left + u'!' # 0xA1 -> EXCLAMATION MARK, right-left + u'"' # 0xA2 -> QUOTATION MARK, right-left + u'#' # 0xA3 -> NUMBER SIGN, right-left + u'$' # 0xA4 -> DOLLAR SIGN, right-left + u'\u066a' # 0xA5 -> ARABIC PERCENT SIGN + u'&' # 0xA6 -> AMPERSAND, right-left + u"'" # 0xA7 -> APOSTROPHE, right-left + u'(' # 0xA8 -> LEFT PARENTHESIS, right-left + u')' # 0xA9 -> RIGHT PARENTHESIS, right-left + u'*' # 0xAA -> ASTERISK, right-left + u'+' # 0xAB -> PLUS SIGN, right-left + u'\u060c' # 0xAC -> ARABIC COMMA + u'-' # 0xAD -> HYPHEN-MINUS, right-left + u'.' # 0xAE -> FULL STOP, right-left + u'/' # 0xAF -> SOLIDUS, right-left + u'\u06f0' # 0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) + u'\u06f1' # 0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) + u'\u06f2' # 0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) + u'\u06f3' # 0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) + u'\u06f4' # 0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) + u'\u06f5' # 0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) + u'\u06f6' # 0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) + u'\u06f7' # 0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) + u'\u06f8' # 0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) + u'\u06f9' # 0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) + u':' # 0xBA -> COLON, right-left + u'\u061b' # 0xBB -> ARABIC SEMICOLON + u'<' # 0xBC -> LESS-THAN SIGN, right-left + u'=' # 0xBD -> EQUALS SIGN, right-left + u'>' # 0xBE -> GREATER-THAN SIGN, right-left + u'\u061f' # 0xBF -> ARABIC QUESTION MARK + u'\u274a' # 0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left + u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA + u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE + u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE + u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE + u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW + u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE + u'\u0627' # 0xC7 -> ARABIC LETTER ALEF + u'\u0628' # 0xC8 -> ARABIC LETTER BEH + u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA + u'\u062a' # 0xCA -> ARABIC LETTER TEH + u'\u062b' # 0xCB -> ARABIC LETTER THEH + u'\u062c' # 0xCC -> ARABIC LETTER JEEM + u'\u062d' # 0xCD -> ARABIC LETTER HAH + u'\u062e' # 0xCE -> ARABIC LETTER KHAH + u'\u062f' # 0xCF -> ARABIC LETTER DAL + u'\u0630' # 0xD0 -> ARABIC LETTER THAL + u'\u0631' # 0xD1 -> ARABIC LETTER REH + u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN + u'\u0633' # 0xD3 -> ARABIC LETTER SEEN + u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN + u'\u0635' # 0xD5 -> ARABIC LETTER SAD + u'\u0636' # 0xD6 -> ARABIC LETTER DAD + u'\u0637' # 0xD7 -> ARABIC LETTER TAH + u'\u0638' # 0xD8 -> ARABIC LETTER ZAH + u'\u0639' # 0xD9 -> ARABIC LETTER AIN + u'\u063a' # 0xDA -> ARABIC LETTER GHAIN + u'[' # 0xDB -> LEFT SQUARE BRACKET, right-left + u'\\' # 0xDC -> REVERSE SOLIDUS, right-left + u']' # 0xDD -> RIGHT SQUARE BRACKET, right-left + u'^' # 0xDE -> CIRCUMFLEX ACCENT, right-left + u'_' # 0xDF -> LOW LINE, right-left + u'\u0640' # 0xE0 -> ARABIC TATWEEL + u'\u0641' # 0xE1 -> ARABIC LETTER FEH + u'\u0642' # 0xE2 -> ARABIC LETTER QAF + u'\u0643' # 0xE3 -> ARABIC LETTER KAF + u'\u0644' # 0xE4 -> ARABIC LETTER LAM + u'\u0645' # 0xE5 -> ARABIC LETTER MEEM + u'\u0646' # 0xE6 -> ARABIC LETTER NOON + u'\u0647' # 0xE7 -> ARABIC LETTER HEH + u'\u0648' # 0xE8 -> ARABIC LETTER WAW + u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA + u'\u064a' # 0xEA -> ARABIC LETTER YEH + u'\u064b' # 0xEB -> ARABIC FATHATAN + u'\u064c' # 0xEC -> ARABIC DAMMATAN + u'\u064d' # 0xED -> ARABIC KASRATAN + u'\u064e' # 0xEE -> ARABIC FATHA + u'\u064f' # 0xEF -> ARABIC DAMMA + u'\u0650' # 0xF0 -> ARABIC KASRA + u'\u0651' # 0xF1 -> ARABIC SHADDA + u'\u0652' # 0xF2 -> ARABIC SUKUN + u'\u067e' # 0xF3 -> ARABIC LETTER PEH + u'\u0679' # 0xF4 -> ARABIC LETTER TTEH + u'\u0686' # 0xF5 -> ARABIC LETTER TCHEH + u'\u06d5' # 0xF6 -> ARABIC LETTER AE + u'\u06a4' # 0xF7 -> ARABIC LETTER VEH + u'\u06af' # 0xF8 -> ARABIC LETTER GAF + u'\u0688' # 0xF9 -> ARABIC LETTER DDAL + u'\u0691' # 0xFA -> ARABIC LETTER RREH + u'{' # 0xFB -> LEFT CURLY BRACKET, right-left + u'|' # 0xFC -> VERTICAL LINE, right-left + u'}' # 0xFD -> RIGHT CURLY BRACKET, right-left + u'\u0698' # 0xFE -> ARABIC LETTER JEH + u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_greek.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_greek generated from 'MAPPINGS/VENDORS/APPLE/GREEK.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-greek', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xb9' # 0x81 -> SUPERSCRIPT ONE + u'\xb2' # 0x82 -> SUPERSCRIPT TWO + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xb3' # 0x84 -> SUPERSCRIPT THREE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\u0385' # 0x87 -> GREEK DIALYTIKA TONOS + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\u0384' # 0x8B -> GREEK TONOS + u'\xa8' # 0x8C -> DIAERESIS + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xa3' # 0x92 -> POUND SIGN + u'\u2122' # 0x93 -> TRADE MARK SIGN + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\u2022' # 0x96 -> BULLET + u'\xbd' # 0x97 -> VULGAR FRACTION ONE HALF + u'\u2030' # 0x98 -> PER MILLE SIGN + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xa6' # 0x9B -> BROKEN BAR + u'\u20ac' # 0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\u0393' # 0xA1 -> GREEK CAPITAL LETTER GAMMA + u'\u0394' # 0xA2 -> GREEK CAPITAL LETTER DELTA + u'\u0398' # 0xA3 -> GREEK CAPITAL LETTER THETA + u'\u039b' # 0xA4 -> GREEK CAPITAL LETTER LAMDA + u'\u039e' # 0xA5 -> GREEK CAPITAL LETTER XI + u'\u03a0' # 0xA6 -> GREEK CAPITAL LETTER PI + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u03a3' # 0xAA -> GREEK CAPITAL LETTER SIGMA + u'\u03aa' # 0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + u'\xa7' # 0xAC -> SECTION SIGN + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xb0' # 0xAE -> DEGREE SIGN + u'\xb7' # 0xAF -> MIDDLE DOT + u'\u0391' # 0xB0 -> GREEK CAPITAL LETTER ALPHA + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\u0392' # 0xB5 -> GREEK CAPITAL LETTER BETA + u'\u0395' # 0xB6 -> GREEK CAPITAL LETTER EPSILON + u'\u0396' # 0xB7 -> GREEK CAPITAL LETTER ZETA + u'\u0397' # 0xB8 -> GREEK CAPITAL LETTER ETA + u'\u0399' # 0xB9 -> GREEK CAPITAL LETTER IOTA + u'\u039a' # 0xBA -> GREEK CAPITAL LETTER KAPPA + u'\u039c' # 0xBB -> GREEK CAPITAL LETTER MU + u'\u03a6' # 0xBC -> GREEK CAPITAL LETTER PHI + u'\u03ab' # 0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + u'\u03a8' # 0xBE -> GREEK CAPITAL LETTER PSI + u'\u03a9' # 0xBF -> GREEK CAPITAL LETTER OMEGA + u'\u03ac' # 0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS + u'\u039d' # 0xC1 -> GREEK CAPITAL LETTER NU + u'\xac' # 0xC2 -> NOT SIGN + u'\u039f' # 0xC3 -> GREEK CAPITAL LETTER OMICRON + u'\u03a1' # 0xC4 -> GREEK CAPITAL LETTER RHO + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u03a4' # 0xC6 -> GREEK CAPITAL LETTER TAU + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\u03a5' # 0xCB -> GREEK CAPITAL LETTER UPSILON + u'\u03a7' # 0xCC -> GREEK CAPITAL LETTER CHI + u'\u0386' # 0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS + u'\u0388' # 0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2015' # 0xD1 -> HORIZONTAL BAR + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u0389' # 0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS + u'\u038a' # 0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS + u'\u038c' # 0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS + u'\u038e' # 0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS + u'\u03ad' # 0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS + u'\u03ae' # 0xDC -> GREEK SMALL LETTER ETA WITH TONOS + u'\u03af' # 0xDD -> GREEK SMALL LETTER IOTA WITH TONOS + u'\u03cc' # 0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS + u'\u038f' # 0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS + u'\u03cd' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS + u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA + u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA + u'\u03c8' # 0xE3 -> GREEK SMALL LETTER PSI + u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA + u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON + u'\u03c6' # 0xE6 -> GREEK SMALL LETTER PHI + u'\u03b3' # 0xE7 -> GREEK SMALL LETTER GAMMA + u'\u03b7' # 0xE8 -> GREEK SMALL LETTER ETA + u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA + u'\u03be' # 0xEA -> GREEK SMALL LETTER XI + u'\u03ba' # 0xEB -> GREEK SMALL LETTER KAPPA + u'\u03bb' # 0xEC -> GREEK SMALL LETTER LAMDA + u'\u03bc' # 0xED -> GREEK SMALL LETTER MU + u'\u03bd' # 0xEE -> GREEK SMALL LETTER NU + u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON + u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI + u'\u03ce' # 0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS + u'\u03c1' # 0xF2 -> GREEK SMALL LETTER RHO + u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA + u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU + u'\u03b8' # 0xF5 -> GREEK SMALL LETTER THETA + u'\u03c9' # 0xF6 -> GREEK SMALL LETTER OMEGA + u'\u03c2' # 0xF7 -> GREEK SMALL LETTER FINAL SIGMA + u'\u03c7' # 0xF8 -> GREEK SMALL LETTER CHI + u'\u03c5' # 0xF9 -> GREEK SMALL LETTER UPSILON + u'\u03b6' # 0xFA -> GREEK SMALL LETTER ZETA + u'\u03ca' # 0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA + u'\u03cb' # 0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA + u'\u0390' # 0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + u'\u03b0' # 0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_iceland.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_iceland generated from 'MAPPINGS/VENDORS/APPLE/ICELAND.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-iceland', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\xdd' # 0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\xd0' # 0xDC -> LATIN CAPITAL LETTER ETH + u'\xf0' # 0xDD -> LATIN SMALL LETTER ETH + u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN + u'\xfe' # 0xDF -> LATIN SMALL LETTER THORN + u'\xfd' # 0xE0 -> LATIN SMALL LETTER Y WITH ACUTE + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_latin2.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,183 @@ +""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py. + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_map) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_map)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-latin2', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS + 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON + 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON + 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE + 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK + 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS + 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS + 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE + 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK + 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON + 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS + 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON + 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE + 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE + 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE + 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE + 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE + 0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON + 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE + 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON + 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON + 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON + 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE + 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE + 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX + 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS + 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE + 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE + 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON + 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON + 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS + 0x00a0: 0x2020, # DAGGER + 0x00a1: 0x00b0, # DEGREE SIGN + 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK + 0x00a4: 0x00a7, # SECTION SIGN + 0x00a5: 0x2022, # BULLET + 0x00a6: 0x00b6, # PILCROW SIGN + 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S + 0x00a8: 0x00ae, # REGISTERED SIGN + 0x00aa: 0x2122, # TRADE MARK SIGN + 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK + 0x00ac: 0x00a8, # DIAERESIS + 0x00ad: 0x2260, # NOT EQUAL TO + 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA + 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK + 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK + 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON + 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO + 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO + 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON + 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA + 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL + 0x00b7: 0x2211, # N-ARY SUMMATION + 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE + 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA + 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA + 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON + 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON + 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE + 0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE + 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA + 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA + 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE + 0x00c2: 0x00ac, # NOT SIGN + 0x00c3: 0x221a, # SQUARE ROOT + 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE + 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON + 0x00c6: 0x2206, # INCREMENT + 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS + 0x00ca: 0x00a0, # NO-BREAK SPACE + 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON + 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE + 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE + 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON + 0x00d0: 0x2013, # EN DASH + 0x00d1: 0x2014, # EM DASH + 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00d6: 0x00f7, # DIVISION SIGN + 0x00d7: 0x25ca, # LOZENGE + 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON + 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE + 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE + 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON + 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON + 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA + 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA + 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE + 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE + 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE + 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON + 0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON + 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE + 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON + 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON + 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON + 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE + 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON + 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE + 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE + 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE + 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE + 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK + 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK + 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE + 0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE + 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA + 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE + 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA + 0x00ff: 0x02c7, # CARON +}) + +### Encoding Map + +encoding_map = codecs.make_encoding_map(decoding_map)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_roman.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_roman generated from 'MAPPINGS/VENDORS/APPLE/ROMAN.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-roman', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\ufb01' # 0xDE -> LATIN SMALL LIGATURE FI + u'\ufb02' # 0xDF -> LATIN SMALL LIGATURE FL + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_romanian.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_romanian generated from 'MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-romanian', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\u0102' # 0xAE -> LATIN CAPITAL LETTER A WITH BREVE + u'\u0218' # 0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\u0103' # 0xBE -> LATIN SMALL LETTER A WITH BREVE + u'\u0219' # 0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u2044' # 0xDA -> FRACTION SLASH + u'\u20ac' # 0xDB -> EURO SIGN + u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK + u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + u'\u021b' # 0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mac_turkish.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec mac_turkish generated from 'MAPPINGS/VENDORS/APPLE/TURKISH.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mac-turkish', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> CONTROL CHARACTER + u'\x01' # 0x01 -> CONTROL CHARACTER + u'\x02' # 0x02 -> CONTROL CHARACTER + u'\x03' # 0x03 -> CONTROL CHARACTER + u'\x04' # 0x04 -> CONTROL CHARACTER + u'\x05' # 0x05 -> CONTROL CHARACTER + u'\x06' # 0x06 -> CONTROL CHARACTER + u'\x07' # 0x07 -> CONTROL CHARACTER + u'\x08' # 0x08 -> CONTROL CHARACTER + u'\t' # 0x09 -> CONTROL CHARACTER + u'\n' # 0x0A -> CONTROL CHARACTER + u'\x0b' # 0x0B -> CONTROL CHARACTER + u'\x0c' # 0x0C -> CONTROL CHARACTER + u'\r' # 0x0D -> CONTROL CHARACTER + u'\x0e' # 0x0E -> CONTROL CHARACTER + u'\x0f' # 0x0F -> CONTROL CHARACTER + u'\x10' # 0x10 -> CONTROL CHARACTER + u'\x11' # 0x11 -> CONTROL CHARACTER + u'\x12' # 0x12 -> CONTROL CHARACTER + u'\x13' # 0x13 -> CONTROL CHARACTER + u'\x14' # 0x14 -> CONTROL CHARACTER + u'\x15' # 0x15 -> CONTROL CHARACTER + u'\x16' # 0x16 -> CONTROL CHARACTER + u'\x17' # 0x17 -> CONTROL CHARACTER + u'\x18' # 0x18 -> CONTROL CHARACTER + u'\x19' # 0x19 -> CONTROL CHARACTER + u'\x1a' # 0x1A -> CONTROL CHARACTER + u'\x1b' # 0x1B -> CONTROL CHARACTER + u'\x1c' # 0x1C -> CONTROL CHARACTER + u'\x1d' # 0x1D -> CONTROL CHARACTER + u'\x1e' # 0x1E -> CONTROL CHARACTER + u'\x1f' # 0x1F -> CONTROL CHARACTER + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> CONTROL CHARACTER + u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS + u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE + u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA + u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE + u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE + u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS + u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS + u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE + u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE + u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS + u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE + u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE + u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA + u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE + u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE + u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS + u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE + u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE + u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX + u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS + u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE + u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE + u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE + u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS + u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE + u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE + u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE + u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX + u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS + u'\u2020' # 0xA0 -> DAGGER + u'\xb0' # 0xA1 -> DEGREE SIGN + u'\xa2' # 0xA2 -> CENT SIGN + u'\xa3' # 0xA3 -> POUND SIGN + u'\xa7' # 0xA4 -> SECTION SIGN + u'\u2022' # 0xA5 -> BULLET + u'\xb6' # 0xA6 -> PILCROW SIGN + u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S + u'\xae' # 0xA8 -> REGISTERED SIGN + u'\xa9' # 0xA9 -> COPYRIGHT SIGN + u'\u2122' # 0xAA -> TRADE MARK SIGN + u'\xb4' # 0xAB -> ACUTE ACCENT + u'\xa8' # 0xAC -> DIAERESIS + u'\u2260' # 0xAD -> NOT EQUAL TO + u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE + u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE + u'\u221e' # 0xB0 -> INFINITY + u'\xb1' # 0xB1 -> PLUS-MINUS SIGN + u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO + u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO + u'\xa5' # 0xB4 -> YEN SIGN + u'\xb5' # 0xB5 -> MICRO SIGN + u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL + u'\u2211' # 0xB7 -> N-ARY SUMMATION + u'\u220f' # 0xB8 -> N-ARY PRODUCT + u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI + u'\u222b' # 0xBA -> INTEGRAL + u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR + u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR + u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA + u'\xe6' # 0xBE -> LATIN SMALL LETTER AE + u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE + u'\xbf' # 0xC0 -> INVERTED QUESTION MARK + u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK + u'\xac' # 0xC2 -> NOT SIGN + u'\u221a' # 0xC3 -> SQUARE ROOT + u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK + u'\u2248' # 0xC5 -> ALMOST EQUAL TO + u'\u2206' # 0xC6 -> INCREMENT + u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS + u'\xa0' # 0xCA -> NO-BREAK SPACE + u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE + u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE + u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE + u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE + u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE + u'\u2013' # 0xD0 -> EN DASH + u'\u2014' # 0xD1 -> EM DASH + u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK + u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK + u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK + u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK + u'\xf7' # 0xD6 -> DIVISION SIGN + u'\u25ca' # 0xD7 -> LOZENGE + u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS + u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS + u'\u011e' # 0xDA -> LATIN CAPITAL LETTER G WITH BREVE + u'\u011f' # 0xDB -> LATIN SMALL LETTER G WITH BREVE + u'\u0130' # 0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE + u'\u0131' # 0xDD -> LATIN SMALL LETTER DOTLESS I + u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA + u'\u015f' # 0xDF -> LATIN SMALL LETTER S WITH CEDILLA + u'\u2021' # 0xE0 -> DOUBLE DAGGER + u'\xb7' # 0xE1 -> MIDDLE DOT + u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK + u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK + u'\u2030' # 0xE4 -> PER MILLE SIGN + u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX + u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX + u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE + u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS + u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE + u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE + u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX + u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS + u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE + u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE + u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX + u'\uf8ff' # 0xF0 -> Apple logo + u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE + u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE + u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX + u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE + u'\uf8a0' # 0xF5 -> undefined1 + u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT + u'\u02dc' # 0xF7 -> SMALL TILDE + u'\xaf' # 0xF8 -> MACRON + u'\u02d8' # 0xF9 -> BREVE + u'\u02d9' # 0xFA -> DOT ABOVE + u'\u02da' # 0xFB -> RING ABOVE + u'\xb8' # 0xFC -> CEDILLA + u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT + u'\u02db' # 0xFE -> OGONEK + u'\u02c7' # 0xFF -> CARON +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/mbcs.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,47 @@ +""" Python 'mbcs' Codec for Windows + + +Cloned by Mark Hammond (mhammond@skippinet.com.au) from ascii.py, +which was written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +# Import them explicitly to cause an ImportError +# on non-Windows systems +from codecs import mbcs_encode, mbcs_decode +# for IncrementalDecoder, IncrementalEncoder, ... +import codecs + +### Codec APIs + +encode = mbcs_encode + +def decode(input, errors='strict'): + return mbcs_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return mbcs_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = mbcs_decode + +class StreamWriter(codecs.StreamWriter): + encode = mbcs_encode + +class StreamReader(codecs.StreamReader): + decode = mbcs_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='mbcs', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/palmos.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,83 @@ +""" Python Character Mapping Codec for PalmOS 3.5. + +Written by Sjoerd Mullender (sjoerd@acm.org); based on iso8859_15.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_map) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_map)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='palmos', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) + +# The PalmOS character set is mostly iso-8859-1 with some differences. +decoding_map.update({ + 0x0080: 0x20ac, # EURO SIGN + 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK + 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x2020, # DAGGER + 0x0087: 0x2021, # DOUBLE DAGGER + 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT + 0x0089: 0x2030, # PER MILLE SIGN + 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON + 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE + 0x008d: 0x2666, # BLACK DIAMOND SUIT + 0x008e: 0x2663, # BLACK CLUB SUIT + 0x008f: 0x2665, # BLACK HEART SUIT + 0x0090: 0x2660, # BLACK SPADE SUIT + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x02dc, # SMALL TILDE + 0x0099: 0x2122, # TRADE MARK SIGN + 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON + 0x009c: 0x0153, # LATIN SMALL LIGATURE OE + 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS +}) + +### Encoding Map + +encoding_map = codecs.make_encoding_map(decoding_map)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/ptcp154.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,175 @@ +""" Python Character Mapping Codec generated from 'PTCP154.txt' with gencodec.py. + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_map) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_map)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='ptcp154', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0080: 0x0496, # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER + 0x0081: 0x0492, # CYRILLIC CAPITAL LETTER GHE WITH STROKE + 0x0082: 0x04ee, # CYRILLIC CAPITAL LETTER U WITH MACRON + 0x0083: 0x0493, # CYRILLIC SMALL LETTER GHE WITH STROKE + 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK + 0x0085: 0x2026, # HORIZONTAL ELLIPSIS + 0x0086: 0x04b6, # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + 0x0087: 0x04ae, # CYRILLIC CAPITAL LETTER STRAIGHT U + 0x0088: 0x04b2, # CYRILLIC CAPITAL LETTER HA WITH DESCENDER + 0x0089: 0x04af, # CYRILLIC SMALL LETTER STRAIGHT U + 0x008a: 0x04a0, # CYRILLIC CAPITAL LETTER BASHKIR KA + 0x008b: 0x04e2, # CYRILLIC CAPITAL LETTER I WITH MACRON + 0x008c: 0x04a2, # CYRILLIC CAPITAL LETTER EN WITH DESCENDER + 0x008d: 0x049a, # CYRILLIC CAPITAL LETTER KA WITH DESCENDER + 0x008e: 0x04ba, # CYRILLIC CAPITAL LETTER SHHA + 0x008f: 0x04b8, # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + 0x0090: 0x0497, # CYRILLIC SMALL LETTER ZHE WITH DESCENDER + 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK + 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK + 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK + 0x0095: 0x2022, # BULLET + 0x0096: 0x2013, # EN DASH + 0x0097: 0x2014, # EM DASH + 0x0098: 0x04b3, # CYRILLIC SMALL LETTER HA WITH DESCENDER + 0x0099: 0x04b7, # CYRILLIC SMALL LETTER CHE WITH DESCENDER + 0x009a: 0x04a1, # CYRILLIC SMALL LETTER BASHKIR KA + 0x009b: 0x04e3, # CYRILLIC SMALL LETTER I WITH MACRON + 0x009c: 0x04a3, # CYRILLIC SMALL LETTER EN WITH DESCENDER + 0x009d: 0x049b, # CYRILLIC SMALL LETTER KA WITH DESCENDER + 0x009e: 0x04bb, # CYRILLIC SMALL LETTER SHHA + 0x009f: 0x04b9, # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE + 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U (Byelorussian) + 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U (Byelorussian) + 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE + 0x00a4: 0x04e8, # CYRILLIC CAPITAL LETTER BARRED O + 0x00a5: 0x0498, # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER + 0x00a6: 0x04b0, # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE + 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO + 0x00aa: 0x04d8, # CYRILLIC CAPITAL LETTER SCHWA + 0x00ad: 0x04ef, # CYRILLIC SMALL LETTER U WITH MACRON + 0x00af: 0x049c, # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE + 0x00b1: 0x04b1, # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE + 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00b4: 0x0499, # CYRILLIC SMALL LETTER ZE WITH DESCENDER + 0x00b5: 0x04e9, # CYRILLIC SMALL LETTER BARRED O + 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO + 0x00b9: 0x2116, # NUMERO SIGN + 0x00ba: 0x04d9, # CYRILLIC SMALL LETTER SCHWA + 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE + 0x00bd: 0x04aa, # CYRILLIC CAPITAL LETTER ES WITH DESCENDER + 0x00be: 0x04ab, # CYRILLIC SMALL LETTER ES WITH DESCENDER + 0x00bf: 0x049d, # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE + 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A + 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE + 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE + 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE + 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE + 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE + 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE + 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I + 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I + 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA + 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL + 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM + 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN + 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O + 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE + 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER + 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES + 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE + 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U + 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF + 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA + 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE + 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE + 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA + 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA + 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN + 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU + 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN + 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E + 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU + 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA + 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A + 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE + 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE + 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE + 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE + 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE + 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE + 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE + 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I + 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I + 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA + 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL + 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM + 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN + 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O + 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE + 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER + 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES + 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE + 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U + 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF + 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA + 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE + 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE + 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA + 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA + 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN + 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU + 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN + 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E + 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU + 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA +}) + +### Encoding Map + +encoding_map = codecs.make_encoding_map(decoding_map)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/punycode.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,238 @@ +# -*- coding: iso-8859-1 -*- +""" Codec for the Punicode encoding, as specified in RFC 3492 + +Written by Martin v. Löwis. +""" + +import codecs + +##################### Encoding ##################################### + +def segregate(str): + """3.1 Basic code point segregation""" + base = [] + extended = {} + for c in str: + if ord(c) < 128: + base.append(c) + else: + extended[c] = 1 + extended = extended.keys() + extended.sort() + return "".join(base).encode("ascii"),extended + +def selective_len(str, max): + """Return the length of str, considering only characters below max.""" + res = 0 + for c in str: + if ord(c) < max: + res += 1 + return res + +def selective_find(str, char, index, pos): + """Return a pair (index, pos), indicating the next occurrence of + char in str. index is the position of the character considering + only ordinals up to and including char, and pos is the position in + the full string. index/pos is the starting position in the full + string.""" + + l = len(str) + while 1: + pos += 1 + if pos == l: + return (-1, -1) + c = str[pos] + if c == char: + return index+1, pos + elif c < char: + index += 1 + +def insertion_unsort(str, extended): + """3.2 Insertion unsort coding""" + oldchar = 0x80 + result = [] + oldindex = -1 + for c in extended: + index = pos = -1 + char = ord(c) + curlen = selective_len(str, char) + delta = (curlen+1) * (char - oldchar) + while 1: + index,pos = selective_find(str,c,index,pos) + if index == -1: + break + delta += index - oldindex + result.append(delta-1) + oldindex = index + delta = 0 + oldchar = char + + return result + +def T(j, bias): + # Punycode parameters: tmin = 1, tmax = 26, base = 36 + res = 36 * (j + 1) - bias + if res < 1: return 1 + if res > 26: return 26 + return res + +digits = "abcdefghijklmnopqrstuvwxyz0123456789" +def generate_generalized_integer(N, bias): + """3.3 Generalized variable-length integers""" + result = [] + j = 0 + while 1: + t = T(j, bias) + if N < t: + result.append(digits[N]) + return result + result.append(digits[t + ((N - t) % (36 - t))]) + N = (N - t) // (36 - t) + j += 1 + +def adapt(delta, first, numchars): + if first: + delta //= 700 + else: + delta //= 2 + delta += delta // numchars + # ((base - tmin) * tmax) // 2 == 455 + divisions = 0 + while delta > 455: + delta = delta // 35 # base - tmin + divisions += 36 + bias = divisions + (36 * delta // (delta + 38)) + return bias + + +def generate_integers(baselen, deltas): + """3.4 Bias adaptation""" + # Punycode parameters: initial bias = 72, damp = 700, skew = 38 + result = [] + bias = 72 + for points, delta in enumerate(deltas): + s = generate_generalized_integer(delta, bias) + result.extend(s) + bias = adapt(delta, points==0, baselen+points+1) + return "".join(result) + +def punycode_encode(text): + base, extended = segregate(text) + base = base.encode("ascii") + deltas = insertion_unsort(text, extended) + extended = generate_integers(len(base), deltas) + if base: + return base + "-" + extended + return extended + +##################### Decoding ##################################### + +def decode_generalized_number(extended, extpos, bias, errors): + """3.3 Generalized variable-length integers""" + result = 0 + w = 1 + j = 0 + while 1: + try: + char = ord(extended[extpos]) + except IndexError: + if errors == "strict": + raise UnicodeError, "incomplete punicode string" + return extpos + 1, None + extpos += 1 + if 0x41 <= char <= 0x5A: # A-Z + digit = char - 0x41 + elif 0x30 <= char <= 0x39: + digit = char - 22 # 0x30-26 + elif errors == "strict": + raise UnicodeError("Invalid extended code point '%s'" + % extended[extpos]) + else: + return extpos, None + t = T(j, bias) + result += digit * w + if digit < t: + return extpos, result + w = w * (36 - t) + j += 1 + + +def insertion_sort(base, extended, errors): + """3.2 Insertion unsort coding""" + char = 0x80 + pos = -1 + bias = 72 + extpos = 0 + while extpos < len(extended): + newpos, delta = decode_generalized_number(extended, extpos, + bias, errors) + if delta is None: + # There was an error in decoding. We can't continue because + # synchronization is lost. + return base + pos += delta+1 + char += pos // (len(base) + 1) + if char > 0x10FFFF: + if errors == "strict": + raise UnicodeError, ("Invalid character U+%x" % char) + char = ord('?') + pos = pos % (len(base) + 1) + base = base[:pos] + unichr(char) + base[pos:] + bias = adapt(delta, (extpos == 0), len(base)) + extpos = newpos + return base + +def punycode_decode(text, errors): + pos = text.rfind("-") + if pos == -1: + base = "" + extended = text + else: + base = text[:pos] + extended = text[pos+1:] + base = unicode(base, "ascii", errors) + extended = extended.upper() + return insertion_sort(base, extended, errors) + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + res = punycode_encode(input) + return res, len(input) + + def decode(self,input,errors='strict'): + if errors not in ('strict', 'replace', 'ignore'): + raise UnicodeError, "Unsupported error handling "+errors + res = punycode_decode(input, errors) + return res, len(input) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return punycode_encode(input) + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + if self.errors not in ('strict', 'replace', 'ignore'): + raise UnicodeError, "Unsupported error handling "+self.errors + return punycode_decode(input, self.errors) + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='punycode', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/quopri_codec.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,76 @@ +"""Codec for quoted-printable encoding. + +Like base64 and rot13, this returns Python strings, not Unicode. +""" + +import codecs, quopri +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +def quopri_encode(input, errors='strict'): + """Encode the input, returning a tuple (output object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + # using str() because of cStringIO's Unicode undesired Unicode behavior. + f = StringIO(str(input)) + g = StringIO() + quopri.encode(f, g, quotetabs=True) + output = g.getvalue() + return (output, len(input)) + +def quopri_decode(input, errors='strict'): + """Decode the input, returning a tuple (output object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + f = StringIO(str(input)) + g = StringIO() + quopri.decode(f, g) + output = g.getvalue() + return (output, len(input)) + +class Codec(codecs.Codec): + + def encode(self, input,errors='strict'): + return quopri_encode(input,errors) + def decode(self, input,errors='strict'): + return quopri_decode(input,errors) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return quopri_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return quopri_decode(input, self.errors)[0] + +class StreamWriter(Codec, codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +# encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='quopri', + encode=quopri_encode, + decode=quopri_decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + _is_text_encoding=False, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/raw_unicode_escape.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,45 @@ +""" Python 'raw-unicode-escape' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + # Note: Binding these as C functions will result in the class not + # converting them to methods. This is intended. + encode = codecs.raw_unicode_escape_encode + decode = codecs.raw_unicode_escape_decode + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.raw_unicode_escape_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.raw_unicode_escape_decode(input, self.errors)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='raw-unicode-escape', + encode=Codec.encode, + decode=Codec.decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/rot_13.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,119 @@ +#! /usr/bin/python2.7 +""" Python Character Mapping Codec for ROT13. + + See http://ucsub.colorado.edu/~kominek/rot13/ for details. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_map) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_map)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_map)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='rot-13', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + _is_text_encoding=False, + ) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0041: 0x004e, + 0x0042: 0x004f, + 0x0043: 0x0050, + 0x0044: 0x0051, + 0x0045: 0x0052, + 0x0046: 0x0053, + 0x0047: 0x0054, + 0x0048: 0x0055, + 0x0049: 0x0056, + 0x004a: 0x0057, + 0x004b: 0x0058, + 0x004c: 0x0059, + 0x004d: 0x005a, + 0x004e: 0x0041, + 0x004f: 0x0042, + 0x0050: 0x0043, + 0x0051: 0x0044, + 0x0052: 0x0045, + 0x0053: 0x0046, + 0x0054: 0x0047, + 0x0055: 0x0048, + 0x0056: 0x0049, + 0x0057: 0x004a, + 0x0058: 0x004b, + 0x0059: 0x004c, + 0x005a: 0x004d, + 0x0061: 0x006e, + 0x0062: 0x006f, + 0x0063: 0x0070, + 0x0064: 0x0071, + 0x0065: 0x0072, + 0x0066: 0x0073, + 0x0067: 0x0074, + 0x0068: 0x0075, + 0x0069: 0x0076, + 0x006a: 0x0077, + 0x006b: 0x0078, + 0x006c: 0x0079, + 0x006d: 0x007a, + 0x006e: 0x0061, + 0x006f: 0x0062, + 0x0070: 0x0063, + 0x0071: 0x0064, + 0x0072: 0x0065, + 0x0073: 0x0066, + 0x0074: 0x0067, + 0x0075: 0x0068, + 0x0076: 0x0069, + 0x0077: 0x006a, + 0x0078: 0x006b, + 0x0079: 0x006c, + 0x007a: 0x006d, +}) + +### Encoding Map + +encoding_map = codecs.make_encoding_map(decoding_map) + +### Filter API + +def rot13(infile, outfile): + outfile.write(infile.read().encode('rot-13')) + +if __name__ == '__main__': + import sys + rot13(sys.stdin, sys.stdout)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/shift_jis.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# shift_jis.py: Python Unicode Codec for SHIFT_JIS +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +codec = _codecs_jp.getcodec('shift_jis') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='shift_jis', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/shift_jis_2004.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +codec = _codecs_jp.getcodec('shift_jis_2004') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='shift_jis_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/shift_jisx0213.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,39 @@ +# +# shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213 +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_jp, codecs +import _multibytecodec as mbc + +codec = _codecs_jp.getcodec('shift_jisx0213') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec + +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec + +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec + +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='shift_jisx0213', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/string_escape.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,38 @@ +# -*- coding: iso-8859-1 -*- +""" Python 'escape' Codec + + +Written by Martin v. Löwis (martin@v.loewis.de). + +""" +import codecs + +class Codec(codecs.Codec): + + encode = codecs.escape_encode + decode = codecs.escape_decode + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.escape_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.escape_decode(input, self.errors)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +def getregentry(): + return codecs.CodecInfo( + name='string-escape', + encode=Codec.encode, + decode=Codec.decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/tis_620.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,307 @@ +""" Python Character Mapping Codec tis_620 generated from 'python-mappings/TIS-620.TXT' with gencodec.py. + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_table) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_table) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_table)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_table)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='tis-620', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +### Decoding Table + +decoding_table = ( + u'\x00' # 0x00 -> NULL + u'\x01' # 0x01 -> START OF HEADING + u'\x02' # 0x02 -> START OF TEXT + u'\x03' # 0x03 -> END OF TEXT + u'\x04' # 0x04 -> END OF TRANSMISSION + u'\x05' # 0x05 -> ENQUIRY + u'\x06' # 0x06 -> ACKNOWLEDGE + u'\x07' # 0x07 -> BELL + u'\x08' # 0x08 -> BACKSPACE + u'\t' # 0x09 -> HORIZONTAL TABULATION + u'\n' # 0x0A -> LINE FEED + u'\x0b' # 0x0B -> VERTICAL TABULATION + u'\x0c' # 0x0C -> FORM FEED + u'\r' # 0x0D -> CARRIAGE RETURN + u'\x0e' # 0x0E -> SHIFT OUT + u'\x0f' # 0x0F -> SHIFT IN + u'\x10' # 0x10 -> DATA LINK ESCAPE + u'\x11' # 0x11 -> DEVICE CONTROL ONE + u'\x12' # 0x12 -> DEVICE CONTROL TWO + u'\x13' # 0x13 -> DEVICE CONTROL THREE + u'\x14' # 0x14 -> DEVICE CONTROL FOUR + u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + u'\x16' # 0x16 -> SYNCHRONOUS IDLE + u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK + u'\x18' # 0x18 -> CANCEL + u'\x19' # 0x19 -> END OF MEDIUM + u'\x1a' # 0x1A -> SUBSTITUTE + u'\x1b' # 0x1B -> ESCAPE + u'\x1c' # 0x1C -> FILE SEPARATOR + u'\x1d' # 0x1D -> GROUP SEPARATOR + u'\x1e' # 0x1E -> RECORD SEPARATOR + u'\x1f' # 0x1F -> UNIT SEPARATOR + u' ' # 0x20 -> SPACE + u'!' # 0x21 -> EXCLAMATION MARK + u'"' # 0x22 -> QUOTATION MARK + u'#' # 0x23 -> NUMBER SIGN + u'$' # 0x24 -> DOLLAR SIGN + u'%' # 0x25 -> PERCENT SIGN + u'&' # 0x26 -> AMPERSAND + u"'" # 0x27 -> APOSTROPHE + u'(' # 0x28 -> LEFT PARENTHESIS + u')' # 0x29 -> RIGHT PARENTHESIS + u'*' # 0x2A -> ASTERISK + u'+' # 0x2B -> PLUS SIGN + u',' # 0x2C -> COMMA + u'-' # 0x2D -> HYPHEN-MINUS + u'.' # 0x2E -> FULL STOP + u'/' # 0x2F -> SOLIDUS + u'0' # 0x30 -> DIGIT ZERO + u'1' # 0x31 -> DIGIT ONE + u'2' # 0x32 -> DIGIT TWO + u'3' # 0x33 -> DIGIT THREE + u'4' # 0x34 -> DIGIT FOUR + u'5' # 0x35 -> DIGIT FIVE + u'6' # 0x36 -> DIGIT SIX + u'7' # 0x37 -> DIGIT SEVEN + u'8' # 0x38 -> DIGIT EIGHT + u'9' # 0x39 -> DIGIT NINE + u':' # 0x3A -> COLON + u';' # 0x3B -> SEMICOLON + u'<' # 0x3C -> LESS-THAN SIGN + u'=' # 0x3D -> EQUALS SIGN + u'>' # 0x3E -> GREATER-THAN SIGN + u'?' # 0x3F -> QUESTION MARK + u'@' # 0x40 -> COMMERCIAL AT + u'A' # 0x41 -> LATIN CAPITAL LETTER A + u'B' # 0x42 -> LATIN CAPITAL LETTER B + u'C' # 0x43 -> LATIN CAPITAL LETTER C + u'D' # 0x44 -> LATIN CAPITAL LETTER D + u'E' # 0x45 -> LATIN CAPITAL LETTER E + u'F' # 0x46 -> LATIN CAPITAL LETTER F + u'G' # 0x47 -> LATIN CAPITAL LETTER G + u'H' # 0x48 -> LATIN CAPITAL LETTER H + u'I' # 0x49 -> LATIN CAPITAL LETTER I + u'J' # 0x4A -> LATIN CAPITAL LETTER J + u'K' # 0x4B -> LATIN CAPITAL LETTER K + u'L' # 0x4C -> LATIN CAPITAL LETTER L + u'M' # 0x4D -> LATIN CAPITAL LETTER M + u'N' # 0x4E -> LATIN CAPITAL LETTER N + u'O' # 0x4F -> LATIN CAPITAL LETTER O + u'P' # 0x50 -> LATIN CAPITAL LETTER P + u'Q' # 0x51 -> LATIN CAPITAL LETTER Q + u'R' # 0x52 -> LATIN CAPITAL LETTER R + u'S' # 0x53 -> LATIN CAPITAL LETTER S + u'T' # 0x54 -> LATIN CAPITAL LETTER T + u'U' # 0x55 -> LATIN CAPITAL LETTER U + u'V' # 0x56 -> LATIN CAPITAL LETTER V + u'W' # 0x57 -> LATIN CAPITAL LETTER W + u'X' # 0x58 -> LATIN CAPITAL LETTER X + u'Y' # 0x59 -> LATIN CAPITAL LETTER Y + u'Z' # 0x5A -> LATIN CAPITAL LETTER Z + u'[' # 0x5B -> LEFT SQUARE BRACKET + u'\\' # 0x5C -> REVERSE SOLIDUS + u']' # 0x5D -> RIGHT SQUARE BRACKET + u'^' # 0x5E -> CIRCUMFLEX ACCENT + u'_' # 0x5F -> LOW LINE + u'`' # 0x60 -> GRAVE ACCENT + u'a' # 0x61 -> LATIN SMALL LETTER A + u'b' # 0x62 -> LATIN SMALL LETTER B + u'c' # 0x63 -> LATIN SMALL LETTER C + u'd' # 0x64 -> LATIN SMALL LETTER D + u'e' # 0x65 -> LATIN SMALL LETTER E + u'f' # 0x66 -> LATIN SMALL LETTER F + u'g' # 0x67 -> LATIN SMALL LETTER G + u'h' # 0x68 -> LATIN SMALL LETTER H + u'i' # 0x69 -> LATIN SMALL LETTER I + u'j' # 0x6A -> LATIN SMALL LETTER J + u'k' # 0x6B -> LATIN SMALL LETTER K + u'l' # 0x6C -> LATIN SMALL LETTER L + u'm' # 0x6D -> LATIN SMALL LETTER M + u'n' # 0x6E -> LATIN SMALL LETTER N + u'o' # 0x6F -> LATIN SMALL LETTER O + u'p' # 0x70 -> LATIN SMALL LETTER P + u'q' # 0x71 -> LATIN SMALL LETTER Q + u'r' # 0x72 -> LATIN SMALL LETTER R + u's' # 0x73 -> LATIN SMALL LETTER S + u't' # 0x74 -> LATIN SMALL LETTER T + u'u' # 0x75 -> LATIN SMALL LETTER U + u'v' # 0x76 -> LATIN SMALL LETTER V + u'w' # 0x77 -> LATIN SMALL LETTER W + u'x' # 0x78 -> LATIN SMALL LETTER X + u'y' # 0x79 -> LATIN SMALL LETTER Y + u'z' # 0x7A -> LATIN SMALL LETTER Z + u'{' # 0x7B -> LEFT CURLY BRACKET + u'|' # 0x7C -> VERTICAL LINE + u'}' # 0x7D -> RIGHT CURLY BRACKET + u'~' # 0x7E -> TILDE + u'\x7f' # 0x7F -> DELETE + u'\x80' # 0x80 -> <control> + u'\x81' # 0x81 -> <control> + u'\x82' # 0x82 -> <control> + u'\x83' # 0x83 -> <control> + u'\x84' # 0x84 -> <control> + u'\x85' # 0x85 -> <control> + u'\x86' # 0x86 -> <control> + u'\x87' # 0x87 -> <control> + u'\x88' # 0x88 -> <control> + u'\x89' # 0x89 -> <control> + u'\x8a' # 0x8A -> <control> + u'\x8b' # 0x8B -> <control> + u'\x8c' # 0x8C -> <control> + u'\x8d' # 0x8D -> <control> + u'\x8e' # 0x8E -> <control> + u'\x8f' # 0x8F -> <control> + u'\x90' # 0x90 -> <control> + u'\x91' # 0x91 -> <control> + u'\x92' # 0x92 -> <control> + u'\x93' # 0x93 -> <control> + u'\x94' # 0x94 -> <control> + u'\x95' # 0x95 -> <control> + u'\x96' # 0x96 -> <control> + u'\x97' # 0x97 -> <control> + u'\x98' # 0x98 -> <control> + u'\x99' # 0x99 -> <control> + u'\x9a' # 0x9A -> <control> + u'\x9b' # 0x9B -> <control> + u'\x9c' # 0x9C -> <control> + u'\x9d' # 0x9D -> <control> + u'\x9e' # 0x9E -> <control> + u'\x9f' # 0x9F -> <control> + u'\ufffe' + u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI + u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI + u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT + u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI + u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON + u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG + u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU + u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN + u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING + u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG + u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO + u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE + u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING + u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA + u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK + u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN + u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO + u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO + u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN + u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK + u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO + u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG + u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN + u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG + u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU + u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI + u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA + u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG + u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA + u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN + u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN + u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO + u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA + u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK + u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA + u'\u0e24' # 0xC4 -> THAI CHARACTER RU + u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING + u'\u0e26' # 0xC6 -> THAI CHARACTER LU + u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN + u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA + u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI + u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA + u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP + u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA + u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG + u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK + u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI + u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A + u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT + u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA + u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM + u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I + u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II + u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE + u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE + u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U + u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU + u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT + u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E + u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE + u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O + u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN + u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI + u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO + u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK + u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU + u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK + u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO + u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI + u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA + u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT + u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT + u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN + u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN + u'\u0e50' # 0xF0 -> THAI DIGIT ZERO + u'\u0e51' # 0xF1 -> THAI DIGIT ONE + u'\u0e52' # 0xF2 -> THAI DIGIT TWO + u'\u0e53' # 0xF3 -> THAI DIGIT THREE + u'\u0e54' # 0xF4 -> THAI DIGIT FOUR + u'\u0e55' # 0xF5 -> THAI DIGIT FIVE + u'\u0e56' # 0xF6 -> THAI DIGIT SIX + u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN + u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT + u'\u0e59' # 0xF9 -> THAI DIGIT NINE + u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU + u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT + u'\ufffe' + u'\ufffe' + u'\ufffe' + u'\ufffe' +) + +### Encoding table +encoding_table=codecs.charmap_build(decoding_table)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/undefined.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,49 @@ +""" Python 'undefined' Codec + + This codec will always raise a ValueError exception when being + used. It is intended for use by the site.py file to switch off + automatic string to Unicode coercion. + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + raise UnicodeError("undefined encoding") + + def decode(self,input,errors='strict'): + raise UnicodeError("undefined encoding") + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + raise UnicodeError("undefined encoding") + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + raise UnicodeError("undefined encoding") + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='undefined', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/unicode_escape.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,45 @@ +""" Python 'unicode-escape' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + # Note: Binding these as C functions will result in the class not + # converting them to methods. This is intended. + encode = codecs.unicode_escape_encode + decode = codecs.unicode_escape_decode + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.unicode_escape_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.unicode_escape_decode(input, self.errors)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='unicode-escape', + encode=Codec.encode, + decode=Codec.decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/unicode_internal.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,45 @@ +""" Python 'unicode-internal' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + # Note: Binding these as C functions will result in the class not + # converting them to methods. This is intended. + encode = codecs.unicode_internal_encode + decode = codecs.unicode_internal_decode + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.unicode_internal_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.unicode_internal_decode(input, self.errors)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='unicode-internal', + encode=Codec.encode, + decode=Codec.decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_16.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,126 @@ +""" Python 'utf-16' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs, sys + +### Codec APIs + +encode = codecs.utf_16_encode + +def decode(input, errors='strict'): + return codecs.utf_16_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + codecs.IncrementalEncoder.__init__(self, errors) + self.encoder = None + + def encode(self, input, final=False): + if self.encoder is None: + result = codecs.utf_16_encode(input, self.errors)[0] + if sys.byteorder == 'little': + self.encoder = codecs.utf_16_le_encode + else: + self.encoder = codecs.utf_16_be_encode + return result + return self.encoder(input, self.errors)[0] + + def reset(self): + codecs.IncrementalEncoder.reset(self) + self.encoder = None + + def getstate(self): + # state info we return to the caller: + # 0: stream is in natural order for this platform + # 2: endianness hasn't been determined yet + # (we're never writing in unnatural order) + return (2 if self.encoder is None else 0) + + def setstate(self, state): + if state: + self.encoder = None + else: + if sys.byteorder == 'little': + self.encoder = codecs.utf_16_le_encode + else: + self.encoder = codecs.utf_16_be_encode + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def __init__(self, errors='strict'): + codecs.BufferedIncrementalDecoder.__init__(self, errors) + self.decoder = None + + def _buffer_decode(self, input, errors, final): + if self.decoder is None: + (output, consumed, byteorder) = \ + codecs.utf_16_ex_decode(input, errors, 0, final) + if byteorder == -1: + self.decoder = codecs.utf_16_le_decode + elif byteorder == 1: + self.decoder = codecs.utf_16_be_decode + elif consumed >= 2: + raise UnicodeError("UTF-16 stream does not start with BOM") + return (output, consumed) + return self.decoder(input, self.errors, final) + + def reset(self): + codecs.BufferedIncrementalDecoder.reset(self) + self.decoder = None + +class StreamWriter(codecs.StreamWriter): + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + self.encoder = None + + def reset(self): + codecs.StreamWriter.reset(self) + self.encoder = None + + def encode(self, input, errors='strict'): + if self.encoder is None: + result = codecs.utf_16_encode(input, errors) + if sys.byteorder == 'little': + self.encoder = codecs.utf_16_le_encode + else: + self.encoder = codecs.utf_16_be_encode + return result + else: + return self.encoder(input, errors) + +class StreamReader(codecs.StreamReader): + + def reset(self): + codecs.StreamReader.reset(self) + try: + del self.decode + except AttributeError: + pass + + def decode(self, input, errors='strict'): + (object, consumed, byteorder) = \ + codecs.utf_16_ex_decode(input, errors, 0, False) + if byteorder == -1: + self.decode = codecs.utf_16_le_decode + elif byteorder == 1: + self.decode = codecs.utf_16_be_decode + elif consumed>=2: + raise UnicodeError,"UTF-16 stream does not start with BOM" + return (object, consumed) + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-16', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_16_be.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,42 @@ +""" Python 'utf-16-be' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +encode = codecs.utf_16_be_encode + +def decode(input, errors='strict'): + return codecs.utf_16_be_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_16_be_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_16_be_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_16_be_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_16_be_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-16-be', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_16_le.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,42 @@ +""" Python 'utf-16-le' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +encode = codecs.utf_16_le_encode + +def decode(input, errors='strict'): + return codecs.utf_16_le_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_16_le_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_16_le_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_16_le_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_16_le_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-16-le', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_32.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,150 @@ +""" +Python 'utf-32' Codec +""" +import codecs, sys + +### Codec APIs + +encode = codecs.utf_32_encode + +def decode(input, errors='strict'): + return codecs.utf_32_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + codecs.IncrementalEncoder.__init__(self, errors) + self.encoder = None + + def encode(self, input, final=False): + if self.encoder is None: + result = codecs.utf_32_encode(input, self.errors)[0] + if sys.byteorder == 'little': + self.encoder = codecs.utf_32_le_encode + else: + self.encoder = codecs.utf_32_be_encode + return result + return self.encoder(input, self.errors)[0] + + def reset(self): + codecs.IncrementalEncoder.reset(self) + self.encoder = None + + def getstate(self): + # state info we return to the caller: + # 0: stream is in natural order for this platform + # 2: endianness hasn't been determined yet + # (we're never writing in unnatural order) + return (2 if self.encoder is None else 0) + + def setstate(self, state): + if state: + self.encoder = None + else: + if sys.byteorder == 'little': + self.encoder = codecs.utf_32_le_encode + else: + self.encoder = codecs.utf_32_be_encode + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def __init__(self, errors='strict'): + codecs.BufferedIncrementalDecoder.__init__(self, errors) + self.decoder = None + + def _buffer_decode(self, input, errors, final): + if self.decoder is None: + (output, consumed, byteorder) = \ + codecs.utf_32_ex_decode(input, errors, 0, final) + if byteorder == -1: + self.decoder = codecs.utf_32_le_decode + elif byteorder == 1: + self.decoder = codecs.utf_32_be_decode + elif consumed >= 4: + raise UnicodeError("UTF-32 stream does not start with BOM") + return (output, consumed) + return self.decoder(input, self.errors, final) + + def reset(self): + codecs.BufferedIncrementalDecoder.reset(self) + self.decoder = None + + def getstate(self): + # additional state info from the base class must be None here, + # as it isn't passed along to the caller + state = codecs.BufferedIncrementalDecoder.getstate(self)[0] + # additional state info we pass to the caller: + # 0: stream is in natural order for this platform + # 1: stream is in unnatural order + # 2: endianness hasn't been determined yet + if self.decoder is None: + return (state, 2) + addstate = int((sys.byteorder == "big") != + (self.decoder is codecs.utf_32_be_decode)) + return (state, addstate) + + def setstate(self, state): + # state[1] will be ignored by BufferedIncrementalDecoder.setstate() + codecs.BufferedIncrementalDecoder.setstate(self, state) + state = state[1] + if state == 0: + self.decoder = (codecs.utf_32_be_decode + if sys.byteorder == "big" + else codecs.utf_32_le_decode) + elif state == 1: + self.decoder = (codecs.utf_32_le_decode + if sys.byteorder == "big" + else codecs.utf_32_be_decode) + else: + self.decoder = None + +class StreamWriter(codecs.StreamWriter): + def __init__(self, stream, errors='strict'): + self.encoder = None + codecs.StreamWriter.__init__(self, stream, errors) + + def reset(self): + codecs.StreamWriter.reset(self) + self.encoder = None + + def encode(self, input, errors='strict'): + if self.encoder is None: + result = codecs.utf_32_encode(input, errors) + if sys.byteorder == 'little': + self.encoder = codecs.utf_32_le_encode + else: + self.encoder = codecs.utf_32_be_encode + return result + else: + return self.encoder(input, errors) + +class StreamReader(codecs.StreamReader): + + def reset(self): + codecs.StreamReader.reset(self) + try: + del self.decode + except AttributeError: + pass + + def decode(self, input, errors='strict'): + (object, consumed, byteorder) = \ + codecs.utf_32_ex_decode(input, errors, 0, False) + if byteorder == -1: + self.decode = codecs.utf_32_le_decode + elif byteorder == 1: + self.decode = codecs.utf_32_be_decode + elif consumed>=4: + raise UnicodeError,"UTF-32 stream does not start with BOM" + return (object, consumed) + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-32', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_32_be.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,37 @@ +""" +Python 'utf-32-be' Codec +""" +import codecs + +### Codec APIs + +encode = codecs.utf_32_be_encode + +def decode(input, errors='strict'): + return codecs.utf_32_be_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_32_be_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_32_be_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_32_be_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_32_be_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-32-be', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_32_le.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,37 @@ +""" +Python 'utf-32-le' Codec +""" +import codecs + +### Codec APIs + +encode = codecs.utf_32_le_encode + +def decode(input, errors='strict'): + return codecs.utf_32_le_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_32_le_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_32_le_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_32_le_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_32_le_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-32-le', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_7.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,38 @@ +""" Python 'utf-7' Codec + +Written by Brian Quinlan (brian@sweetapp.com). +""" +import codecs + +### Codec APIs + +encode = codecs.utf_7_encode + +def decode(input, errors='strict'): + return codecs.utf_7_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_7_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_7_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_7_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_7_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-7', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_8.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,42 @@ +""" Python 'utf-8' Codec + + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +import codecs + +### Codec APIs + +encode = codecs.utf_8_encode + +def decode(input, errors='strict'): + return codecs.utf_8_decode(input, errors, True) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.utf_8_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + _buffer_decode = codecs.utf_8_decode + +class StreamWriter(codecs.StreamWriter): + encode = codecs.utf_8_encode + +class StreamReader(codecs.StreamReader): + decode = codecs.utf_8_decode + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-8', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/utf_8_sig.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,117 @@ +""" Python 'utf-8-sig' Codec +This work similar to UTF-8 with the following changes: + +* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the + first three bytes. + +* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these + bytes will be skipped. +""" +import codecs + +### Codec APIs + +def encode(input, errors='strict'): + return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input)) + +def decode(input, errors='strict'): + prefix = 0 + if input[:3] == codecs.BOM_UTF8: + input = input[3:] + prefix = 3 + (output, consumed) = codecs.utf_8_decode(input, errors, True) + return (output, consumed+prefix) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + codecs.IncrementalEncoder.__init__(self, errors) + self.first = 1 + + def encode(self, input, final=False): + if self.first: + self.first = 0 + return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0] + else: + return codecs.utf_8_encode(input, self.errors)[0] + + def reset(self): + codecs.IncrementalEncoder.reset(self) + self.first = 1 + + def getstate(self): + return self.first + + def setstate(self, state): + self.first = state + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def __init__(self, errors='strict'): + codecs.BufferedIncrementalDecoder.__init__(self, errors) + self.first = True + + def _buffer_decode(self, input, errors, final): + if self.first: + if len(input) < 3: + if codecs.BOM_UTF8.startswith(input): + # not enough data to decide if this really is a BOM + # => try again on the next call + return (u"", 0) + else: + self.first = None + else: + self.first = None + if input[:3] == codecs.BOM_UTF8: + (output, consumed) = codecs.utf_8_decode(input[3:], errors, final) + return (output, consumed+3) + return codecs.utf_8_decode(input, errors, final) + + def reset(self): + codecs.BufferedIncrementalDecoder.reset(self) + self.first = True + +class StreamWriter(codecs.StreamWriter): + def reset(self): + codecs.StreamWriter.reset(self) + try: + del self.encode + except AttributeError: + pass + + def encode(self, input, errors='strict'): + self.encode = codecs.utf_8_encode + return encode(input, errors) + +class StreamReader(codecs.StreamReader): + def reset(self): + codecs.StreamReader.reset(self) + try: + del self.decode + except AttributeError: + pass + + def decode(self, input, errors='strict'): + if len(input) < 3: + if codecs.BOM_UTF8.startswith(input): + # not enough data to decide if this is a BOM + # => try again on the next call + return (u"", 0) + elif input[:3] == codecs.BOM_UTF8: + self.decode = codecs.utf_8_decode + (output, consumed) = codecs.utf_8_decode(input[3:],errors) + return (output, consumed+3) + # (else) no BOM present + self.decode = codecs.utf_8_decode + return codecs.utf_8_decode(input, errors) + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='utf-8-sig', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/uu_codec.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,130 @@ +""" Python 'uu_codec' Codec - UU content transfer encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were + adapted from uu.py which was written by Lance Ellinghouse and + modified by Jack Jansen and Fredrik Lundh. + +""" +import codecs, binascii + +### Codec APIs + +def uu_encode(input,errors='strict',filename='<data>',mode=0666): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + from cStringIO import StringIO + from binascii import b2a_uu + # using str() because of cStringIO's Unicode undesired Unicode behavior. + infile = StringIO(str(input)) + outfile = StringIO() + read = infile.read + write = outfile.write + + # Encode + write('begin %o %s\n' % (mode & 0777, filename)) + chunk = read(45) + while chunk: + write(b2a_uu(chunk)) + chunk = read(45) + write(' \nend\n') + + return (outfile.getvalue(), len(input)) + +def uu_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + Note: filename and file mode information in the input data is + ignored. + + """ + assert errors == 'strict' + from cStringIO import StringIO + from binascii import a2b_uu + infile = StringIO(str(input)) + outfile = StringIO() + readline = infile.readline + write = outfile.write + + # Find start of encoded data + while 1: + s = readline() + if not s: + raise ValueError, 'Missing "begin" line in input data' + if s[:5] == 'begin': + break + + # Decode + while 1: + s = readline() + if not s or \ + s == 'end\n': + break + try: + data = a2b_uu(s) + except binascii.Error, v: + # Workaround for broken uuencoders by /Fredrik Lundh + nbytes = (((ord(s[0])-32) & 63) * 4 + 5) // 3 + data = a2b_uu(s[:nbytes]) + #sys.stderr.write("Warning: %s\n" % str(v)) + write(data) + if not s: + raise ValueError, 'Truncated input data' + + return (outfile.getvalue(), len(input)) + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return uu_encode(input,errors) + + def decode(self,input,errors='strict'): + return uu_decode(input,errors) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return uu_encode(input, self.errors)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return uu_decode(input, self.errors)[0] + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='uu', + encode=uu_encode, + decode=uu_decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + _is_text_encoding=False, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/encodings/zlib_codec.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,103 @@ +""" Python 'zlib_codec' Codec - zlib compression encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +""" +import codecs +import zlib # this codec needs the optional zlib module ! + +### Codec APIs + +def zlib_encode(input,errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = zlib.compress(input) + return (output, len(input)) + +def zlib_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = zlib.decompress(input) + return (output, len(input)) + +class Codec(codecs.Codec): + + def encode(self, input, errors='strict'): + return zlib_encode(input, errors) + def decode(self, input, errors='strict'): + return zlib_decode(input, errors) + +class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors='strict'): + assert errors == 'strict' + self.errors = errors + self.compressobj = zlib.compressobj() + + def encode(self, input, final=False): + if final: + c = self.compressobj.compress(input) + return c + self.compressobj.flush() + else: + return self.compressobj.compress(input) + + def reset(self): + self.compressobj = zlib.compressobj() + +class IncrementalDecoder(codecs.IncrementalDecoder): + def __init__(self, errors='strict'): + assert errors == 'strict' + self.errors = errors + self.decompressobj = zlib.decompressobj() + + def decode(self, input, final=False): + if final: + c = self.decompressobj.decompress(input) + return c + self.decompressobj.flush() + else: + return self.decompressobj.decompress(input) + + def reset(self): + self.decompressobj = zlib.decompressobj() + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name='zlib', + encode=zlib_encode, + decode=zlib_decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + _is_text_encoding=False, + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/fnmatch.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,120 @@ +"""Filename matching with shell patterns. + +fnmatch(FILENAME, PATTERN) matches according to the local convention. +fnmatchcase(FILENAME, PATTERN) always takes case in account. + +The functions operate by translating the pattern into a regular +expression. They cache the compiled regular expressions for speed. + +The function translate(PATTERN) returns a regular expression +corresponding to PATTERN. (It does not compile it.) +""" + +import re + +__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] + +_cache = {} +_MAXCACHE = 100 + +def _purge(): + """Clear the pattern cache""" + _cache.clear() + +def fnmatch(name, pat): + """Test whether FILENAME matches PATTERN. + + Patterns are Unix shell style: + + * matches everything + ? matches any single character + [seq] matches any character in seq + [!seq] matches any char not in seq + + An initial period in FILENAME is not special. + Both FILENAME and PATTERN are first case-normalized + if the operating system requires it. + If you don't want this, use fnmatchcase(FILENAME, PATTERN). + """ + + import os + name = os.path.normcase(name) + pat = os.path.normcase(pat) + return fnmatchcase(name, pat) + +def filter(names, pat): + """Return the subset of the list NAMES that match PAT""" + import os,posixpath + result=[] + pat=os.path.normcase(pat) + try: + re_pat = _cache[pat] + except KeyError: + res = translate(pat) + if len(_cache) >= _MAXCACHE: + _cache.clear() + _cache[pat] = re_pat = re.compile(res) + match = re_pat.match + if os.path is posixpath: + # normcase on posix is NOP. Optimize it away from the loop. + for name in names: + if match(name): + result.append(name) + else: + for name in names: + if match(os.path.normcase(name)): + result.append(name) + return result + +def fnmatchcase(name, pat): + """Test whether FILENAME matches PATTERN, including case. + + This is a version of fnmatch() which doesn't case-normalize + its arguments. + """ + + try: + re_pat = _cache[pat] + except KeyError: + res = translate(pat) + if len(_cache) >= _MAXCACHE: + _cache.clear() + _cache[pat] = re_pat = re.compile(res) + return re_pat.match(name) is not None + +def translate(pat): + """Translate a shell PATTERN to a regular expression. + + There is no way to quote meta-characters. + """ + + i, n = 0, len(pat) + res = '' + while i < n: + c = pat[i] + i = i+1 + if c == '*': + res = res + '.*' + elif c == '?': + res = res + '.' + elif c == '[': + j = i + if j < n and pat[j] == '!': + j = j+1 + if j < n and pat[j] == ']': + j = j+1 + while j < n and pat[j] != ']': + j = j+1 + if j >= n: + res = res + '\\[' + else: + stuff = pat[i:j].replace('\\','\\\\') + i = j+1 + if stuff[0] == '!': + stuff = '^' + stuff[1:] + elif stuff[0] == '^': + stuff = '\\' + stuff + res = '%s[%s]' % (res, stuff) + else: + res = res + re.escape(c) + return res + '\Z(?ms)'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/genericpath.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,113 @@ +""" +Path operations common to more than one OS +Do not use directly. The OS specific modules import the appropriate +functions from this module themselves. +""" +import os +import stat + +__all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', + 'getsize', 'isdir', 'isfile'] + + +try: + _unicode = unicode +except NameError: + # If Python is built without Unicode support, the unicode type + # will not exist. Fake one. + class _unicode(object): + pass + +# Does a path exist? +# This is false for dangling symbolic links on systems that support them. +def exists(path): + """Test whether a path exists. Returns False for broken symbolic links""" + try: + os.stat(path) + except os.error: + return False + return True + + +# This follows symbolic links, so both islink() and isdir() can be true +# for the same path on systems that support symlinks +def isfile(path): + """Test whether a path is a regular file""" + try: + st = os.stat(path) + except os.error: + return False + return stat.S_ISREG(st.st_mode) + + +# Is a path a directory? +# This follows symbolic links, so both islink() and isdir() +# can be true for the same path on systems that support symlinks +def isdir(s): + """Return true if the pathname refers to an existing directory.""" + try: + st = os.stat(s) + except os.error: + return False + return stat.S_ISDIR(st.st_mode) + + +def getsize(filename): + """Return the size of a file, reported by os.stat().""" + return os.stat(filename).st_size + + +def getmtime(filename): + """Return the last modification time of a file, reported by os.stat().""" + return os.stat(filename).st_mtime + + +def getatime(filename): + """Return the last access time of a file, reported by os.stat().""" + return os.stat(filename).st_atime + + +def getctime(filename): + """Return the metadata change time of a file, reported by os.stat().""" + return os.stat(filename).st_ctime + + +# Return the longest prefix of all list elements. +def commonprefix(m): + "Given a list of pathnames, returns the longest common leading component" + if not m: return '' + s1 = min(m) + s2 = max(m) + for i, c in enumerate(s1): + if c != s2[i]: + return s1[:i] + return s1 + +# Split a path in root and extension. +# The extension is everything starting at the last dot in the last +# pathname component; the root is everything before that. +# It is always true that root + ext == p. + +# Generic implementation of splitext, to be parametrized with +# the separators +def _splitext(p, sep, altsep, extsep): + """Split the extension from a pathname. + + Extension is everything from the last dot to the end, ignoring + leading dots. Returns "(root, ext)"; ext may be empty.""" + + sepIndex = p.rfind(sep) + if altsep: + altsepIndex = p.rfind(altsep) + sepIndex = max(sepIndex, altsepIndex) + + dotIndex = p.rfind(extsep) + if dotIndex > sepIndex: + # skip all leading dots + filenameIndex = sepIndex + 1 + while filenameIndex < dotIndex: + if p[filenameIndex] != extsep: + return p[:dotIndex], p[dotIndex:] + filenameIndex += 1 + + return p, ''
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/lib-dynload/Python-2.7.egg-info Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,31 @@ +Metadata-Version: 1.1 +Name: Python +Version: 2.7.12 +Summary: A high-level object-oriented programming language +Home-page: http://www.python.org/2.7 +Author: Guido van Rossum and the Python community +Author-email: python-dev@python.org +License: PSF license +Description: Python is an interpreted, interactive, object-oriented programming + language. It is often compared to Tcl, Perl, Scheme or Java. + + Python combines remarkable power with very clear syntax. It has + modules, classes, exceptions, very high level dynamic data types, and + dynamic typing. There are interfaces to many system calls and + libraries, as well as to various windowing systems (X11, Motif, Tk, + Mac, MFC). New built-in modules are easily written in C or C++. Python + is also usable as an extension language for applications that need a + programmable interface. + + The Python implementation is portable: it runs on many brands of UNIX, + on Windows, DOS, OS/2, Mac, Amiga... If your favorite system isn't + listed here, it may still be supported, if there's a C compiler for + it. Ask around on comp.lang.python -- or just try compiling Python + yourself. +Platform: Many +Classifier: Development Status :: 6 - Mature +Classifier: License :: OSI Approved :: Python Software Foundation License +Classifier: Natural Language :: English +Classifier: Programming Language :: C +Classifier: Programming Language :: Python +Classifier: Topic :: Software Development
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/linecache.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,139 @@ +"""Cache lines from files. + +This is intended to read lines from modules imported -- hence if a filename +is not found, it will look down the module search path for a file by +that name. +""" + +import sys +import os + +__all__ = ["getline", "clearcache", "checkcache"] + +def getline(filename, lineno, module_globals=None): + lines = getlines(filename, module_globals) + if 1 <= lineno <= len(lines): + return lines[lineno-1] + else: + return '' + + +# The cache + +cache = {} # The cache + + +def clearcache(): + """Clear the cache entirely.""" + + global cache + cache = {} + + +def getlines(filename, module_globals=None): + """Get the lines for a file from the cache. + Update the cache if it doesn't contain an entry for this file already.""" + + if filename in cache: + return cache[filename][2] + + try: + return updatecache(filename, module_globals) + except MemoryError: + clearcache() + return [] + + +def checkcache(filename=None): + """Discard cache entries that are out of date. + (This is not checked upon each call!)""" + + if filename is None: + filenames = cache.keys() + else: + if filename in cache: + filenames = [filename] + else: + return + + for filename in filenames: + size, mtime, lines, fullname = cache[filename] + if mtime is None: + continue # no-op for files loaded via a __loader__ + try: + stat = os.stat(fullname) + except os.error: + del cache[filename] + continue + if size != stat.st_size or mtime != stat.st_mtime: + del cache[filename] + + +def updatecache(filename, module_globals=None): + """Update a cache entry and return its list of lines. + If something's wrong, print a message, discard the cache entry, + and return an empty list.""" + + if filename in cache: + del cache[filename] + if not filename or (filename.startswith('<') and filename.endswith('>')): + return [] + + fullname = filename + try: + stat = os.stat(fullname) + except OSError: + basename = filename + + # Try for a __loader__, if available + if module_globals and '__loader__' in module_globals: + name = module_globals.get('__name__') + loader = module_globals['__loader__'] + get_source = getattr(loader, 'get_source', None) + + if name and get_source: + try: + data = get_source(name) + except (ImportError, IOError): + pass + else: + if data is None: + # No luck, the PEP302 loader cannot find the source + # for this module. + return [] + cache[filename] = ( + len(data), None, + [line+'\n' for line in data.splitlines()], fullname + ) + return cache[filename][2] + + # Try looking through the module search path, which is only useful + # when handling a relative filename. + if os.path.isabs(filename): + return [] + + for dirname in sys.path: + # When using imputil, sys.path may contain things other than + # strings; ignore them when it happens. + try: + fullname = os.path.join(dirname, basename) + except (TypeError, AttributeError): + # Not sufficiently string-like to do anything useful with. + continue + try: + stat = os.stat(fullname) + break + except os.error: + pass + else: + return [] + try: + with open(fullname, 'rU') as fp: + lines = fp.readlines() + except IOError: + return [] + if lines and not lines[-1].endswith('\n'): + lines[-1] += '\n' + size, mtime = stat.st_size, stat.st_mtime + cache[filename] = size, mtime, lines, fullname + return lines
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/locale.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,2064 @@ +"""Locale support module. + +The module provides low-level access to the C lib's locale APIs and adds high +level number formatting APIs as well as a locale aliasing engine to complement +these. + +The aliasing engine includes support for many commonly used locale names and +maps them to values suitable for passing to the C lib's setlocale() function. It +also includes default encodings for all supported locale names. +""" + +import sys +import encodings +import encodings.aliases +import re +import operator +import functools + +# keep a copy of the builtin str type, because 'str' name is overridden +# in globals by a function below +_str = str + +try: + _unicode = unicode +except NameError: + # If Python is built without Unicode support, the unicode type + # will not exist. Fake one. + class _unicode(object): + pass + +# Try importing the _locale module. +# +# If this fails, fall back on a basic 'C' locale emulation. + +# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before +# trying the import. So __all__ is also fiddled at the end of the file. +__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error", + "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", + "str", "atof", "atoi", "format", "format_string", "currency", + "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", + "LC_NUMERIC", "LC_ALL", "CHAR_MAX"] + +try: + + from _locale import * + +except ImportError: + + # Locale emulation + + CHAR_MAX = 127 + LC_ALL = 6 + LC_COLLATE = 3 + LC_CTYPE = 0 + LC_MESSAGES = 5 + LC_MONETARY = 4 + LC_NUMERIC = 1 + LC_TIME = 2 + Error = ValueError + + def localeconv(): + """ localeconv() -> dict. + Returns numeric and monetary locale-specific parameters. + """ + # 'C' locale default values + return {'grouping': [127], + 'currency_symbol': '', + 'n_sign_posn': 127, + 'p_cs_precedes': 127, + 'n_cs_precedes': 127, + 'mon_grouping': [], + 'n_sep_by_space': 127, + 'decimal_point': '.', + 'negative_sign': '', + 'positive_sign': '', + 'p_sep_by_space': 127, + 'int_curr_symbol': '', + 'p_sign_posn': 127, + 'thousands_sep': '', + 'mon_thousands_sep': '', + 'frac_digits': 127, + 'mon_decimal_point': '', + 'int_frac_digits': 127} + + def setlocale(category, value=None): + """ setlocale(integer,string=None) -> string. + Activates/queries locale processing. + """ + if value not in (None, '', 'C'): + raise Error, '_locale emulation only supports "C" locale' + return 'C' + + def strcoll(a,b): + """ strcoll(string,string) -> int. + Compares two strings according to the locale. + """ + return cmp(a,b) + + def strxfrm(s): + """ strxfrm(string) -> string. + Returns a string that behaves for cmp locale-aware. + """ + return s + + +_localeconv = localeconv + +# With this dict, you can override some items of localeconv's return value. +# This is useful for testing purposes. +_override_localeconv = {} + +@functools.wraps(_localeconv) +def localeconv(): + d = _localeconv() + if _override_localeconv: + d.update(_override_localeconv) + return d + + +### Number formatting APIs + +# Author: Martin von Loewis +# improved by Georg Brandl + +# Iterate over grouping intervals +def _grouping_intervals(grouping): + last_interval = None + for interval in grouping: + # if grouping is -1, we are done + if interval == CHAR_MAX: + return + # 0: re-use last group ad infinitum + if interval == 0: + if last_interval is None: + raise ValueError("invalid grouping") + while True: + yield last_interval + yield interval + last_interval = interval + +#perform the grouping from right to left +def _group(s, monetary=False): + conv = localeconv() + thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep'] + grouping = conv[monetary and 'mon_grouping' or 'grouping'] + if not grouping: + return (s, 0) + if s[-1] == ' ': + stripped = s.rstrip() + right_spaces = s[len(stripped):] + s = stripped + else: + right_spaces = '' + left_spaces = '' + groups = [] + for interval in _grouping_intervals(grouping): + if not s or s[-1] not in "0123456789": + # only non-digit characters remain (sign, spaces) + left_spaces = s + s = '' + break + groups.append(s[-interval:]) + s = s[:-interval] + if s: + groups.append(s) + groups.reverse() + return ( + left_spaces + thousands_sep.join(groups) + right_spaces, + len(thousands_sep) * (len(groups) - 1) + ) + +# Strip a given amount of excess padding from the given string +def _strip_padding(s, amount): + lpos = 0 + while amount and s[lpos] == ' ': + lpos += 1 + amount -= 1 + rpos = len(s) - 1 + while amount and s[rpos] == ' ': + rpos -= 1 + amount -= 1 + return s[lpos:rpos+1] + +_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?' + r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]') + +def format(percent, value, grouping=False, monetary=False, *additional): + """Returns the locale-aware substitution of a %? specifier + (percent). + + additional is for format strings which contain one or more + '*' modifiers.""" + # this is only for one-percent-specifier strings and this should be checked + match = _percent_re.match(percent) + if not match or len(match.group())!= len(percent): + raise ValueError(("format() must be given exactly one %%char " + "format specifier, %s not valid") % repr(percent)) + return _format(percent, value, grouping, monetary, *additional) + +def _format(percent, value, grouping=False, monetary=False, *additional): + if additional: + formatted = percent % ((value,) + additional) + else: + formatted = percent % value + # floats and decimal ints need special action! + if percent[-1] in 'eEfFgG': + seps = 0 + parts = formatted.split('.') + if grouping: + parts[0], seps = _group(parts[0], monetary=monetary) + decimal_point = localeconv()[monetary and 'mon_decimal_point' + or 'decimal_point'] + formatted = decimal_point.join(parts) + if seps: + formatted = _strip_padding(formatted, seps) + elif percent[-1] in 'diu': + seps = 0 + if grouping: + formatted, seps = _group(formatted, monetary=monetary) + if seps: + formatted = _strip_padding(formatted, seps) + return formatted + +def format_string(f, val, grouping=False): + """Formats a string in the same way that the % formatting would use, + but takes the current locale into account. + Grouping is applied if the third parameter is true.""" + percents = list(_percent_re.finditer(f)) + new_f = _percent_re.sub('%s', f) + + if operator.isMappingType(val): + new_val = [] + for perc in percents: + if perc.group()[-1]=='%': + new_val.append('%') + else: + new_val.append(format(perc.group(), val, grouping)) + else: + if not isinstance(val, tuple): + val = (val,) + new_val = [] + i = 0 + for perc in percents: + if perc.group()[-1]=='%': + new_val.append('%') + else: + starcount = perc.group('modifiers').count('*') + new_val.append(_format(perc.group(), + val[i], + grouping, + False, + *val[i+1:i+1+starcount])) + i += (1 + starcount) + val = tuple(new_val) + + return new_f % val + +def currency(val, symbol=True, grouping=False, international=False): + """Formats val according to the currency settings + in the current locale.""" + conv = localeconv() + + # check for illegal values + digits = conv[international and 'int_frac_digits' or 'frac_digits'] + if digits == 127: + raise ValueError("Currency formatting is not possible using " + "the 'C' locale.") + + s = format('%%.%if' % digits, abs(val), grouping, monetary=True) + # '<' and '>' are markers if the sign must be inserted between symbol and value + s = '<' + s + '>' + + if symbol: + smb = conv[international and 'int_curr_symbol' or 'currency_symbol'] + precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes'] + separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space'] + + if precedes: + s = smb + (separated and ' ' or '') + s + else: + s = s + (separated and ' ' or '') + smb + + sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn'] + sign = conv[val<0 and 'negative_sign' or 'positive_sign'] + + if sign_pos == 0: + s = '(' + s + ')' + elif sign_pos == 1: + s = sign + s + elif sign_pos == 2: + s = s + sign + elif sign_pos == 3: + s = s.replace('<', sign) + elif sign_pos == 4: + s = s.replace('>', sign) + else: + # the default if nothing specified; + # this should be the most fitting sign position + s = sign + s + + return s.replace('<', '').replace('>', '') + +def str(val): + """Convert float to string, taking the locale into account.""" + return format("%.12g", val) + +def atof(string, func=float): + "Parses a string as a float according to the locale settings." + #First, get rid of the grouping + ts = localeconv()['thousands_sep'] + if ts: + string = string.replace(ts, '') + #next, replace the decimal point with a dot + dd = localeconv()['decimal_point'] + if dd: + string = string.replace(dd, '.') + #finally, parse the string + return func(string) + +def atoi(str): + "Converts a string to an integer according to the locale settings." + return atof(str, int) + +def _test(): + setlocale(LC_ALL, "") + #do grouping + s1 = format("%d", 123456789,1) + print s1, "is", atoi(s1) + #standard formatting + s1 = str(3.14) + print s1, "is", atof(s1) + +### Locale name aliasing engine + +# Author: Marc-Andre Lemburg, mal@lemburg.com +# Various tweaks by Fredrik Lundh <fredrik@pythonware.com> + +# store away the low-level version of setlocale (it's +# overridden below) +_setlocale = setlocale + +# Avoid relying on the locale-dependent .lower() method +# (see issue #1813). +_ascii_lower_map = ''.join( + chr(x + 32 if x >= ord('A') and x <= ord('Z') else x) + for x in range(256) +) + +def _replace_encoding(code, encoding): + if '.' in code: + langname = code[:code.index('.')] + else: + langname = code + # Convert the encoding to a C lib compatible encoding string + norm_encoding = encodings.normalize_encoding(encoding) + #print('norm encoding: %r' % norm_encoding) + norm_encoding = encodings.aliases.aliases.get(norm_encoding, + norm_encoding) + #print('aliased encoding: %r' % norm_encoding) + encoding = locale_encoding_alias.get(norm_encoding, + norm_encoding) + #print('found encoding %r' % encoding) + return langname + '.' + encoding + +def normalize(localename): + + """ Returns a normalized locale code for the given locale + name. + + The returned locale code is formatted for use with + setlocale(). + + If normalization fails, the original name is returned + unchanged. + + If the given encoding is not known, the function defaults to + the default encoding for the locale code just like setlocale() + does. + + """ + # Normalize the locale name and extract the encoding and modifier + if isinstance(localename, _unicode): + localename = localename.encode('ascii') + code = localename.translate(_ascii_lower_map) + if ':' in code: + # ':' is sometimes used as encoding delimiter. + code = code.replace(':', '.') + if '@' in code: + code, modifier = code.split('@', 1) + else: + modifier = '' + if '.' in code: + langname, encoding = code.split('.')[:2] + else: + langname = code + encoding = '' + + # First lookup: fullname (possibly with encoding and modifier) + lang_enc = langname + if encoding: + norm_encoding = encoding.replace('-', '') + norm_encoding = norm_encoding.replace('_', '') + lang_enc += '.' + norm_encoding + lookup_name = lang_enc + if modifier: + lookup_name += '@' + modifier + code = locale_alias.get(lookup_name, None) + if code is not None: + return code + #print('first lookup failed') + + if modifier: + # Second try: fullname without modifier (possibly with encoding) + code = locale_alias.get(lang_enc, None) + if code is not None: + #print('lookup without modifier succeeded') + if '@' not in code: + return code + '@' + modifier + if code.split('@', 1)[1].translate(_ascii_lower_map) == modifier: + return code + #print('second lookup failed') + + if encoding: + # Third try: langname (without encoding, possibly with modifier) + lookup_name = langname + if modifier: + lookup_name += '@' + modifier + code = locale_alias.get(lookup_name, None) + if code is not None: + #print('lookup without encoding succeeded') + if '@' not in code: + return _replace_encoding(code, encoding) + code, modifier = code.split('@', 1) + return _replace_encoding(code, encoding) + '@' + modifier + + if modifier: + # Fourth try: langname (without encoding and modifier) + code = locale_alias.get(langname, None) + if code is not None: + #print('lookup without modifier and encoding succeeded') + if '@' not in code: + return _replace_encoding(code, encoding) + '@' + modifier + code, defmod = code.split('@', 1) + if defmod.translate(_ascii_lower_map) == modifier: + return _replace_encoding(code, encoding) + '@' + defmod + + return localename + +def _parse_localename(localename): + + """ Parses the locale code for localename and returns the + result as tuple (language code, encoding). + + The localename is normalized and passed through the locale + alias engine. A ValueError is raised in case the locale name + cannot be parsed. + + The language code corresponds to RFC 1766. code and encoding + can be None in case the values cannot be determined or are + unknown to this implementation. + + """ + code = normalize(localename) + if '@' in code: + # Deal with locale modifiers + code, modifier = code.split('@', 1) + if modifier == 'euro' and '.' not in code: + # Assume Latin-9 for @euro locales. This is bogus, + # since some systems may use other encodings for these + # locales. Also, we ignore other modifiers. + return code, 'iso-8859-15' + + if '.' in code: + return tuple(code.split('.')[:2]) + elif code == 'C': + return None, None + raise ValueError, 'unknown locale: %s' % localename + +def _build_localename(localetuple): + + """ Builds a locale code from the given tuple (language code, + encoding). + + No aliasing or normalizing takes place. + + """ + language, encoding = localetuple + if language is None: + language = 'C' + if encoding is None: + return language + else: + return language + '.' + encoding + +def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')): + + """ Tries to determine the default locale settings and returns + them as tuple (language code, encoding). + + According to POSIX, a program which has not called + setlocale(LC_ALL, "") runs using the portable 'C' locale. + Calling setlocale(LC_ALL, "") lets it use the default locale as + defined by the LANG variable. Since we don't want to interfere + with the current locale setting we thus emulate the behavior + in the way described above. + + To maintain compatibility with other platforms, not only the + LANG variable is tested, but a list of variables given as + envvars parameter. The first found to be defined will be + used. envvars defaults to the search path used in GNU gettext; + it must always contain the variable name 'LANG'. + + Except for the code 'C', the language code corresponds to RFC + 1766. code and encoding can be None in case the values cannot + be determined. + + """ + + try: + # check if it's supported by the _locale module + import _locale + code, encoding = _locale._getdefaultlocale() + except (ImportError, AttributeError): + pass + else: + # make sure the code/encoding values are valid + if sys.platform == "win32" and code and code[:2] == "0x": + # map windows language identifier to language name + code = windows_locale.get(int(code, 0)) + # ...add other platform-specific processing here, if + # necessary... + return code, encoding + + # fall back on POSIX behaviour + import os + lookup = os.environ.get + for variable in envvars: + localename = lookup(variable,None) + if localename: + if variable == 'LANGUAGE': + localename = localename.split(':')[0] + break + else: + localename = 'C' + return _parse_localename(localename) + + +def getlocale(category=LC_CTYPE): + + """ Returns the current setting for the given locale category as + tuple (language code, encoding). + + category may be one of the LC_* value except LC_ALL. It + defaults to LC_CTYPE. + + Except for the code 'C', the language code corresponds to RFC + 1766. code and encoding can be None in case the values cannot + be determined. + + """ + localename = _setlocale(category) + if category == LC_ALL and ';' in localename: + raise TypeError, 'category LC_ALL is not supported' + return _parse_localename(localename) + +def setlocale(category, locale=None): + + """ Set the locale for the given category. The locale can be + a string, an iterable of two strings (language code and encoding), + or None. + + Iterables are converted to strings using the locale aliasing + engine. Locale strings are passed directly to the C lib. + + category may be given as one of the LC_* values. + + """ + if locale and not isinstance(locale, (_str, _unicode)): + # convert to string + locale = normalize(_build_localename(locale)) + return _setlocale(category, locale) + +def resetlocale(category=LC_ALL): + + """ Sets the locale for category to the default setting. + + The default setting is determined by calling + getdefaultlocale(). category defaults to LC_ALL. + + """ + _setlocale(category, _build_localename(getdefaultlocale())) + +if sys.platform.startswith("win"): + # On Win32, this will return the ANSI code page + def getpreferredencoding(do_setlocale = True): + """Return the charset that the user is likely using.""" + import _locale + return _locale._getdefaultlocale()[1] +else: + # On Unix, if CODESET is available, use that. + try: + CODESET + except NameError: + # Fall back to parsing environment variables :-( + def getpreferredencoding(do_setlocale = True): + """Return the charset that the user is likely using, + by looking at environment variables.""" + return getdefaultlocale()[1] + else: + def getpreferredencoding(do_setlocale = True): + """Return the charset that the user is likely using, + according to the system configuration.""" + if do_setlocale: + oldloc = setlocale(LC_CTYPE) + try: + setlocale(LC_CTYPE, "") + except Error: + pass + result = nl_langinfo(CODESET) + setlocale(LC_CTYPE, oldloc) + return result + else: + return nl_langinfo(CODESET) + + +### Database +# +# The following data was extracted from the locale.alias file which +# comes with X11 and then hand edited removing the explicit encoding +# definitions and adding some more aliases. The file is usually +# available as /usr/lib/X11/locale/locale.alias. +# + +# +# The local_encoding_alias table maps lowercase encoding alias names +# to C locale encoding names (case-sensitive). Note that normalize() +# first looks up the encoding in the encodings.aliases dictionary and +# then applies this mapping to find the correct C lib name for the +# encoding. +# +locale_encoding_alias = { + + # Mappings for non-standard encoding names used in locale names + '437': 'C', + 'c': 'C', + 'en': 'ISO8859-1', + 'jis': 'JIS7', + 'jis7': 'JIS7', + 'ajec': 'eucJP', + + # Mappings from Python codec names to C lib encoding names + 'ascii': 'ISO8859-1', + 'latin_1': 'ISO8859-1', + 'iso8859_1': 'ISO8859-1', + 'iso8859_10': 'ISO8859-10', + 'iso8859_11': 'ISO8859-11', + 'iso8859_13': 'ISO8859-13', + 'iso8859_14': 'ISO8859-14', + 'iso8859_15': 'ISO8859-15', + 'iso8859_16': 'ISO8859-16', + 'iso8859_2': 'ISO8859-2', + 'iso8859_3': 'ISO8859-3', + 'iso8859_4': 'ISO8859-4', + 'iso8859_5': 'ISO8859-5', + 'iso8859_6': 'ISO8859-6', + 'iso8859_7': 'ISO8859-7', + 'iso8859_8': 'ISO8859-8', + 'iso8859_9': 'ISO8859-9', + 'iso2022_jp': 'JIS7', + 'shift_jis': 'SJIS', + 'tactis': 'TACTIS', + 'euc_jp': 'eucJP', + 'euc_kr': 'eucKR', + 'utf_8': 'UTF-8', + 'koi8_r': 'KOI8-R', + 'koi8_u': 'KOI8-U', + # XXX This list is still incomplete. If you know more + # mappings, please file a bug report. Thanks. +} + +# +# The locale_alias table maps lowercase alias names to C locale names +# (case-sensitive). Encodings are always separated from the locale +# name using a dot ('.'); they should only be given in case the +# language name is needed to interpret the given encoding alias +# correctly (CJK codes often have this need). +# +# Note that the normalize() function which uses this tables +# removes '_' and '-' characters from the encoding part of the +# locale name before doing the lookup. This saves a lot of +# space in the table. +# +# MAL 2004-12-10: +# Updated alias mapping to most recent locale.alias file +# from X.org distribution using makelocalealias.py. +# +# These are the differences compared to the old mapping (Python 2.4 +# and older): +# +# updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' +# updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' +# updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' +# updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1' +# updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' +# updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' +# updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' +# updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' +# updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP' +# updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13' +# updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13' +# updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' +# updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' +# updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11' +# updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312' +# updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5' +# updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5' +# +# MAL 2008-05-30: +# Updated alias mapping to most recent locale.alias file +# from X.org distribution using makelocalealias.py. +# +# These are the differences compared to the old mapping (Python 2.5 +# and older): +# +# updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2' +# updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2' +# updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' +# updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2' +# updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' +# updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8' +# updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' +# +# AP 2010-04-12: +# Updated alias mapping to most recent locale.alias file +# from X.org distribution using makelocalealias.py. +# +# These are the differences compared to the old mapping (Python 2.6.5 +# and older): +# +# updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' +# updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' +# updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' +# updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' +# updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin' +# updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' +# updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin' +# updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8' +# updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' +# +# SS 2013-12-20: +# Updated alias mapping to most recent locale.alias file +# from X.org distribution using makelocalealias.py. +# +# These are the differences compared to the old mapping (Python 2.7.6 +# and older): +# +# updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' +# updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' +# updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' +# updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' +# updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' +# updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8' +# updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' +# updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8' +# updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' +# updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' +# +# SS 2014-10-01: +# Updated alias mapping with glibc 2.19 supported locales. + +locale_alias = { + 'a3': 'az_AZ.KOI8-C', + 'a3_az': 'az_AZ.KOI8-C', + 'a3_az.koi8c': 'az_AZ.KOI8-C', + 'a3_az.koic': 'az_AZ.KOI8-C', + 'aa_dj': 'aa_DJ.ISO8859-1', + 'aa_er': 'aa_ER.UTF-8', + 'aa_et': 'aa_ET.UTF-8', + 'af': 'af_ZA.ISO8859-1', + 'af_za': 'af_ZA.ISO8859-1', + 'af_za.iso88591': 'af_ZA.ISO8859-1', + 'am': 'am_ET.UTF-8', + 'am_et': 'am_ET.UTF-8', + 'american': 'en_US.ISO8859-1', + 'american.iso88591': 'en_US.ISO8859-1', + 'an_es': 'an_ES.ISO8859-15', + 'ar': 'ar_AA.ISO8859-6', + 'ar_aa': 'ar_AA.ISO8859-6', + 'ar_aa.iso88596': 'ar_AA.ISO8859-6', + 'ar_ae': 'ar_AE.ISO8859-6', + 'ar_ae.iso88596': 'ar_AE.ISO8859-6', + 'ar_bh': 'ar_BH.ISO8859-6', + 'ar_bh.iso88596': 'ar_BH.ISO8859-6', + 'ar_dz': 'ar_DZ.ISO8859-6', + 'ar_dz.iso88596': 'ar_DZ.ISO8859-6', + 'ar_eg': 'ar_EG.ISO8859-6', + 'ar_eg.iso88596': 'ar_EG.ISO8859-6', + 'ar_in': 'ar_IN.UTF-8', + 'ar_iq': 'ar_IQ.ISO8859-6', + 'ar_iq.iso88596': 'ar_IQ.ISO8859-6', + 'ar_jo': 'ar_JO.ISO8859-6', + 'ar_jo.iso88596': 'ar_JO.ISO8859-6', + 'ar_kw': 'ar_KW.ISO8859-6', + 'ar_kw.iso88596': 'ar_KW.ISO8859-6', + 'ar_lb': 'ar_LB.ISO8859-6', + 'ar_lb.iso88596': 'ar_LB.ISO8859-6', + 'ar_ly': 'ar_LY.ISO8859-6', + 'ar_ly.iso88596': 'ar_LY.ISO8859-6', + 'ar_ma': 'ar_MA.ISO8859-6', + 'ar_ma.iso88596': 'ar_MA.ISO8859-6', + 'ar_om': 'ar_OM.ISO8859-6', + 'ar_om.iso88596': 'ar_OM.ISO8859-6', + 'ar_qa': 'ar_QA.ISO8859-6', + 'ar_qa.iso88596': 'ar_QA.ISO8859-6', + 'ar_sa': 'ar_SA.ISO8859-6', + 'ar_sa.iso88596': 'ar_SA.ISO8859-6', + 'ar_sd': 'ar_SD.ISO8859-6', + 'ar_sd.iso88596': 'ar_SD.ISO8859-6', + 'ar_sy': 'ar_SY.ISO8859-6', + 'ar_sy.iso88596': 'ar_SY.ISO8859-6', + 'ar_tn': 'ar_TN.ISO8859-6', + 'ar_tn.iso88596': 'ar_TN.ISO8859-6', + 'ar_ye': 'ar_YE.ISO8859-6', + 'ar_ye.iso88596': 'ar_YE.ISO8859-6', + 'arabic': 'ar_AA.ISO8859-6', + 'arabic.iso88596': 'ar_AA.ISO8859-6', + 'as': 'as_IN.UTF-8', + 'as_in': 'as_IN.UTF-8', + 'ast_es': 'ast_ES.ISO8859-15', + 'ayc_pe': 'ayc_PE.UTF-8', + 'az': 'az_AZ.ISO8859-9E', + 'az_az': 'az_AZ.ISO8859-9E', + 'az_az.iso88599e': 'az_AZ.ISO8859-9E', + 'be': 'be_BY.CP1251', + 'be@latin': 'be_BY.UTF-8@latin', + 'be_bg.utf8': 'bg_BG.UTF-8', + 'be_by': 'be_BY.CP1251', + 'be_by.cp1251': 'be_BY.CP1251', + 'be_by.microsoftcp1251': 'be_BY.CP1251', + 'be_by.utf8@latin': 'be_BY.UTF-8@latin', + 'be_by@latin': 'be_BY.UTF-8@latin', + 'bem_zm': 'bem_ZM.UTF-8', + 'ber_dz': 'ber_DZ.UTF-8', + 'ber_ma': 'ber_MA.UTF-8', + 'bg': 'bg_BG.CP1251', + 'bg_bg': 'bg_BG.CP1251', + 'bg_bg.cp1251': 'bg_BG.CP1251', + 'bg_bg.iso88595': 'bg_BG.ISO8859-5', + 'bg_bg.koi8r': 'bg_BG.KOI8-R', + 'bg_bg.microsoftcp1251': 'bg_BG.CP1251', + 'bho_in': 'bho_IN.UTF-8', + 'bn_bd': 'bn_BD.UTF-8', + 'bn_in': 'bn_IN.UTF-8', + 'bo_cn': 'bo_CN.UTF-8', + 'bo_in': 'bo_IN.UTF-8', + 'bokmal': 'nb_NO.ISO8859-1', + 'bokm\xe5l': 'nb_NO.ISO8859-1', + 'br': 'br_FR.ISO8859-1', + 'br_fr': 'br_FR.ISO8859-1', + 'br_fr.iso88591': 'br_FR.ISO8859-1', + 'br_fr.iso885914': 'br_FR.ISO8859-14', + 'br_fr.iso885915': 'br_FR.ISO8859-15', + 'br_fr.iso885915@euro': 'br_FR.ISO8859-15', + 'br_fr.utf8@euro': 'br_FR.UTF-8', + 'br_fr@euro': 'br_FR.ISO8859-15', + 'brx_in': 'brx_IN.UTF-8', + 'bs': 'bs_BA.ISO8859-2', + 'bs_ba': 'bs_BA.ISO8859-2', + 'bs_ba.iso88592': 'bs_BA.ISO8859-2', + 'bulgarian': 'bg_BG.CP1251', + 'byn_er': 'byn_ER.UTF-8', + 'c': 'C', + 'c-french': 'fr_CA.ISO8859-1', + 'c-french.iso88591': 'fr_CA.ISO8859-1', + 'c.ascii': 'C', + 'c.en': 'C', + 'c.iso88591': 'en_US.ISO8859-1', + 'c.utf8': 'en_US.UTF-8', + 'c_c': 'C', + 'c_c.c': 'C', + 'ca': 'ca_ES.ISO8859-1', + 'ca_ad': 'ca_AD.ISO8859-1', + 'ca_ad.iso88591': 'ca_AD.ISO8859-1', + 'ca_ad.iso885915': 'ca_AD.ISO8859-15', + 'ca_ad.iso885915@euro': 'ca_AD.ISO8859-15', + 'ca_ad.utf8@euro': 'ca_AD.UTF-8', + 'ca_ad@euro': 'ca_AD.ISO8859-15', + 'ca_es': 'ca_ES.ISO8859-1', + 'ca_es.iso88591': 'ca_ES.ISO8859-1', + 'ca_es.iso885915': 'ca_ES.ISO8859-15', + 'ca_es.iso885915@euro': 'ca_ES.ISO8859-15', + 'ca_es.utf8@euro': 'ca_ES.UTF-8', + 'ca_es@valencia': 'ca_ES.ISO8859-15@valencia', + 'ca_es@euro': 'ca_ES.ISO8859-15', + 'ca_fr': 'ca_FR.ISO8859-1', + 'ca_fr.iso88591': 'ca_FR.ISO8859-1', + 'ca_fr.iso885915': 'ca_FR.ISO8859-15', + 'ca_fr.iso885915@euro': 'ca_FR.ISO8859-15', + 'ca_fr.utf8@euro': 'ca_FR.UTF-8', + 'ca_fr@euro': 'ca_FR.ISO8859-15', + 'ca_it': 'ca_IT.ISO8859-1', + 'ca_it.iso88591': 'ca_IT.ISO8859-1', + 'ca_it.iso885915': 'ca_IT.ISO8859-15', + 'ca_it.iso885915@euro': 'ca_IT.ISO8859-15', + 'ca_it.utf8@euro': 'ca_IT.UTF-8', + 'ca_it@euro': 'ca_IT.ISO8859-15', + 'catalan': 'ca_ES.ISO8859-1', + 'cextend': 'en_US.ISO8859-1', + 'cextend.en': 'en_US.ISO8859-1', + 'chinese-s': 'zh_CN.eucCN', + 'chinese-t': 'zh_TW.eucTW', + 'crh_ua': 'crh_UA.UTF-8', + 'croatian': 'hr_HR.ISO8859-2', + 'cs': 'cs_CZ.ISO8859-2', + 'cs_cs': 'cs_CZ.ISO8859-2', + 'cs_cs.iso88592': 'cs_CZ.ISO8859-2', + 'cs_cz': 'cs_CZ.ISO8859-2', + 'cs_cz.iso88592': 'cs_CZ.ISO8859-2', + 'csb_pl': 'csb_PL.UTF-8', + 'cv_ru': 'cv_RU.UTF-8', + 'cy': 'cy_GB.ISO8859-1', + 'cy_gb': 'cy_GB.ISO8859-1', + 'cy_gb.iso88591': 'cy_GB.ISO8859-1', + 'cy_gb.iso885914': 'cy_GB.ISO8859-14', + 'cy_gb.iso885915': 'cy_GB.ISO8859-15', + 'cy_gb@euro': 'cy_GB.ISO8859-15', + 'cz': 'cs_CZ.ISO8859-2', + 'cz_cz': 'cs_CZ.ISO8859-2', + 'czech': 'cs_CZ.ISO8859-2', + 'da': 'da_DK.ISO8859-1', + 'da.iso885915': 'da_DK.ISO8859-15', + 'da_dk': 'da_DK.ISO8859-1', + 'da_dk.88591': 'da_DK.ISO8859-1', + 'da_dk.885915': 'da_DK.ISO8859-15', + 'da_dk.iso88591': 'da_DK.ISO8859-1', + 'da_dk.iso885915': 'da_DK.ISO8859-15', + 'da_dk@euro': 'da_DK.ISO8859-15', + 'danish': 'da_DK.ISO8859-1', + 'danish.iso88591': 'da_DK.ISO8859-1', + 'dansk': 'da_DK.ISO8859-1', + 'de': 'de_DE.ISO8859-1', + 'de.iso885915': 'de_DE.ISO8859-15', + 'de_at': 'de_AT.ISO8859-1', + 'de_at.iso88591': 'de_AT.ISO8859-1', + 'de_at.iso885915': 'de_AT.ISO8859-15', + 'de_at.iso885915@euro': 'de_AT.ISO8859-15', + 'de_at.utf8@euro': 'de_AT.UTF-8', + 'de_at@euro': 'de_AT.ISO8859-15', + 'de_be': 'de_BE.ISO8859-1', + 'de_be.iso88591': 'de_BE.ISO8859-1', + 'de_be.iso885915': 'de_BE.ISO8859-15', + 'de_be.iso885915@euro': 'de_BE.ISO8859-15', + 'de_be.utf8@euro': 'de_BE.UTF-8', + 'de_be@euro': 'de_BE.ISO8859-15', + 'de_ch': 'de_CH.ISO8859-1', + 'de_ch.iso88591': 'de_CH.ISO8859-1', + 'de_ch.iso885915': 'de_CH.ISO8859-15', + 'de_ch@euro': 'de_CH.ISO8859-15', + 'de_de': 'de_DE.ISO8859-1', + 'de_de.88591': 'de_DE.ISO8859-1', + 'de_de.885915': 'de_DE.ISO8859-15', + 'de_de.885915@euro': 'de_DE.ISO8859-15', + 'de_de.iso88591': 'de_DE.ISO8859-1', + 'de_de.iso885915': 'de_DE.ISO8859-15', + 'de_de.iso885915@euro': 'de_DE.ISO8859-15', + 'de_de.utf8@euro': 'de_DE.UTF-8', + 'de_de@euro': 'de_DE.ISO8859-15', + 'de_li.utf8': 'de_LI.UTF-8', + 'de_lu': 'de_LU.ISO8859-1', + 'de_lu.iso88591': 'de_LU.ISO8859-1', + 'de_lu.iso885915': 'de_LU.ISO8859-15', + 'de_lu.iso885915@euro': 'de_LU.ISO8859-15', + 'de_lu.utf8@euro': 'de_LU.UTF-8', + 'de_lu@euro': 'de_LU.ISO8859-15', + 'deutsch': 'de_DE.ISO8859-1', + 'doi_in': 'doi_IN.UTF-8', + 'dutch': 'nl_NL.ISO8859-1', + 'dutch.iso88591': 'nl_BE.ISO8859-1', + 'dv_mv': 'dv_MV.UTF-8', + 'dz_bt': 'dz_BT.UTF-8', + 'ee': 'ee_EE.ISO8859-4', + 'ee_ee': 'ee_EE.ISO8859-4', + 'ee_ee.iso88594': 'ee_EE.ISO8859-4', + 'eesti': 'et_EE.ISO8859-1', + 'el': 'el_GR.ISO8859-7', + 'el_cy': 'el_CY.ISO8859-7', + 'el_gr': 'el_GR.ISO8859-7', + 'el_gr.iso88597': 'el_GR.ISO8859-7', + 'el_gr@euro': 'el_GR.ISO8859-15', + 'en': 'en_US.ISO8859-1', + 'en.iso88591': 'en_US.ISO8859-1', + 'en_ag': 'en_AG.UTF-8', + 'en_au': 'en_AU.ISO8859-1', + 'en_au.iso88591': 'en_AU.ISO8859-1', + 'en_be': 'en_BE.ISO8859-1', + 'en_be@euro': 'en_BE.ISO8859-15', + 'en_bw': 'en_BW.ISO8859-1', + 'en_bw.iso88591': 'en_BW.ISO8859-1', + 'en_ca': 'en_CA.ISO8859-1', + 'en_ca.iso88591': 'en_CA.ISO8859-1', + 'en_dk': 'en_DK.ISO8859-1', + 'en_dl.utf8': 'en_DL.UTF-8', + 'en_gb': 'en_GB.ISO8859-1', + 'en_gb.88591': 'en_GB.ISO8859-1', + 'en_gb.iso88591': 'en_GB.ISO8859-1', + 'en_gb.iso885915': 'en_GB.ISO8859-15', + 'en_gb@euro': 'en_GB.ISO8859-15', + 'en_hk': 'en_HK.ISO8859-1', + 'en_hk.iso88591': 'en_HK.ISO8859-1', + 'en_ie': 'en_IE.ISO8859-1', + 'en_ie.iso88591': 'en_IE.ISO8859-1', + 'en_ie.iso885915': 'en_IE.ISO8859-15', + 'en_ie.iso885915@euro': 'en_IE.ISO8859-15', + 'en_ie.utf8@euro': 'en_IE.UTF-8', + 'en_ie@euro': 'en_IE.ISO8859-15', + 'en_in': 'en_IN.ISO8859-1', + 'en_ng': 'en_NG.UTF-8', + 'en_nz': 'en_NZ.ISO8859-1', + 'en_nz.iso88591': 'en_NZ.ISO8859-1', + 'en_ph': 'en_PH.ISO8859-1', + 'en_ph.iso88591': 'en_PH.ISO8859-1', + 'en_sg': 'en_SG.ISO8859-1', + 'en_sg.iso88591': 'en_SG.ISO8859-1', + 'en_uk': 'en_GB.ISO8859-1', + 'en_us': 'en_US.ISO8859-1', + 'en_us.88591': 'en_US.ISO8859-1', + 'en_us.885915': 'en_US.ISO8859-15', + 'en_us.iso88591': 'en_US.ISO8859-1', + 'en_us.iso885915': 'en_US.ISO8859-15', + 'en_us.iso885915@euro': 'en_US.ISO8859-15', + 'en_us@euro': 'en_US.ISO8859-15', + 'en_us@euro@euro': 'en_US.ISO8859-15', + 'en_za': 'en_ZA.ISO8859-1', + 'en_za.88591': 'en_ZA.ISO8859-1', + 'en_za.iso88591': 'en_ZA.ISO8859-1', + 'en_za.iso885915': 'en_ZA.ISO8859-15', + 'en_za@euro': 'en_ZA.ISO8859-15', + 'en_zm': 'en_ZM.UTF-8', + 'en_zw': 'en_ZW.ISO8859-1', + 'en_zw.iso88591': 'en_ZW.ISO8859-1', + 'en_zw.utf8': 'en_ZS.UTF-8', + 'eng_gb': 'en_GB.ISO8859-1', + 'eng_gb.8859': 'en_GB.ISO8859-1', + 'english': 'en_EN.ISO8859-1', + 'english.iso88591': 'en_EN.ISO8859-1', + 'english_uk': 'en_GB.ISO8859-1', + 'english_uk.8859': 'en_GB.ISO8859-1', + 'english_united-states': 'en_US.ISO8859-1', + 'english_united-states.437': 'C', + 'english_us': 'en_US.ISO8859-1', + 'english_us.8859': 'en_US.ISO8859-1', + 'english_us.ascii': 'en_US.ISO8859-1', + 'eo': 'eo_XX.ISO8859-3', + 'eo.utf8': 'eo.UTF-8', + 'eo_eo': 'eo_EO.ISO8859-3', + 'eo_eo.iso88593': 'eo_EO.ISO8859-3', + 'eo_us.utf8': 'eo_US.UTF-8', + 'eo_xx': 'eo_XX.ISO8859-3', + 'eo_xx.iso88593': 'eo_XX.ISO8859-3', + 'es': 'es_ES.ISO8859-1', + 'es_ar': 'es_AR.ISO8859-1', + 'es_ar.iso88591': 'es_AR.ISO8859-1', + 'es_bo': 'es_BO.ISO8859-1', + 'es_bo.iso88591': 'es_BO.ISO8859-1', + 'es_cl': 'es_CL.ISO8859-1', + 'es_cl.iso88591': 'es_CL.ISO8859-1', + 'es_co': 'es_CO.ISO8859-1', + 'es_co.iso88591': 'es_CO.ISO8859-1', + 'es_cr': 'es_CR.ISO8859-1', + 'es_cr.iso88591': 'es_CR.ISO8859-1', + 'es_cu': 'es_CU.UTF-8', + 'es_do': 'es_DO.ISO8859-1', + 'es_do.iso88591': 'es_DO.ISO8859-1', + 'es_ec': 'es_EC.ISO8859-1', + 'es_ec.iso88591': 'es_EC.ISO8859-1', + 'es_es': 'es_ES.ISO8859-1', + 'es_es.88591': 'es_ES.ISO8859-1', + 'es_es.iso88591': 'es_ES.ISO8859-1', + 'es_es.iso885915': 'es_ES.ISO8859-15', + 'es_es.iso885915@euro': 'es_ES.ISO8859-15', + 'es_es.utf8@euro': 'es_ES.UTF-8', + 'es_es@euro': 'es_ES.ISO8859-15', + 'es_gt': 'es_GT.ISO8859-1', + 'es_gt.iso88591': 'es_GT.ISO8859-1', + 'es_hn': 'es_HN.ISO8859-1', + 'es_hn.iso88591': 'es_HN.ISO8859-1', + 'es_mx': 'es_MX.ISO8859-1', + 'es_mx.iso88591': 'es_MX.ISO8859-1', + 'es_ni': 'es_NI.ISO8859-1', + 'es_ni.iso88591': 'es_NI.ISO8859-1', + 'es_pa': 'es_PA.ISO8859-1', + 'es_pa.iso88591': 'es_PA.ISO8859-1', + 'es_pa.iso885915': 'es_PA.ISO8859-15', + 'es_pa@euro': 'es_PA.ISO8859-15', + 'es_pe': 'es_PE.ISO8859-1', + 'es_pe.iso88591': 'es_PE.ISO8859-1', + 'es_pe.iso885915': 'es_PE.ISO8859-15', + 'es_pe@euro': 'es_PE.ISO8859-15', + 'es_pr': 'es_PR.ISO8859-1', + 'es_pr.iso88591': 'es_PR.ISO8859-1', + 'es_py': 'es_PY.ISO8859-1', + 'es_py.iso88591': 'es_PY.ISO8859-1', + 'es_py.iso885915': 'es_PY.ISO8859-15', + 'es_py@euro': 'es_PY.ISO8859-15', + 'es_sv': 'es_SV.ISO8859-1', + 'es_sv.iso88591': 'es_SV.ISO8859-1', + 'es_sv.iso885915': 'es_SV.ISO8859-15', + 'es_sv@euro': 'es_SV.ISO8859-15', + 'es_us': 'es_US.ISO8859-1', + 'es_us.iso88591': 'es_US.ISO8859-1', + 'es_uy': 'es_UY.ISO8859-1', + 'es_uy.iso88591': 'es_UY.ISO8859-1', + 'es_uy.iso885915': 'es_UY.ISO8859-15', + 'es_uy@euro': 'es_UY.ISO8859-15', + 'es_ve': 'es_VE.ISO8859-1', + 'es_ve.iso88591': 'es_VE.ISO8859-1', + 'es_ve.iso885915': 'es_VE.ISO8859-15', + 'es_ve@euro': 'es_VE.ISO8859-15', + 'estonian': 'et_EE.ISO8859-1', + 'et': 'et_EE.ISO8859-15', + 'et_ee': 'et_EE.ISO8859-15', + 'et_ee.iso88591': 'et_EE.ISO8859-1', + 'et_ee.iso885913': 'et_EE.ISO8859-13', + 'et_ee.iso885915': 'et_EE.ISO8859-15', + 'et_ee.iso88594': 'et_EE.ISO8859-4', + 'et_ee@euro': 'et_EE.ISO8859-15', + 'eu': 'eu_ES.ISO8859-1', + 'eu_es': 'eu_ES.ISO8859-1', + 'eu_es.iso88591': 'eu_ES.ISO8859-1', + 'eu_es.iso885915': 'eu_ES.ISO8859-15', + 'eu_es.iso885915@euro': 'eu_ES.ISO8859-15', + 'eu_es.utf8@euro': 'eu_ES.UTF-8', + 'eu_es@euro': 'eu_ES.ISO8859-15', + 'eu_fr': 'eu_FR.ISO8859-1', + 'fa': 'fa_IR.UTF-8', + 'fa_ir': 'fa_IR.UTF-8', + 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342', + 'ff_sn': 'ff_SN.UTF-8', + 'fi': 'fi_FI.ISO8859-15', + 'fi.iso885915': 'fi_FI.ISO8859-15', + 'fi_fi': 'fi_FI.ISO8859-15', + 'fi_fi.88591': 'fi_FI.ISO8859-1', + 'fi_fi.iso88591': 'fi_FI.ISO8859-1', + 'fi_fi.iso885915': 'fi_FI.ISO8859-15', + 'fi_fi.iso885915@euro': 'fi_FI.ISO8859-15', + 'fi_fi.utf8@euro': 'fi_FI.UTF-8', + 'fi_fi@euro': 'fi_FI.ISO8859-15', + 'fil_ph': 'fil_PH.UTF-8', + 'finnish': 'fi_FI.ISO8859-1', + 'finnish.iso88591': 'fi_FI.ISO8859-1', + 'fo': 'fo_FO.ISO8859-1', + 'fo_fo': 'fo_FO.ISO8859-1', + 'fo_fo.iso88591': 'fo_FO.ISO8859-1', + 'fo_fo.iso885915': 'fo_FO.ISO8859-15', + 'fo_fo@euro': 'fo_FO.ISO8859-15', + 'fr': 'fr_FR.ISO8859-1', + 'fr.iso885915': 'fr_FR.ISO8859-15', + 'fr_be': 'fr_BE.ISO8859-1', + 'fr_be.88591': 'fr_BE.ISO8859-1', + 'fr_be.iso88591': 'fr_BE.ISO8859-1', + 'fr_be.iso885915': 'fr_BE.ISO8859-15', + 'fr_be.iso885915@euro': 'fr_BE.ISO8859-15', + 'fr_be.utf8@euro': 'fr_BE.UTF-8', + 'fr_be@euro': 'fr_BE.ISO8859-15', + 'fr_ca': 'fr_CA.ISO8859-1', + 'fr_ca.88591': 'fr_CA.ISO8859-1', + 'fr_ca.iso88591': 'fr_CA.ISO8859-1', + 'fr_ca.iso885915': 'fr_CA.ISO8859-15', + 'fr_ca@euro': 'fr_CA.ISO8859-15', + 'fr_ch': 'fr_CH.ISO8859-1', + 'fr_ch.88591': 'fr_CH.ISO8859-1', + 'fr_ch.iso88591': 'fr_CH.ISO8859-1', + 'fr_ch.iso885915': 'fr_CH.ISO8859-15', + 'fr_ch@euro': 'fr_CH.ISO8859-15', + 'fr_fr': 'fr_FR.ISO8859-1', + 'fr_fr.88591': 'fr_FR.ISO8859-1', + 'fr_fr.iso88591': 'fr_FR.ISO8859-1', + 'fr_fr.iso885915': 'fr_FR.ISO8859-15', + 'fr_fr.iso885915@euro': 'fr_FR.ISO8859-15', + 'fr_fr.utf8@euro': 'fr_FR.UTF-8', + 'fr_fr@euro': 'fr_FR.ISO8859-15', + 'fr_lu': 'fr_LU.ISO8859-1', + 'fr_lu.88591': 'fr_LU.ISO8859-1', + 'fr_lu.iso88591': 'fr_LU.ISO8859-1', + 'fr_lu.iso885915': 'fr_LU.ISO8859-15', + 'fr_lu.iso885915@euro': 'fr_LU.ISO8859-15', + 'fr_lu.utf8@euro': 'fr_LU.UTF-8', + 'fr_lu@euro': 'fr_LU.ISO8859-15', + 'fran\xe7ais': 'fr_FR.ISO8859-1', + 'fre_fr': 'fr_FR.ISO8859-1', + 'fre_fr.8859': 'fr_FR.ISO8859-1', + 'french': 'fr_FR.ISO8859-1', + 'french.iso88591': 'fr_CH.ISO8859-1', + 'french_france': 'fr_FR.ISO8859-1', + 'french_france.8859': 'fr_FR.ISO8859-1', + 'fur_it': 'fur_IT.UTF-8', + 'fy_de': 'fy_DE.UTF-8', + 'fy_nl': 'fy_NL.UTF-8', + 'ga': 'ga_IE.ISO8859-1', + 'ga_ie': 'ga_IE.ISO8859-1', + 'ga_ie.iso88591': 'ga_IE.ISO8859-1', + 'ga_ie.iso885914': 'ga_IE.ISO8859-14', + 'ga_ie.iso885915': 'ga_IE.ISO8859-15', + 'ga_ie.iso885915@euro': 'ga_IE.ISO8859-15', + 'ga_ie.utf8@euro': 'ga_IE.UTF-8', + 'ga_ie@euro': 'ga_IE.ISO8859-15', + 'galego': 'gl_ES.ISO8859-1', + 'galician': 'gl_ES.ISO8859-1', + 'gd': 'gd_GB.ISO8859-1', + 'gd_gb': 'gd_GB.ISO8859-1', + 'gd_gb.iso88591': 'gd_GB.ISO8859-1', + 'gd_gb.iso885914': 'gd_GB.ISO8859-14', + 'gd_gb.iso885915': 'gd_GB.ISO8859-15', + 'gd_gb@euro': 'gd_GB.ISO8859-15', + 'ger_de': 'de_DE.ISO8859-1', + 'ger_de.8859': 'de_DE.ISO8859-1', + 'german': 'de_DE.ISO8859-1', + 'german.iso88591': 'de_CH.ISO8859-1', + 'german_germany': 'de_DE.ISO8859-1', + 'german_germany.8859': 'de_DE.ISO8859-1', + 'gez_er': 'gez_ER.UTF-8', + 'gez_et': 'gez_ET.UTF-8', + 'gl': 'gl_ES.ISO8859-1', + 'gl_es': 'gl_ES.ISO8859-1', + 'gl_es.iso88591': 'gl_ES.ISO8859-1', + 'gl_es.iso885915': 'gl_ES.ISO8859-15', + 'gl_es.iso885915@euro': 'gl_ES.ISO8859-15', + 'gl_es.utf8@euro': 'gl_ES.UTF-8', + 'gl_es@euro': 'gl_ES.ISO8859-15', + 'greek': 'el_GR.ISO8859-7', + 'greek.iso88597': 'el_GR.ISO8859-7', + 'gu_in': 'gu_IN.UTF-8', + 'gv': 'gv_GB.ISO8859-1', + 'gv_gb': 'gv_GB.ISO8859-1', + 'gv_gb.iso88591': 'gv_GB.ISO8859-1', + 'gv_gb.iso885914': 'gv_GB.ISO8859-14', + 'gv_gb.iso885915': 'gv_GB.ISO8859-15', + 'gv_gb@euro': 'gv_GB.ISO8859-15', + 'ha_ng': 'ha_NG.UTF-8', + 'he': 'he_IL.ISO8859-8', + 'he_il': 'he_IL.ISO8859-8', + 'he_il.cp1255': 'he_IL.CP1255', + 'he_il.iso88598': 'he_IL.ISO8859-8', + 'he_il.microsoftcp1255': 'he_IL.CP1255', + 'hebrew': 'he_IL.ISO8859-8', + 'hebrew.iso88598': 'he_IL.ISO8859-8', + 'hi': 'hi_IN.ISCII-DEV', + 'hi_in': 'hi_IN.ISCII-DEV', + 'hi_in.isciidev': 'hi_IN.ISCII-DEV', + 'hne': 'hne_IN.UTF-8', + 'hne_in': 'hne_IN.UTF-8', + 'hr': 'hr_HR.ISO8859-2', + 'hr_hr': 'hr_HR.ISO8859-2', + 'hr_hr.iso88592': 'hr_HR.ISO8859-2', + 'hrvatski': 'hr_HR.ISO8859-2', + 'hsb_de': 'hsb_DE.ISO8859-2', + 'ht_ht': 'ht_HT.UTF-8', + 'hu': 'hu_HU.ISO8859-2', + 'hu_hu': 'hu_HU.ISO8859-2', + 'hu_hu.iso88592': 'hu_HU.ISO8859-2', + 'hungarian': 'hu_HU.ISO8859-2', + 'hy_am': 'hy_AM.UTF-8', + 'hy_am.armscii8': 'hy_AM.ARMSCII_8', + 'ia': 'ia.UTF-8', + 'ia_fr': 'ia_FR.UTF-8', + 'icelandic': 'is_IS.ISO8859-1', + 'icelandic.iso88591': 'is_IS.ISO8859-1', + 'id': 'id_ID.ISO8859-1', + 'id_id': 'id_ID.ISO8859-1', + 'ig_ng': 'ig_NG.UTF-8', + 'ik_ca': 'ik_CA.UTF-8', + 'in': 'id_ID.ISO8859-1', + 'in_id': 'id_ID.ISO8859-1', + 'is': 'is_IS.ISO8859-1', + 'is_is': 'is_IS.ISO8859-1', + 'is_is.iso88591': 'is_IS.ISO8859-1', + 'is_is.iso885915': 'is_IS.ISO8859-15', + 'is_is@euro': 'is_IS.ISO8859-15', + 'iso-8859-1': 'en_US.ISO8859-1', + 'iso-8859-15': 'en_US.ISO8859-15', + 'iso8859-1': 'en_US.ISO8859-1', + 'iso8859-15': 'en_US.ISO8859-15', + 'iso_8859_1': 'en_US.ISO8859-1', + 'iso_8859_15': 'en_US.ISO8859-15', + 'it': 'it_IT.ISO8859-1', + 'it.iso885915': 'it_IT.ISO8859-15', + 'it_ch': 'it_CH.ISO8859-1', + 'it_ch.iso88591': 'it_CH.ISO8859-1', + 'it_ch.iso885915': 'it_CH.ISO8859-15', + 'it_ch@euro': 'it_CH.ISO8859-15', + 'it_it': 'it_IT.ISO8859-1', + 'it_it.88591': 'it_IT.ISO8859-1', + 'it_it.iso88591': 'it_IT.ISO8859-1', + 'it_it.iso885915': 'it_IT.ISO8859-15', + 'it_it.iso885915@euro': 'it_IT.ISO8859-15', + 'it_it.utf8@euro': 'it_IT.UTF-8', + 'it_it@euro': 'it_IT.ISO8859-15', + 'italian': 'it_IT.ISO8859-1', + 'italian.iso88591': 'it_IT.ISO8859-1', + 'iu': 'iu_CA.NUNACOM-8', + 'iu_ca': 'iu_CA.NUNACOM-8', + 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8', + 'iw': 'he_IL.ISO8859-8', + 'iw_il': 'he_IL.ISO8859-8', + 'iw_il.iso88598': 'he_IL.ISO8859-8', + 'iw_il.utf8': 'iw_IL.UTF-8', + 'ja': 'ja_JP.eucJP', + 'ja.jis': 'ja_JP.JIS7', + 'ja.sjis': 'ja_JP.SJIS', + 'ja_jp': 'ja_JP.eucJP', + 'ja_jp.ajec': 'ja_JP.eucJP', + 'ja_jp.euc': 'ja_JP.eucJP', + 'ja_jp.eucjp': 'ja_JP.eucJP', + 'ja_jp.iso-2022-jp': 'ja_JP.JIS7', + 'ja_jp.iso2022jp': 'ja_JP.JIS7', + 'ja_jp.jis': 'ja_JP.JIS7', + 'ja_jp.jis7': 'ja_JP.JIS7', + 'ja_jp.mscode': 'ja_JP.SJIS', + 'ja_jp.pck': 'ja_JP.SJIS', + 'ja_jp.sjis': 'ja_JP.SJIS', + 'ja_jp.ujis': 'ja_JP.eucJP', + 'japan': 'ja_JP.eucJP', + 'japanese': 'ja_JP.eucJP', + 'japanese-euc': 'ja_JP.eucJP', + 'japanese.euc': 'ja_JP.eucJP', + 'japanese.sjis': 'ja_JP.SJIS', + 'jp_jp': 'ja_JP.eucJP', + 'ka': 'ka_GE.GEORGIAN-ACADEMY', + 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY', + 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY', + 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS', + 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY', + 'kk_kz': 'kk_KZ.RK1048', + 'kl': 'kl_GL.ISO8859-1', + 'kl_gl': 'kl_GL.ISO8859-1', + 'kl_gl.iso88591': 'kl_GL.ISO8859-1', + 'kl_gl.iso885915': 'kl_GL.ISO8859-15', + 'kl_gl@euro': 'kl_GL.ISO8859-15', + 'km_kh': 'km_KH.UTF-8', + 'kn': 'kn_IN.UTF-8', + 'kn_in': 'kn_IN.UTF-8', + 'ko': 'ko_KR.eucKR', + 'ko_kr': 'ko_KR.eucKR', + 'ko_kr.euc': 'ko_KR.eucKR', + 'ko_kr.euckr': 'ko_KR.eucKR', + 'kok_in': 'kok_IN.UTF-8', + 'korean': 'ko_KR.eucKR', + 'korean.euc': 'ko_KR.eucKR', + 'ks': 'ks_IN.UTF-8', + 'ks_in': 'ks_IN.UTF-8', + 'ks_in@devanagari': 'ks_IN.UTF-8@devanagari', + 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari', + 'ku_tr': 'ku_TR.ISO8859-9', + 'kw': 'kw_GB.ISO8859-1', + 'kw_gb': 'kw_GB.ISO8859-1', + 'kw_gb.iso88591': 'kw_GB.ISO8859-1', + 'kw_gb.iso885914': 'kw_GB.ISO8859-14', + 'kw_gb.iso885915': 'kw_GB.ISO8859-15', + 'kw_gb@euro': 'kw_GB.ISO8859-15', + 'ky': 'ky_KG.UTF-8', + 'ky_kg': 'ky_KG.UTF-8', + 'lb_lu': 'lb_LU.UTF-8', + 'lg_ug': 'lg_UG.ISO8859-10', + 'li_be': 'li_BE.UTF-8', + 'li_nl': 'li_NL.UTF-8', + 'lij_it': 'lij_IT.UTF-8', + 'lithuanian': 'lt_LT.ISO8859-13', + 'lo': 'lo_LA.MULELAO-1', + 'lo_la': 'lo_LA.MULELAO-1', + 'lo_la.cp1133': 'lo_LA.IBM-CP1133', + 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133', + 'lo_la.mulelao1': 'lo_LA.MULELAO-1', + 'lt': 'lt_LT.ISO8859-13', + 'lt_lt': 'lt_LT.ISO8859-13', + 'lt_lt.iso885913': 'lt_LT.ISO8859-13', + 'lt_lt.iso88594': 'lt_LT.ISO8859-4', + 'lv': 'lv_LV.ISO8859-13', + 'lv_lv': 'lv_LV.ISO8859-13', + 'lv_lv.iso885913': 'lv_LV.ISO8859-13', + 'lv_lv.iso88594': 'lv_LV.ISO8859-4', + 'mag_in': 'mag_IN.UTF-8', + 'mai': 'mai_IN.UTF-8', + 'mai_in': 'mai_IN.UTF-8', + 'mg_mg': 'mg_MG.ISO8859-15', + 'mhr_ru': 'mhr_RU.UTF-8', + 'mi': 'mi_NZ.ISO8859-1', + 'mi_nz': 'mi_NZ.ISO8859-1', + 'mi_nz.iso88591': 'mi_NZ.ISO8859-1', + 'mk': 'mk_MK.ISO8859-5', + 'mk_mk': 'mk_MK.ISO8859-5', + 'mk_mk.cp1251': 'mk_MK.CP1251', + 'mk_mk.iso88595': 'mk_MK.ISO8859-5', + 'mk_mk.microsoftcp1251': 'mk_MK.CP1251', + 'ml': 'ml_IN.UTF-8', + 'ml_in': 'ml_IN.UTF-8', + 'mn_mn': 'mn_MN.UTF-8', + 'mni_in': 'mni_IN.UTF-8', + 'mr': 'mr_IN.UTF-8', + 'mr_in': 'mr_IN.UTF-8', + 'ms': 'ms_MY.ISO8859-1', + 'ms_my': 'ms_MY.ISO8859-1', + 'ms_my.iso88591': 'ms_MY.ISO8859-1', + 'mt': 'mt_MT.ISO8859-3', + 'mt_mt': 'mt_MT.ISO8859-3', + 'mt_mt.iso88593': 'mt_MT.ISO8859-3', + 'my_mm': 'my_MM.UTF-8', + 'nan_tw@latin': 'nan_TW.UTF-8@latin', + 'nb': 'nb_NO.ISO8859-1', + 'nb_no': 'nb_NO.ISO8859-1', + 'nb_no.88591': 'nb_NO.ISO8859-1', + 'nb_no.iso88591': 'nb_NO.ISO8859-1', + 'nb_no.iso885915': 'nb_NO.ISO8859-15', + 'nb_no@euro': 'nb_NO.ISO8859-15', + 'nds_de': 'nds_DE.UTF-8', + 'nds_nl': 'nds_NL.UTF-8', + 'ne_np': 'ne_NP.UTF-8', + 'nhn_mx': 'nhn_MX.UTF-8', + 'niu_nu': 'niu_NU.UTF-8', + 'niu_nz': 'niu_NZ.UTF-8', + 'nl': 'nl_NL.ISO8859-1', + 'nl.iso885915': 'nl_NL.ISO8859-15', + 'nl_aw': 'nl_AW.UTF-8', + 'nl_be': 'nl_BE.ISO8859-1', + 'nl_be.88591': 'nl_BE.ISO8859-1', + 'nl_be.iso88591': 'nl_BE.ISO8859-1', + 'nl_be.iso885915': 'nl_BE.ISO8859-15', + 'nl_be.iso885915@euro': 'nl_BE.ISO8859-15', + 'nl_be.utf8@euro': 'nl_BE.UTF-8', + 'nl_be@euro': 'nl_BE.ISO8859-15', + 'nl_nl': 'nl_NL.ISO8859-1', + 'nl_nl.88591': 'nl_NL.ISO8859-1', + 'nl_nl.iso88591': 'nl_NL.ISO8859-1', + 'nl_nl.iso885915': 'nl_NL.ISO8859-15', + 'nl_nl.iso885915@euro': 'nl_NL.ISO8859-15', + 'nl_nl.utf8@euro': 'nl_NL.UTF-8', + 'nl_nl@euro': 'nl_NL.ISO8859-15', + 'nn': 'nn_NO.ISO8859-1', + 'nn_no': 'nn_NO.ISO8859-1', + 'nn_no.88591': 'nn_NO.ISO8859-1', + 'nn_no.iso88591': 'nn_NO.ISO8859-1', + 'nn_no.iso885915': 'nn_NO.ISO8859-15', + 'nn_no@euro': 'nn_NO.ISO8859-15', + 'no': 'no_NO.ISO8859-1', + 'no@nynorsk': 'ny_NO.ISO8859-1', + 'no_no': 'no_NO.ISO8859-1', + 'no_no.88591': 'no_NO.ISO8859-1', + 'no_no.iso88591': 'no_NO.ISO8859-1', + 'no_no.iso885915': 'no_NO.ISO8859-15', + 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1', + 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1', + 'no_no@euro': 'no_NO.ISO8859-15', + 'norwegian': 'no_NO.ISO8859-1', + 'norwegian.iso88591': 'no_NO.ISO8859-1', + 'nr': 'nr_ZA.ISO8859-1', + 'nr_za': 'nr_ZA.ISO8859-1', + 'nr_za.iso88591': 'nr_ZA.ISO8859-1', + 'nso': 'nso_ZA.ISO8859-15', + 'nso_za': 'nso_ZA.ISO8859-15', + 'nso_za.iso885915': 'nso_ZA.ISO8859-15', + 'ny': 'ny_NO.ISO8859-1', + 'ny_no': 'ny_NO.ISO8859-1', + 'ny_no.88591': 'ny_NO.ISO8859-1', + 'ny_no.iso88591': 'ny_NO.ISO8859-1', + 'ny_no.iso885915': 'ny_NO.ISO8859-15', + 'ny_no@euro': 'ny_NO.ISO8859-15', + 'nynorsk': 'nn_NO.ISO8859-1', + 'oc': 'oc_FR.ISO8859-1', + 'oc_fr': 'oc_FR.ISO8859-1', + 'oc_fr.iso88591': 'oc_FR.ISO8859-1', + 'oc_fr.iso885915': 'oc_FR.ISO8859-15', + 'oc_fr@euro': 'oc_FR.ISO8859-15', + 'om_et': 'om_ET.UTF-8', + 'om_ke': 'om_KE.ISO8859-1', + 'or': 'or_IN.UTF-8', + 'or_in': 'or_IN.UTF-8', + 'os_ru': 'os_RU.UTF-8', + 'pa': 'pa_IN.UTF-8', + 'pa_in': 'pa_IN.UTF-8', + 'pa_pk': 'pa_PK.UTF-8', + 'pap_an': 'pap_AN.UTF-8', + 'pd': 'pd_US.ISO8859-1', + 'pd_de': 'pd_DE.ISO8859-1', + 'pd_de.iso88591': 'pd_DE.ISO8859-1', + 'pd_de.iso885915': 'pd_DE.ISO8859-15', + 'pd_de@euro': 'pd_DE.ISO8859-15', + 'pd_us': 'pd_US.ISO8859-1', + 'pd_us.iso88591': 'pd_US.ISO8859-1', + 'pd_us.iso885915': 'pd_US.ISO8859-15', + 'pd_us@euro': 'pd_US.ISO8859-15', + 'ph': 'ph_PH.ISO8859-1', + 'ph_ph': 'ph_PH.ISO8859-1', + 'ph_ph.iso88591': 'ph_PH.ISO8859-1', + 'pl': 'pl_PL.ISO8859-2', + 'pl_pl': 'pl_PL.ISO8859-2', + 'pl_pl.iso88592': 'pl_PL.ISO8859-2', + 'polish': 'pl_PL.ISO8859-2', + 'portuguese': 'pt_PT.ISO8859-1', + 'portuguese.iso88591': 'pt_PT.ISO8859-1', + 'portuguese_brazil': 'pt_BR.ISO8859-1', + 'portuguese_brazil.8859': 'pt_BR.ISO8859-1', + 'posix': 'C', + 'posix-utf2': 'C', + 'pp': 'pp_AN.ISO8859-1', + 'pp_an': 'pp_AN.ISO8859-1', + 'pp_an.iso88591': 'pp_AN.ISO8859-1', + 'ps_af': 'ps_AF.UTF-8', + 'pt': 'pt_PT.ISO8859-1', + 'pt.iso885915': 'pt_PT.ISO8859-15', + 'pt_br': 'pt_BR.ISO8859-1', + 'pt_br.88591': 'pt_BR.ISO8859-1', + 'pt_br.iso88591': 'pt_BR.ISO8859-1', + 'pt_br.iso885915': 'pt_BR.ISO8859-15', + 'pt_br@euro': 'pt_BR.ISO8859-15', + 'pt_pt': 'pt_PT.ISO8859-1', + 'pt_pt.88591': 'pt_PT.ISO8859-1', + 'pt_pt.iso88591': 'pt_PT.ISO8859-1', + 'pt_pt.iso885915': 'pt_PT.ISO8859-15', + 'pt_pt.iso885915@euro': 'pt_PT.ISO8859-15', + 'pt_pt.utf8@euro': 'pt_PT.UTF-8', + 'pt_pt@euro': 'pt_PT.ISO8859-15', + 'ro': 'ro_RO.ISO8859-2', + 'ro_ro': 'ro_RO.ISO8859-2', + 'ro_ro.iso88592': 'ro_RO.ISO8859-2', + 'romanian': 'ro_RO.ISO8859-2', + 'ru': 'ru_RU.UTF-8', + 'ru.koi8r': 'ru_RU.KOI8-R', + 'ru_ru': 'ru_RU.UTF-8', + 'ru_ru.cp1251': 'ru_RU.CP1251', + 'ru_ru.iso88595': 'ru_RU.ISO8859-5', + 'ru_ru.koi8r': 'ru_RU.KOI8-R', + 'ru_ru.microsoftcp1251': 'ru_RU.CP1251', + 'ru_ua': 'ru_UA.KOI8-U', + 'ru_ua.cp1251': 'ru_UA.CP1251', + 'ru_ua.koi8u': 'ru_UA.KOI8-U', + 'ru_ua.microsoftcp1251': 'ru_UA.CP1251', + 'rumanian': 'ro_RO.ISO8859-2', + 'russian': 'ru_RU.ISO8859-5', + 'rw': 'rw_RW.ISO8859-1', + 'rw_rw': 'rw_RW.ISO8859-1', + 'rw_rw.iso88591': 'rw_RW.ISO8859-1', + 'sa_in': 'sa_IN.UTF-8', + 'sat_in': 'sat_IN.UTF-8', + 'sc_it': 'sc_IT.UTF-8', + 'sd': 'sd_IN.UTF-8', + 'sd@devanagari': 'sd_IN.UTF-8@devanagari', + 'sd_in': 'sd_IN.UTF-8', + 'sd_in@devanagari': 'sd_IN.UTF-8@devanagari', + 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari', + 'sd_pk': 'sd_PK.UTF-8', + 'se_no': 'se_NO.UTF-8', + 'serbocroatian': 'sr_RS.UTF-8@latin', + 'sh': 'sr_RS.UTF-8@latin', + 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2', + 'sh_hr': 'sh_HR.ISO8859-2', + 'sh_hr.iso88592': 'hr_HR.ISO8859-2', + 'sh_sp': 'sr_CS.ISO8859-2', + 'sh_yu': 'sr_RS.UTF-8@latin', + 'shs_ca': 'shs_CA.UTF-8', + 'si': 'si_LK.UTF-8', + 'si_lk': 'si_LK.UTF-8', + 'sid_et': 'sid_ET.UTF-8', + 'sinhala': 'si_LK.UTF-8', + 'sk': 'sk_SK.ISO8859-2', + 'sk_sk': 'sk_SK.ISO8859-2', + 'sk_sk.iso88592': 'sk_SK.ISO8859-2', + 'sl': 'sl_SI.ISO8859-2', + 'sl_cs': 'sl_CS.ISO8859-2', + 'sl_si': 'sl_SI.ISO8859-2', + 'sl_si.iso88592': 'sl_SI.ISO8859-2', + 'slovak': 'sk_SK.ISO8859-2', + 'slovene': 'sl_SI.ISO8859-2', + 'slovenian': 'sl_SI.ISO8859-2', + 'so_dj': 'so_DJ.ISO8859-1', + 'so_et': 'so_ET.UTF-8', + 'so_ke': 'so_KE.ISO8859-1', + 'so_so': 'so_SO.ISO8859-1', + 'sp': 'sr_CS.ISO8859-5', + 'sp_yu': 'sr_CS.ISO8859-5', + 'spanish': 'es_ES.ISO8859-1', + 'spanish.iso88591': 'es_ES.ISO8859-1', + 'spanish_spain': 'es_ES.ISO8859-1', + 'spanish_spain.8859': 'es_ES.ISO8859-1', + 'sq': 'sq_AL.ISO8859-2', + 'sq_al': 'sq_AL.ISO8859-2', + 'sq_al.iso88592': 'sq_AL.ISO8859-2', + 'sq_mk': 'sq_MK.UTF-8', + 'sr': 'sr_RS.UTF-8', + 'sr@cyrillic': 'sr_RS.UTF-8', + 'sr@latin': 'sr_RS.UTF-8@latin', + 'sr@latn': 'sr_CS.UTF-8@latin', + 'sr_cs': 'sr_CS.UTF-8', + 'sr_cs.iso88592': 'sr_CS.ISO8859-2', + 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2', + 'sr_cs.iso88595': 'sr_CS.ISO8859-5', + 'sr_cs.utf8@latn': 'sr_CS.UTF-8@latin', + 'sr_cs@latn': 'sr_CS.UTF-8@latin', + 'sr_me': 'sr_ME.UTF-8', + 'sr_rs': 'sr_RS.UTF-8', + 'sr_rs@latin': 'sr_RS.UTF-8@latin', + 'sr_rs@latn': 'sr_RS.UTF-8@latin', + 'sr_sp': 'sr_CS.ISO8859-2', + 'sr_yu': 'sr_RS.UTF-8@latin', + 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251', + 'sr_yu.iso88592': 'sr_CS.ISO8859-2', + 'sr_yu.iso88595': 'sr_CS.ISO8859-5', + 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5', + 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251', + 'sr_yu.utf8': 'sr_RS.UTF-8', + 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8', + 'sr_yu@cyrillic': 'sr_RS.UTF-8', + 'ss': 'ss_ZA.ISO8859-1', + 'ss_za': 'ss_ZA.ISO8859-1', + 'ss_za.iso88591': 'ss_ZA.ISO8859-1', + 'st': 'st_ZA.ISO8859-1', + 'st_za': 'st_ZA.ISO8859-1', + 'st_za.iso88591': 'st_ZA.ISO8859-1', + 'sv': 'sv_SE.ISO8859-1', + 'sv.iso885915': 'sv_SE.ISO8859-15', + 'sv_fi': 'sv_FI.ISO8859-1', + 'sv_fi.iso88591': 'sv_FI.ISO8859-1', + 'sv_fi.iso885915': 'sv_FI.ISO8859-15', + 'sv_fi.iso885915@euro': 'sv_FI.ISO8859-15', + 'sv_fi.utf8@euro': 'sv_FI.UTF-8', + 'sv_fi@euro': 'sv_FI.ISO8859-15', + 'sv_se': 'sv_SE.ISO8859-1', + 'sv_se.88591': 'sv_SE.ISO8859-1', + 'sv_se.iso88591': 'sv_SE.ISO8859-1', + 'sv_se.iso885915': 'sv_SE.ISO8859-15', + 'sv_se@euro': 'sv_SE.ISO8859-15', + 'sw_ke': 'sw_KE.UTF-8', + 'sw_tz': 'sw_TZ.UTF-8', + 'swedish': 'sv_SE.ISO8859-1', + 'swedish.iso88591': 'sv_SE.ISO8859-1', + 'szl_pl': 'szl_PL.UTF-8', + 'ta': 'ta_IN.TSCII-0', + 'ta_in': 'ta_IN.TSCII-0', + 'ta_in.tscii': 'ta_IN.TSCII-0', + 'ta_in.tscii0': 'ta_IN.TSCII-0', + 'ta_lk': 'ta_LK.UTF-8', + 'te': 'te_IN.UTF-8', + 'te_in': 'te_IN.UTF-8', + 'tg': 'tg_TJ.KOI8-C', + 'tg_tj': 'tg_TJ.KOI8-C', + 'tg_tj.koi8c': 'tg_TJ.KOI8-C', + 'th': 'th_TH.ISO8859-11', + 'th_th': 'th_TH.ISO8859-11', + 'th_th.iso885911': 'th_TH.ISO8859-11', + 'th_th.tactis': 'th_TH.TIS620', + 'th_th.tis620': 'th_TH.TIS620', + 'thai': 'th_TH.ISO8859-11', + 'ti_er': 'ti_ER.UTF-8', + 'ti_et': 'ti_ET.UTF-8', + 'tig_er': 'tig_ER.UTF-8', + 'tk_tm': 'tk_TM.UTF-8', + 'tl': 'tl_PH.ISO8859-1', + 'tl_ph': 'tl_PH.ISO8859-1', + 'tl_ph.iso88591': 'tl_PH.ISO8859-1', + 'tn': 'tn_ZA.ISO8859-15', + 'tn_za': 'tn_ZA.ISO8859-15', + 'tn_za.iso885915': 'tn_ZA.ISO8859-15', + 'tr': 'tr_TR.ISO8859-9', + 'tr_cy': 'tr_CY.ISO8859-9', + 'tr_tr': 'tr_TR.ISO8859-9', + 'tr_tr.iso88599': 'tr_TR.ISO8859-9', + 'ts': 'ts_ZA.ISO8859-1', + 'ts_za': 'ts_ZA.ISO8859-1', + 'ts_za.iso88591': 'ts_ZA.ISO8859-1', + 'tt': 'tt_RU.TATAR-CYR', + 'tt_ru': 'tt_RU.TATAR-CYR', + 'tt_ru.koi8c': 'tt_RU.KOI8-C', + 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR', + 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif', + 'turkish': 'tr_TR.ISO8859-9', + 'turkish.iso88599': 'tr_TR.ISO8859-9', + 'ug_cn': 'ug_CN.UTF-8', + 'uk': 'uk_UA.KOI8-U', + 'uk_ua': 'uk_UA.KOI8-U', + 'uk_ua.cp1251': 'uk_UA.CP1251', + 'uk_ua.iso88595': 'uk_UA.ISO8859-5', + 'uk_ua.koi8u': 'uk_UA.KOI8-U', + 'uk_ua.microsoftcp1251': 'uk_UA.CP1251', + 'univ': 'en_US.UTF-8', + 'universal': 'en_US.UTF-8', + 'universal.utf8@ucs4': 'en_US.UTF-8', + 'unm_us': 'unm_US.UTF-8', + 'ur': 'ur_PK.CP1256', + 'ur_in': 'ur_IN.UTF-8', + 'ur_pk': 'ur_PK.CP1256', + 'ur_pk.cp1256': 'ur_PK.CP1256', + 'ur_pk.microsoftcp1256': 'ur_PK.CP1256', + 'uz': 'uz_UZ.UTF-8', + 'uz_uz': 'uz_UZ.UTF-8', + 'uz_uz.iso88591': 'uz_UZ.ISO8859-1', + 'uz_uz.utf8@cyrillic': 'uz_UZ.UTF-8', + 'uz_uz@cyrillic': 'uz_UZ.UTF-8', + 've': 've_ZA.UTF-8', + 've_za': 've_ZA.UTF-8', + 'vi': 'vi_VN.TCVN', + 'vi_vn': 'vi_VN.TCVN', + 'vi_vn.tcvn': 'vi_VN.TCVN', + 'vi_vn.tcvn5712': 'vi_VN.TCVN', + 'vi_vn.viscii': 'vi_VN.VISCII', + 'vi_vn.viscii111': 'vi_VN.VISCII', + 'wa': 'wa_BE.ISO8859-1', + 'wa_be': 'wa_BE.ISO8859-1', + 'wa_be.iso88591': 'wa_BE.ISO8859-1', + 'wa_be.iso885915': 'wa_BE.ISO8859-15', + 'wa_be.iso885915@euro': 'wa_BE.ISO8859-15', + 'wa_be@euro': 'wa_BE.ISO8859-15', + 'wae_ch': 'wae_CH.UTF-8', + 'wal_et': 'wal_ET.UTF-8', + 'wo_sn': 'wo_SN.UTF-8', + 'xh': 'xh_ZA.ISO8859-1', + 'xh_za': 'xh_ZA.ISO8859-1', + 'xh_za.iso88591': 'xh_ZA.ISO8859-1', + 'yi': 'yi_US.CP1255', + 'yi_us': 'yi_US.CP1255', + 'yi_us.cp1255': 'yi_US.CP1255', + 'yi_us.microsoftcp1255': 'yi_US.CP1255', + 'yo_ng': 'yo_NG.UTF-8', + 'yue_hk': 'yue_HK.UTF-8', + 'zh': 'zh_CN.eucCN', + 'zh_cn': 'zh_CN.gb2312', + 'zh_cn.big5': 'zh_TW.big5', + 'zh_cn.euc': 'zh_CN.eucCN', + 'zh_cn.gb18030': 'zh_CN.gb18030', + 'zh_cn.gb2312': 'zh_CN.gb2312', + 'zh_cn.gbk': 'zh_CN.gbk', + 'zh_hk': 'zh_HK.big5hkscs', + 'zh_hk.big5': 'zh_HK.big5', + 'zh_hk.big5hk': 'zh_HK.big5hkscs', + 'zh_hk.big5hkscs': 'zh_HK.big5hkscs', + 'zh_sg': 'zh_SG.GB2312', + 'zh_sg.gbk': 'zh_SG.GBK', + 'zh_tw': 'zh_TW.big5', + 'zh_tw.big5': 'zh_TW.big5', + 'zh_tw.euc': 'zh_TW.eucTW', + 'zh_tw.euctw': 'zh_TW.eucTW', + 'zu': 'zu_ZA.ISO8859-1', + 'zu_za': 'zu_ZA.ISO8859-1', + 'zu_za.iso88591': 'zu_ZA.ISO8859-1', +} + +# +# This maps Windows language identifiers to locale strings. +# +# This list has been updated from +# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp +# to include every locale up to Windows Vista. +# +# NOTE: this mapping is incomplete. If your language is missing, please +# submit a bug report to the Python bug tracker at http://bugs.python.org/ +# Make sure you include the missing language identifier and the suggested +# locale code. +# + +windows_locale = { + 0x0436: "af_ZA", # Afrikaans + 0x041c: "sq_AL", # Albanian + 0x0484: "gsw_FR",# Alsatian - France + 0x045e: "am_ET", # Amharic - Ethiopia + 0x0401: "ar_SA", # Arabic - Saudi Arabia + 0x0801: "ar_IQ", # Arabic - Iraq + 0x0c01: "ar_EG", # Arabic - Egypt + 0x1001: "ar_LY", # Arabic - Libya + 0x1401: "ar_DZ", # Arabic - Algeria + 0x1801: "ar_MA", # Arabic - Morocco + 0x1c01: "ar_TN", # Arabic - Tunisia + 0x2001: "ar_OM", # Arabic - Oman + 0x2401: "ar_YE", # Arabic - Yemen + 0x2801: "ar_SY", # Arabic - Syria + 0x2c01: "ar_JO", # Arabic - Jordan + 0x3001: "ar_LB", # Arabic - Lebanon + 0x3401: "ar_KW", # Arabic - Kuwait + 0x3801: "ar_AE", # Arabic - United Arab Emirates + 0x3c01: "ar_BH", # Arabic - Bahrain + 0x4001: "ar_QA", # Arabic - Qatar + 0x042b: "hy_AM", # Armenian + 0x044d: "as_IN", # Assamese - India + 0x042c: "az_AZ", # Azeri - Latin + 0x082c: "az_AZ", # Azeri - Cyrillic + 0x046d: "ba_RU", # Bashkir + 0x042d: "eu_ES", # Basque - Russia + 0x0423: "be_BY", # Belarusian + 0x0445: "bn_IN", # Begali + 0x201a: "bs_BA", # Bosnian - Cyrillic + 0x141a: "bs_BA", # Bosnian - Latin + 0x047e: "br_FR", # Breton - France + 0x0402: "bg_BG", # Bulgarian +# 0x0455: "my_MM", # Burmese - Not supported + 0x0403: "ca_ES", # Catalan + 0x0004: "zh_CHS",# Chinese - Simplified + 0x0404: "zh_TW", # Chinese - Taiwan + 0x0804: "zh_CN", # Chinese - PRC + 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R. + 0x1004: "zh_SG", # Chinese - Singapore + 0x1404: "zh_MO", # Chinese - Macao S.A.R. + 0x7c04: "zh_CHT",# Chinese - Traditional + 0x0483: "co_FR", # Corsican - France + 0x041a: "hr_HR", # Croatian + 0x101a: "hr_BA", # Croatian - Bosnia + 0x0405: "cs_CZ", # Czech + 0x0406: "da_DK", # Danish + 0x048c: "gbz_AF",# Dari - Afghanistan + 0x0465: "div_MV",# Divehi - Maldives + 0x0413: "nl_NL", # Dutch - The Netherlands + 0x0813: "nl_BE", # Dutch - Belgium + 0x0409: "en_US", # English - United States + 0x0809: "en_GB", # English - United Kingdom + 0x0c09: "en_AU", # English - Australia + 0x1009: "en_CA", # English - Canada + 0x1409: "en_NZ", # English - New Zealand + 0x1809: "en_IE", # English - Ireland + 0x1c09: "en_ZA", # English - South Africa + 0x2009: "en_JA", # English - Jamaica + 0x2409: "en_CB", # English - Caribbean + 0x2809: "en_BZ", # English - Belize + 0x2c09: "en_TT", # English - Trinidad + 0x3009: "en_ZW", # English - Zimbabwe + 0x3409: "en_PH", # English - Philippines + 0x4009: "en_IN", # English - India + 0x4409: "en_MY", # English - Malaysia + 0x4809: "en_IN", # English - Singapore + 0x0425: "et_EE", # Estonian + 0x0438: "fo_FO", # Faroese + 0x0464: "fil_PH",# Filipino + 0x040b: "fi_FI", # Finnish + 0x040c: "fr_FR", # French - France + 0x080c: "fr_BE", # French - Belgium + 0x0c0c: "fr_CA", # French - Canada + 0x100c: "fr_CH", # French - Switzerland + 0x140c: "fr_LU", # French - Luxembourg + 0x180c: "fr_MC", # French - Monaco + 0x0462: "fy_NL", # Frisian - Netherlands + 0x0456: "gl_ES", # Galician + 0x0437: "ka_GE", # Georgian + 0x0407: "de_DE", # German - Germany + 0x0807: "de_CH", # German - Switzerland + 0x0c07: "de_AT", # German - Austria + 0x1007: "de_LU", # German - Luxembourg + 0x1407: "de_LI", # German - Liechtenstein + 0x0408: "el_GR", # Greek + 0x046f: "kl_GL", # Greenlandic - Greenland + 0x0447: "gu_IN", # Gujarati + 0x0468: "ha_NG", # Hausa - Latin + 0x040d: "he_IL", # Hebrew + 0x0439: "hi_IN", # Hindi + 0x040e: "hu_HU", # Hungarian + 0x040f: "is_IS", # Icelandic + 0x0421: "id_ID", # Indonesian + 0x045d: "iu_CA", # Inuktitut - Syllabics + 0x085d: "iu_CA", # Inuktitut - Latin + 0x083c: "ga_IE", # Irish - Ireland + 0x0410: "it_IT", # Italian - Italy + 0x0810: "it_CH", # Italian - Switzerland + 0x0411: "ja_JP", # Japanese + 0x044b: "kn_IN", # Kannada - India + 0x043f: "kk_KZ", # Kazakh + 0x0453: "kh_KH", # Khmer - Cambodia + 0x0486: "qut_GT",# K'iche - Guatemala + 0x0487: "rw_RW", # Kinyarwanda - Rwanda + 0x0457: "kok_IN",# Konkani + 0x0412: "ko_KR", # Korean + 0x0440: "ky_KG", # Kyrgyz + 0x0454: "lo_LA", # Lao - Lao PDR + 0x0426: "lv_LV", # Latvian + 0x0427: "lt_LT", # Lithuanian + 0x082e: "dsb_DE",# Lower Sorbian - Germany + 0x046e: "lb_LU", # Luxembourgish + 0x042f: "mk_MK", # FYROM Macedonian + 0x043e: "ms_MY", # Malay - Malaysia + 0x083e: "ms_BN", # Malay - Brunei Darussalam + 0x044c: "ml_IN", # Malayalam - India + 0x043a: "mt_MT", # Maltese + 0x0481: "mi_NZ", # Maori + 0x047a: "arn_CL",# Mapudungun + 0x044e: "mr_IN", # Marathi + 0x047c: "moh_CA",# Mohawk - Canada + 0x0450: "mn_MN", # Mongolian - Cyrillic + 0x0850: "mn_CN", # Mongolian - PRC + 0x0461: "ne_NP", # Nepali + 0x0414: "nb_NO", # Norwegian - Bokmal + 0x0814: "nn_NO", # Norwegian - Nynorsk + 0x0482: "oc_FR", # Occitan - France + 0x0448: "or_IN", # Oriya - India + 0x0463: "ps_AF", # Pashto - Afghanistan + 0x0429: "fa_IR", # Persian + 0x0415: "pl_PL", # Polish + 0x0416: "pt_BR", # Portuguese - Brazil + 0x0816: "pt_PT", # Portuguese - Portugal + 0x0446: "pa_IN", # Punjabi + 0x046b: "quz_BO",# Quechua (Bolivia) + 0x086b: "quz_EC",# Quechua (Ecuador) + 0x0c6b: "quz_PE",# Quechua (Peru) + 0x0418: "ro_RO", # Romanian - Romania + 0x0417: "rm_CH", # Romansh + 0x0419: "ru_RU", # Russian + 0x243b: "smn_FI",# Sami Finland + 0x103b: "smj_NO",# Sami Norway + 0x143b: "smj_SE",# Sami Sweden + 0x043b: "se_NO", # Sami Northern Norway + 0x083b: "se_SE", # Sami Northern Sweden + 0x0c3b: "se_FI", # Sami Northern Finland + 0x203b: "sms_FI",# Sami Skolt + 0x183b: "sma_NO",# Sami Southern Norway + 0x1c3b: "sma_SE",# Sami Southern Sweden + 0x044f: "sa_IN", # Sanskrit + 0x0c1a: "sr_SP", # Serbian - Cyrillic + 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic + 0x081a: "sr_SP", # Serbian - Latin + 0x181a: "sr_BA", # Serbian - Bosnia Latin + 0x045b: "si_LK", # Sinhala - Sri Lanka + 0x046c: "ns_ZA", # Northern Sotho + 0x0432: "tn_ZA", # Setswana - Southern Africa + 0x041b: "sk_SK", # Slovak + 0x0424: "sl_SI", # Slovenian + 0x040a: "es_ES", # Spanish - Spain + 0x080a: "es_MX", # Spanish - Mexico + 0x0c0a: "es_ES", # Spanish - Spain (Modern) + 0x100a: "es_GT", # Spanish - Guatemala + 0x140a: "es_CR", # Spanish - Costa Rica + 0x180a: "es_PA", # Spanish - Panama + 0x1c0a: "es_DO", # Spanish - Dominican Republic + 0x200a: "es_VE", # Spanish - Venezuela + 0x240a: "es_CO", # Spanish - Colombia + 0x280a: "es_PE", # Spanish - Peru + 0x2c0a: "es_AR", # Spanish - Argentina + 0x300a: "es_EC", # Spanish - Ecuador + 0x340a: "es_CL", # Spanish - Chile + 0x380a: "es_UR", # Spanish - Uruguay + 0x3c0a: "es_PY", # Spanish - Paraguay + 0x400a: "es_BO", # Spanish - Bolivia + 0x440a: "es_SV", # Spanish - El Salvador + 0x480a: "es_HN", # Spanish - Honduras + 0x4c0a: "es_NI", # Spanish - Nicaragua + 0x500a: "es_PR", # Spanish - Puerto Rico + 0x540a: "es_US", # Spanish - United States +# 0x0430: "", # Sutu - Not supported + 0x0441: "sw_KE", # Swahili + 0x041d: "sv_SE", # Swedish - Sweden + 0x081d: "sv_FI", # Swedish - Finland + 0x045a: "syr_SY",# Syriac + 0x0428: "tg_TJ", # Tajik - Cyrillic + 0x085f: "tmz_DZ",# Tamazight - Latin + 0x0449: "ta_IN", # Tamil + 0x0444: "tt_RU", # Tatar + 0x044a: "te_IN", # Telugu + 0x041e: "th_TH", # Thai + 0x0851: "bo_BT", # Tibetan - Bhutan + 0x0451: "bo_CN", # Tibetan - PRC + 0x041f: "tr_TR", # Turkish + 0x0442: "tk_TM", # Turkmen - Cyrillic + 0x0480: "ug_CN", # Uighur - Arabic + 0x0422: "uk_UA", # Ukrainian + 0x042e: "wen_DE",# Upper Sorbian - Germany + 0x0420: "ur_PK", # Urdu + 0x0820: "ur_IN", # Urdu - India + 0x0443: "uz_UZ", # Uzbek - Latin + 0x0843: "uz_UZ", # Uzbek - Cyrillic + 0x042a: "vi_VN", # Vietnamese + 0x0452: "cy_GB", # Welsh + 0x0488: "wo_SN", # Wolof - Senegal + 0x0434: "xh_ZA", # Xhosa - South Africa + 0x0485: "sah_RU",# Yakut - Cyrillic + 0x0478: "ii_CN", # Yi - PRC + 0x046a: "yo_NG", # Yoruba - Nigeria + 0x0435: "zu_ZA", # Zulu +} + +def _print_locale(): + + """ Test function. + """ + categories = {} + def _init_categories(categories=categories): + for k,v in globals().items(): + if k[:3] == 'LC_': + categories[k] = v + _init_categories() + del categories['LC_ALL'] + + print 'Locale defaults as determined by getdefaultlocale():' + print '-'*72 + lang, enc = getdefaultlocale() + print 'Language: ', lang or '(undefined)' + print 'Encoding: ', enc or '(undefined)' + print + + print 'Locale settings on startup:' + print '-'*72 + for name,category in categories.items(): + print name, '...' + lang, enc = getlocale(category) + print ' Language: ', lang or '(undefined)' + print ' Encoding: ', enc or '(undefined)' + print + + print + print 'Locale settings after calling resetlocale():' + print '-'*72 + resetlocale() + for name,category in categories.items(): + print name, '...' + lang, enc = getlocale(category) + print ' Language: ', lang or '(undefined)' + print ' Encoding: ', enc or '(undefined)' + print + + try: + setlocale(LC_ALL, "") + except: + print 'NOTE:' + print 'setlocale(LC_ALL, "") does not support the default locale' + print 'given in the OS environment variables.' + else: + print + print 'Locale settings after calling setlocale(LC_ALL, ""):' + print '-'*72 + for name,category in categories.items(): + print name, '...' + lang, enc = getlocale(category) + print ' Language: ', lang or '(undefined)' + print ' Encoding: ', enc or '(undefined)' + print + +### + +try: + LC_MESSAGES +except NameError: + pass +else: + __all__.append("LC_MESSAGES") + +if __name__=='__main__': + print 'Locale aliasing:' + print + _print_locale() + print + print 'Number formatting:' + print + _test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/ntpath.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,550 @@ +# Module 'ntpath' -- common operations on WinNT/Win95 pathnames +"""Common pathname manipulations, WindowsNT/95 version. + +Instead of importing this module directly, import os and refer to this +module as os.path. +""" + +import os +import sys +import stat +import genericpath +import warnings + +from genericpath import * +from genericpath import _unicode + +__all__ = ["normcase","isabs","join","splitdrive","split","splitext", + "basename","dirname","commonprefix","getsize","getmtime", + "getatime","getctime", "islink","exists","lexists","isdir","isfile", + "ismount","walk","expanduser","expandvars","normpath","abspath", + "splitunc","curdir","pardir","sep","pathsep","defpath","altsep", + "extsep","devnull","realpath","supports_unicode_filenames","relpath"] + +# strings representing various path-related bits and pieces +curdir = '.' +pardir = '..' +extsep = '.' +sep = '\\' +pathsep = ';' +altsep = '/' +defpath = '.;C:\\bin' +if 'ce' in sys.builtin_module_names: + defpath = '\\Windows' +elif 'os2' in sys.builtin_module_names: + # OS/2 w/ VACPP + altsep = '/' +devnull = 'nul' + +# Normalize the case of a pathname and map slashes to backslashes. +# Other normalizations (such as optimizing '../' away) are not done +# (this is done by normpath). + +def normcase(s): + """Normalize case of pathname. + + Makes all characters lowercase and all slashes into backslashes.""" + return s.replace("/", "\\").lower() + + +# Return whether a path is absolute. +# Trivial in Posix, harder on the Mac or MS-DOS. +# For DOS it is absolute if it starts with a slash or backslash (current +# volume), or if a pathname after the volume letter and colon / UNC resource +# starts with a slash or backslash. + +def isabs(s): + """Test whether a path is absolute""" + s = splitdrive(s)[1] + return s != '' and s[:1] in '/\\' + + +# Join two (or more) paths. +def join(path, *paths): + """Join two or more pathname components, inserting "\\" as needed.""" + result_drive, result_path = splitdrive(path) + for p in paths: + p_drive, p_path = splitdrive(p) + if p_path and p_path[0] in '\\/': + # Second path is absolute + if p_drive or not result_drive: + result_drive = p_drive + result_path = p_path + continue + elif p_drive and p_drive != result_drive: + if p_drive.lower() != result_drive.lower(): + # Different drives => ignore the first path entirely + result_drive = p_drive + result_path = p_path + continue + # Same drive in different case + result_drive = p_drive + # Second path is relative to the first + if result_path and result_path[-1] not in '\\/': + result_path = result_path + '\\' + result_path = result_path + p_path + ## add separator between UNC and non-absolute path + if (result_path and result_path[0] not in '\\/' and + result_drive and result_drive[-1:] != ':'): + return result_drive + sep + result_path + return result_drive + result_path + + +# Split a path in a drive specification (a drive letter followed by a +# colon) and the path specification. +# It is always true that drivespec + pathspec == p +def splitdrive(p): + """Split a pathname into drive/UNC sharepoint and relative path specifiers. + Returns a 2-tuple (drive_or_unc, path); either part may be empty. + + If you assign + result = splitdrive(p) + It is always true that: + result[0] + result[1] == p + + If the path contained a drive letter, drive_or_unc will contain everything + up to and including the colon. e.g. splitdrive("c:/dir") returns ("c:", "/dir") + + If the path contained a UNC path, the drive_or_unc will contain the host name + and share up to but not including the fourth directory separator character. + e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir") + + Paths cannot contain both a drive letter and a UNC path. + + """ + if len(p) > 1: + normp = p.replace(altsep, sep) + if (normp[0:2] == sep*2) and (normp[2:3] != sep): + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path + # \\machine\mountpoint\directory\etc\... + # directory ^^^^^^^^^^^^^^^ + index = normp.find(sep, 2) + if index == -1: + return '', p + index2 = normp.find(sep, index + 1) + # a UNC path can't have two slashes in a row + # (after the initial two) + if index2 == index + 1: + return '', p + if index2 == -1: + index2 = len(p) + return p[:index2], p[index2:] + if normp[1] == ':': + return p[:2], p[2:] + return '', p + +# Parse UNC paths +def splitunc(p): + """Split a pathname into UNC mount point and relative path specifiers. + + Return a 2-tuple (unc, rest); either part may be empty. + If unc is not empty, it has the form '//host/mount' (or similar + using backslashes). unc+rest is always the input path. + Paths containing drive letters never have a UNC part. + """ + if p[1:2] == ':': + return '', p # Drive letter present + firstTwo = p[0:2] + if firstTwo == '//' or firstTwo == '\\\\': + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter + # \\machine\mountpoint\directories... + # directory ^^^^^^^^^^^^^^^ + normp = p.replace('\\', '/') + index = normp.find('/', 2) + if index <= 2: + return '', p + index2 = normp.find('/', index + 1) + # a UNC path can't have two slashes in a row + # (after the initial two) + if index2 == index + 1: + return '', p + if index2 == -1: + index2 = len(p) + return p[:index2], p[index2:] + return '', p + + +# Split a path in head (everything up to the last '/') and tail (the +# rest). After the trailing '/' is stripped, the invariant +# join(head, tail) == p holds. +# The resulting head won't end in '/' unless it is the root. + +def split(p): + """Split a pathname. + + Return tuple (head, tail) where tail is everything after the final slash. + Either part may be empty.""" + + d, p = splitdrive(p) + # set i to index beyond p's last slash + i = len(p) + while i and p[i-1] not in '/\\': + i = i - 1 + head, tail = p[:i], p[i:] # now tail has no slashes + # remove trailing slashes from head, unless it's all slashes + head2 = head + while head2 and head2[-1] in '/\\': + head2 = head2[:-1] + head = head2 or head + return d + head, tail + + +# Split a path in root and extension. +# The extension is everything starting at the last dot in the last +# pathname component; the root is everything before that. +# It is always true that root + ext == p. + +def splitext(p): + return genericpath._splitext(p, sep, altsep, extsep) +splitext.__doc__ = genericpath._splitext.__doc__ + + +# Return the tail (basename) part of a path. + +def basename(p): + """Returns the final component of a pathname""" + return split(p)[1] + + +# Return the head (dirname) part of a path. + +def dirname(p): + """Returns the directory component of a pathname""" + return split(p)[0] + +# Is a path a symbolic link? +# This will always return false on systems where posix.lstat doesn't exist. + +def islink(path): + """Test for symbolic link. + On WindowsNT/95 and OS/2 always returns false + """ + return False + +# alias exists to lexists +lexists = exists + +# Is a path a mount point? Either a root (with or without drive letter) +# or a UNC path with at most a / or \ after the mount point. + +def ismount(path): + """Test whether a path is a mount point (defined as root of drive)""" + unc, rest = splitunc(path) + if unc: + return rest in ("", "/", "\\") + p = splitdrive(path)[1] + return len(p) == 1 and p[0] in '/\\' + + +# Directory tree walk. +# For each directory under top (including top itself, but excluding +# '.' and '..'), func(arg, dirname, filenames) is called, where +# dirname is the name of the directory and filenames is the list +# of files (and subdirectories etc.) in the directory. +# The func may modify the filenames list, to implement a filter, +# or to impose a different order of visiting. + +def walk(top, func, arg): + """Directory tree walk with callback function. + + For each directory in the directory tree rooted at top (including top + itself, but excluding '.' and '..'), call func(arg, dirname, fnames). + dirname is the name of the directory, and fnames a list of the names of + the files and subdirectories in dirname (excluding '.' and '..'). func + may modify the fnames list in-place (e.g. via del or slice assignment), + and walk will only recurse into the subdirectories whose names remain in + fnames; this can be used to implement a filter, or to impose a specific + order of visiting. No semantics are defined for, or required of, arg, + beyond that arg is always passed to func. It can be used, e.g., to pass + a filename pattern, or a mutable object designed to accumulate + statistics. Passing None for arg is common.""" + warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.", + stacklevel=2) + try: + names = os.listdir(top) + except os.error: + return + func(arg, top, names) + for name in names: + name = join(top, name) + if isdir(name): + walk(name, func, arg) + + +# Expand paths beginning with '~' or '~user'. +# '~' means $HOME; '~user' means that user's home directory. +# If the path doesn't begin with '~', or if the user or $HOME is unknown, +# the path is returned unchanged (leaving error reporting to whatever +# function is called with the expanded path as argument). +# See also module 'glob' for expansion of *, ? and [...] in pathnames. +# (A function should also be defined to do full *sh-style environment +# variable expansion.) + +def expanduser(path): + """Expand ~ and ~user constructs. + + If user or $HOME is unknown, do nothing.""" + if path[:1] != '~': + return path + i, n = 1, len(path) + while i < n and path[i] not in '/\\': + i = i + 1 + + if 'HOME' in os.environ: + userhome = os.environ['HOME'] + elif 'USERPROFILE' in os.environ: + userhome = os.environ['USERPROFILE'] + elif not 'HOMEPATH' in os.environ: + return path + else: + try: + drive = os.environ['HOMEDRIVE'] + except KeyError: + drive = '' + userhome = join(drive, os.environ['HOMEPATH']) + + if i != 1: #~user + userhome = join(dirname(userhome), path[1:i]) + + return userhome + path[i:] + + +# Expand paths containing shell variable substitutions. +# The following rules apply: +# - no expansion within single quotes +# - '$$' is translated into '$' +# - '%%' is translated into '%' if '%%' are not seen in %var1%%var2% +# - ${varname} is accepted. +# - $varname is accepted. +# - %varname% is accepted. +# - varnames can be made out of letters, digits and the characters '_-' +# (though is not verified in the ${varname} and %varname% cases) +# XXX With COMMAND.COM you can use any characters in a variable name, +# XXX except '^|<>='. + +def expandvars(path): + """Expand shell variables of the forms $var, ${var} and %var%. + + Unknown variables are left unchanged.""" + if '$' not in path and '%' not in path: + return path + import string + varchars = string.ascii_letters + string.digits + '_-' + if isinstance(path, _unicode): + encoding = sys.getfilesystemencoding() + def getenv(var): + return os.environ[var.encode(encoding)].decode(encoding) + else: + def getenv(var): + return os.environ[var] + res = '' + index = 0 + pathlen = len(path) + while index < pathlen: + c = path[index] + if c == '\'': # no expansion within single quotes + path = path[index + 1:] + pathlen = len(path) + try: + index = path.index('\'') + res = res + '\'' + path[:index + 1] + except ValueError: + res = res + c + path + index = pathlen - 1 + elif c == '%': # variable or '%' + if path[index + 1:index + 2] == '%': + res = res + c + index = index + 1 + else: + path = path[index+1:] + pathlen = len(path) + try: + index = path.index('%') + except ValueError: + res = res + '%' + path + index = pathlen - 1 + else: + var = path[:index] + try: + res = res + getenv(var) + except KeyError: + res = res + '%' + var + '%' + elif c == '$': # variable or '$$' + if path[index + 1:index + 2] == '$': + res = res + c + index = index + 1 + elif path[index + 1:index + 2] == '{': + path = path[index+2:] + pathlen = len(path) + try: + index = path.index('}') + var = path[:index] + try: + res = res + getenv(var) + except KeyError: + res = res + '${' + var + '}' + except ValueError: + res = res + '${' + path + index = pathlen - 1 + else: + var = '' + index = index + 1 + c = path[index:index + 1] + while c != '' and c in varchars: + var = var + c + index = index + 1 + c = path[index:index + 1] + try: + res = res + getenv(var) + except KeyError: + res = res + '$' + var + if c != '': + index = index - 1 + else: + res = res + c + index = index + 1 + return res + + +# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B. +# Previously, this function also truncated pathnames to 8+3 format, +# but as this module is called "ntpath", that's obviously wrong! + +def normpath(path): + """Normalize path, eliminating double slashes, etc.""" + # Preserve unicode (if path is unicode) + backslash, dot = (u'\\', u'.') if isinstance(path, _unicode) else ('\\', '.') + if path.startswith(('\\\\.\\', '\\\\?\\')): + # in the case of paths with these prefixes: + # \\.\ -> device names + # \\?\ -> literal paths + # do not do any normalization, but return the path unchanged + return path + path = path.replace("/", "\\") + prefix, path = splitdrive(path) + # We need to be careful here. If the prefix is empty, and the path starts + # with a backslash, it could either be an absolute path on the current + # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It + # is therefore imperative NOT to collapse multiple backslashes blindly in + # that case. + # The code below preserves multiple backslashes when there is no drive + # letter. This means that the invalid filename \\\a\b is preserved + # unchanged, where a\\\b is normalised to a\b. It's not clear that there + # is any better behaviour for such edge cases. + if prefix == '': + # No drive letter - preserve initial backslashes + while path[:1] == "\\": + prefix = prefix + backslash + path = path[1:] + else: + # We have a drive letter - collapse initial backslashes + if path.startswith("\\"): + prefix = prefix + backslash + path = path.lstrip("\\") + comps = path.split("\\") + i = 0 + while i < len(comps): + if comps[i] in ('.', ''): + del comps[i] + elif comps[i] == '..': + if i > 0 and comps[i-1] != '..': + del comps[i-1:i+1] + i -= 1 + elif i == 0 and prefix.endswith("\\"): + del comps[i] + else: + i += 1 + else: + i += 1 + # If the path is now empty, substitute '.' + if not prefix and not comps: + comps.append(dot) + return prefix + backslash.join(comps) + + +# Return an absolute path. +try: + from nt import _getfullpathname + +except ImportError: # not running on Windows - mock up something sensible + def abspath(path): + """Return the absolute version of a path.""" + if not isabs(path): + if isinstance(path, _unicode): + cwd = os.getcwdu() + else: + cwd = os.getcwd() + path = join(cwd, path) + return normpath(path) + +else: # use native Windows method on Windows + def abspath(path): + """Return the absolute version of a path.""" + + if path: # Empty path must return current working directory. + try: + path = _getfullpathname(path) + except WindowsError: + pass # Bad path - return unchanged. + elif isinstance(path, _unicode): + path = os.getcwdu() + else: + path = os.getcwd() + return normpath(path) + +# realpath is a no-op on systems without islink support +realpath = abspath +# Win9x family and earlier have no Unicode filename support. +supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and + sys.getwindowsversion()[3] >= 2) + +def _abspath_split(path): + abs = abspath(normpath(path)) + prefix, rest = splitunc(abs) + is_unc = bool(prefix) + if not is_unc: + prefix, rest = splitdrive(abs) + return is_unc, prefix, [x for x in rest.split(sep) if x] + +def relpath(path, start=curdir): + """Return a relative version of a path""" + + if not path: + raise ValueError("no path specified") + + start_is_unc, start_prefix, start_list = _abspath_split(start) + path_is_unc, path_prefix, path_list = _abspath_split(path) + + if path_is_unc ^ start_is_unc: + raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)" + % (path, start)) + if path_prefix.lower() != start_prefix.lower(): + if path_is_unc: + raise ValueError("path is on UNC root %s, start on UNC root %s" + % (path_prefix, start_prefix)) + else: + raise ValueError("path is on drive %s, start on drive %s" + % (path_prefix, start_prefix)) + # Work out how much of the filepath is shared by start and path. + i = 0 + for e1, e2 in zip(start_list, path_list): + if e1.lower() != e2.lower(): + break + i += 1 + + rel_list = [pardir] * (len(start_list)-i) + path_list[i:] + if not rel_list: + return curdir + return join(*rel_list) + +try: + # The genericpath.isdir implementation uses os.stat and checks the mode + # attribute to tell whether or not the path is a directory. + # This is overkill on Windows - just pass the path to GetFileAttributes + # and check the attribute from there. + from nt import _isdir as isdir +except ImportError: + # Use genericpath.isdir as imported above. + pass
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/orig-prefix.txt Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,1 @@ +/usr \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/os.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,742 @@ +r"""OS routines for NT or Posix depending on what system we're on. + +This exports: + - all functions from posix, nt, os2, or ce, e.g. unlink, stat, etc. + - os.path is one of the modules posixpath, or ntpath + - os.name is 'posix', 'nt', 'os2', 'ce' or 'riscos' + - os.curdir is a string representing the current directory ('.' or ':') + - os.pardir is a string representing the parent directory ('..' or '::') + - os.sep is the (or a most common) pathname separator ('/' or ':' or '\\') + - os.extsep is the extension separator ('.' or '/') + - os.altsep is the alternate pathname separator (None or '/') + - os.pathsep is the component separator used in $PATH etc + - os.linesep is the line separator in text files ('\r' or '\n' or '\r\n') + - os.defpath is the default search path for executables + - os.devnull is the file path of the null device ('/dev/null', etc.) + +Programs that import and use 'os' stand a better chance of being +portable between different platforms. Of course, they must then +only use functions that are defined by all platforms (e.g., unlink +and opendir), and leave all pathname manipulation to os.path +(e.g., split and join). +""" + +#' + +import sys, errno + +_names = sys.builtin_module_names + +# Note: more names are added to __all__ later. +__all__ = ["altsep", "curdir", "pardir", "sep", "extsep", "pathsep", "linesep", + "defpath", "name", "path", "devnull", + "SEEK_SET", "SEEK_CUR", "SEEK_END"] + +def _get_exports_list(module): + try: + return list(module.__all__) + except AttributeError: + return [n for n in dir(module) if n[0] != '_'] + +if 'posix' in _names: + name = 'posix' + linesep = '\n' + from posix import * + try: + from posix import _exit + except ImportError: + pass + import posixpath as path + + import posix + __all__.extend(_get_exports_list(posix)) + del posix + +elif 'nt' in _names: + name = 'nt' + linesep = '\r\n' + from nt import * + try: + from nt import _exit + except ImportError: + pass + import ntpath as path + + import nt + __all__.extend(_get_exports_list(nt)) + del nt + +elif 'os2' in _names: + name = 'os2' + linesep = '\r\n' + from os2 import * + try: + from os2 import _exit + except ImportError: + pass + if sys.version.find('EMX GCC') == -1: + import ntpath as path + else: + import os2emxpath as path + from _emx_link import link + + import os2 + __all__.extend(_get_exports_list(os2)) + del os2 + +elif 'ce' in _names: + name = 'ce' + linesep = '\r\n' + from ce import * + try: + from ce import _exit + except ImportError: + pass + # We can use the standard Windows path. + import ntpath as path + + import ce + __all__.extend(_get_exports_list(ce)) + del ce + +elif 'riscos' in _names: + name = 'riscos' + linesep = '\n' + from riscos import * + try: + from riscos import _exit + except ImportError: + pass + import riscospath as path + + import riscos + __all__.extend(_get_exports_list(riscos)) + del riscos + +else: + raise ImportError, 'no os specific module found' + +sys.modules['os.path'] = path +from os.path import (curdir, pardir, sep, pathsep, defpath, extsep, altsep, + devnull) + +del _names + +# Python uses fixed values for the SEEK_ constants; they are mapped +# to native constants if necessary in posixmodule.c +SEEK_SET = 0 +SEEK_CUR = 1 +SEEK_END = 2 + +#' + +# Super directory utilities. +# (Inspired by Eric Raymond; the doc strings are mostly his) + +def makedirs(name, mode=0777): + """makedirs(path [, mode=0777]) + + Super-mkdir; create a leaf directory and all intermediate ones. + Works like mkdir, except that any intermediate path segment (not + just the rightmost) will be created if it does not exist. This is + recursive. + + """ + head, tail = path.split(name) + if not tail: + head, tail = path.split(head) + if head and tail and not path.exists(head): + try: + makedirs(head, mode) + except OSError, e: + # be happy if someone already created the path + if e.errno != errno.EEXIST: + raise + if tail == curdir: # xxx/newdir/. exists if xxx/newdir exists + return + mkdir(name, mode) + +def removedirs(name): + """removedirs(path) + + Super-rmdir; remove a leaf directory and all empty intermediate + ones. Works like rmdir except that, if the leaf directory is + successfully removed, directories corresponding to rightmost path + segments will be pruned away until either the whole path is + consumed or an error occurs. Errors during this latter phase are + ignored -- they generally mean that a directory was not empty. + + """ + rmdir(name) + head, tail = path.split(name) + if not tail: + head, tail = path.split(head) + while head and tail: + try: + rmdir(head) + except error: + break + head, tail = path.split(head) + +def renames(old, new): + """renames(old, new) + + Super-rename; create directories as necessary and delete any left + empty. Works like rename, except creation of any intermediate + directories needed to make the new pathname good is attempted + first. After the rename, directories corresponding to rightmost + path segments of the old name will be pruned until either the + whole path is consumed or a nonempty directory is found. + + Note: this function can fail with the new directory structure made + if you lack permissions needed to unlink the leaf directory or + file. + + """ + head, tail = path.split(new) + if head and tail and not path.exists(head): + makedirs(head) + rename(old, new) + head, tail = path.split(old) + if head and tail: + try: + removedirs(head) + except error: + pass + +__all__.extend(["makedirs", "removedirs", "renames"]) + +def walk(top, topdown=True, onerror=None, followlinks=False): + """Directory tree generator. + + For each directory in the directory tree rooted at top (including top + itself, but excluding '.' and '..'), yields a 3-tuple + + dirpath, dirnames, filenames + + dirpath is a string, the path to the directory. dirnames is a list of + the names of the subdirectories in dirpath (excluding '.' and '..'). + filenames is a list of the names of the non-directory files in dirpath. + Note that the names in the lists are just names, with no path components. + To get a full path (which begins with top) to a file or directory in + dirpath, do os.path.join(dirpath, name). + + If optional arg 'topdown' is true or not specified, the triple for a + directory is generated before the triples for any of its subdirectories + (directories are generated top down). If topdown is false, the triple + for a directory is generated after the triples for all of its + subdirectories (directories are generated bottom up). + + When topdown is true, the caller can modify the dirnames list in-place + (e.g., via del or slice assignment), and walk will only recurse into the + subdirectories whose names remain in dirnames; this can be used to prune the + search, or to impose a specific order of visiting. Modifying dirnames when + topdown is false is ineffective, since the directories in dirnames have + already been generated by the time dirnames itself is generated. No matter + the value of topdown, the list of subdirectories is retrieved before the + tuples for the directory and its subdirectories are generated. + + By default errors from the os.listdir() call are ignored. If + optional arg 'onerror' is specified, it should be a function; it + will be called with one argument, an os.error instance. It can + report the error to continue with the walk, or raise the exception + to abort the walk. Note that the filename is available as the + filename attribute of the exception object. + + By default, os.walk does not follow symbolic links to subdirectories on + systems that support them. In order to get this functionality, set the + optional argument 'followlinks' to true. + + Caution: if you pass a relative pathname for top, don't change the + current working directory between resumptions of walk. walk never + changes the current directory, and assumes that the client doesn't + either. + + Example: + + import os + from os.path import join, getsize + for root, dirs, files in os.walk('python/Lib/email'): + print root, "consumes", + print sum([getsize(join(root, name)) for name in files]), + print "bytes in", len(files), "non-directory files" + if 'CVS' in dirs: + dirs.remove('CVS') # don't visit CVS directories + + """ + + islink, join, isdir = path.islink, path.join, path.isdir + + # We may not have read permission for top, in which case we can't + # get a list of the files the directory contains. os.path.walk + # always suppressed the exception then, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. That logic is copied here. + try: + # Note that listdir and error are globals in this module due + # to earlier import-*. + names = listdir(top) + except error, err: + if onerror is not None: + onerror(err) + return + + dirs, nondirs = [], [] + for name in names: + if isdir(join(top, name)): + dirs.append(name) + else: + nondirs.append(name) + + if topdown: + yield top, dirs, nondirs + for name in dirs: + new_path = join(top, name) + if followlinks or not islink(new_path): + for x in walk(new_path, topdown, onerror, followlinks): + yield x + if not topdown: + yield top, dirs, nondirs + +__all__.append("walk") + +# Make sure os.environ exists, at least +try: + environ +except NameError: + environ = {} + +def execl(file, *args): + """execl(file, *args) + + Execute the executable file with argument list args, replacing the + current process. """ + execv(file, args) + +def execle(file, *args): + """execle(file, *args, env) + + Execute the executable file with argument list args and + environment env, replacing the current process. """ + env = args[-1] + execve(file, args[:-1], env) + +def execlp(file, *args): + """execlp(file, *args) + + Execute the executable file (which is searched for along $PATH) + with argument list args, replacing the current process. """ + execvp(file, args) + +def execlpe(file, *args): + """execlpe(file, *args, env) + + Execute the executable file (which is searched for along $PATH) + with argument list args and environment env, replacing the current + process. """ + env = args[-1] + execvpe(file, args[:-1], env) + +def execvp(file, args): + """execvp(file, args) + + Execute the executable file (which is searched for along $PATH) + with argument list args, replacing the current process. + args may be a list or tuple of strings. """ + _execvpe(file, args) + +def execvpe(file, args, env): + """execvpe(file, args, env) + + Execute the executable file (which is searched for along $PATH) + with argument list args and environment env , replacing the + current process. + args may be a list or tuple of strings. """ + _execvpe(file, args, env) + +__all__.extend(["execl","execle","execlp","execlpe","execvp","execvpe"]) + +def _execvpe(file, args, env=None): + if env is not None: + func = execve + argrest = (args, env) + else: + func = execv + argrest = (args,) + env = environ + + head, tail = path.split(file) + if head: + func(file, *argrest) + return + if 'PATH' in env: + envpath = env['PATH'] + else: + envpath = defpath + PATH = envpath.split(pathsep) + saved_exc = None + saved_tb = None + for dir in PATH: + fullname = path.join(dir, file) + try: + func(fullname, *argrest) + except error, e: + tb = sys.exc_info()[2] + if (e.errno != errno.ENOENT and e.errno != errno.ENOTDIR + and saved_exc is None): + saved_exc = e + saved_tb = tb + if saved_exc: + raise error, saved_exc, saved_tb + raise error, e, tb + +# Change environ to automatically call putenv() if it exists +try: + # This will fail if there's no putenv + putenv +except NameError: + pass +else: + import UserDict + + # Fake unsetenv() for Windows + # not sure about os2 here but + # I'm guessing they are the same. + + if name in ('os2', 'nt'): + def unsetenv(key): + putenv(key, "") + + if name == "riscos": + # On RISC OS, all env access goes through getenv and putenv + from riscosenviron import _Environ + elif name in ('os2', 'nt'): # Where Env Var Names Must Be UPPERCASE + # But we store them as upper case + class _Environ(UserDict.IterableUserDict): + def __init__(self, environ): + UserDict.UserDict.__init__(self) + data = self.data + for k, v in environ.items(): + data[k.upper()] = v + def __setitem__(self, key, item): + putenv(key, item) + self.data[key.upper()] = item + def __getitem__(self, key): + return self.data[key.upper()] + try: + unsetenv + except NameError: + def __delitem__(self, key): + del self.data[key.upper()] + else: + def __delitem__(self, key): + unsetenv(key) + del self.data[key.upper()] + def clear(self): + for key in self.data.keys(): + unsetenv(key) + del self.data[key] + def pop(self, key, *args): + unsetenv(key) + return self.data.pop(key.upper(), *args) + def has_key(self, key): + return key.upper() in self.data + def __contains__(self, key): + return key.upper() in self.data + def get(self, key, failobj=None): + return self.data.get(key.upper(), failobj) + def update(self, dict=None, **kwargs): + if dict: + try: + keys = dict.keys() + except AttributeError: + # List of (key, value) + for k, v in dict: + self[k] = v + else: + # got keys + # cannot use items(), since mappings + # may not have them. + for k in keys: + self[k] = dict[k] + if kwargs: + self.update(kwargs) + def copy(self): + return dict(self) + + else: # Where Env Var Names Can Be Mixed Case + class _Environ(UserDict.IterableUserDict): + def __init__(self, environ): + UserDict.UserDict.__init__(self) + self.data = environ + def __setitem__(self, key, item): + putenv(key, item) + self.data[key] = item + def update(self, dict=None, **kwargs): + if dict: + try: + keys = dict.keys() + except AttributeError: + # List of (key, value) + for k, v in dict: + self[k] = v + else: + # got keys + # cannot use items(), since mappings + # may not have them. + for k in keys: + self[k] = dict[k] + if kwargs: + self.update(kwargs) + try: + unsetenv + except NameError: + pass + else: + def __delitem__(self, key): + unsetenv(key) + del self.data[key] + def clear(self): + for key in self.data.keys(): + unsetenv(key) + del self.data[key] + def pop(self, key, *args): + unsetenv(key) + return self.data.pop(key, *args) + def copy(self): + return dict(self) + + + environ = _Environ(environ) + +def getenv(key, default=None): + """Get an environment variable, return None if it doesn't exist. + The optional second argument can specify an alternate default.""" + return environ.get(key, default) +__all__.append("getenv") + +def _exists(name): + return name in globals() + +# Supply spawn*() (probably only for Unix) +if _exists("fork") and not _exists("spawnv") and _exists("execv"): + + P_WAIT = 0 + P_NOWAIT = P_NOWAITO = 1 + + # XXX Should we support P_DETACH? I suppose it could fork()**2 + # and close the std I/O streams. Also, P_OVERLAY is the same + # as execv*()? + + def _spawnvef(mode, file, args, env, func): + # Internal helper; func is the exec*() function to use + pid = fork() + if not pid: + # Child + try: + if env is None: + func(file, args) + else: + func(file, args, env) + except: + _exit(127) + else: + # Parent + if mode == P_NOWAIT: + return pid # Caller is responsible for waiting! + while 1: + wpid, sts = waitpid(pid, 0) + if WIFSTOPPED(sts): + continue + elif WIFSIGNALED(sts): + return -WTERMSIG(sts) + elif WIFEXITED(sts): + return WEXITSTATUS(sts) + else: + raise error, "Not stopped, signaled or exited???" + + def spawnv(mode, file, args): + """spawnv(mode, file, args) -> integer + +Execute file with arguments from args in a subprocess. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + return _spawnvef(mode, file, args, None, execv) + + def spawnve(mode, file, args, env): + """spawnve(mode, file, args, env) -> integer + +Execute file with arguments from args in a subprocess with the +specified environment. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + return _spawnvef(mode, file, args, env, execve) + + # Note: spawnvp[e] is't currently supported on Windows + + def spawnvp(mode, file, args): + """spawnvp(mode, file, args) -> integer + +Execute file (which is looked for along $PATH) with arguments from +args in a subprocess. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + return _spawnvef(mode, file, args, None, execvp) + + def spawnvpe(mode, file, args, env): + """spawnvpe(mode, file, args, env) -> integer + +Execute file (which is looked for along $PATH) with arguments from +args in a subprocess with the supplied environment. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + return _spawnvef(mode, file, args, env, execvpe) + +if _exists("spawnv"): + # These aren't supplied by the basic Windows code + # but can be easily implemented in Python + + def spawnl(mode, file, *args): + """spawnl(mode, file, *args) -> integer + +Execute file with arguments from args in a subprocess. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + return spawnv(mode, file, args) + + def spawnle(mode, file, *args): + """spawnle(mode, file, *args, env) -> integer + +Execute file with arguments from args in a subprocess with the +supplied environment. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + env = args[-1] + return spawnve(mode, file, args[:-1], env) + + + __all__.extend(["spawnv", "spawnve", "spawnl", "spawnle",]) + + +if _exists("spawnvp"): + # At the moment, Windows doesn't implement spawnvp[e], + # so it won't have spawnlp[e] either. + def spawnlp(mode, file, *args): + """spawnlp(mode, file, *args) -> integer + +Execute file (which is looked for along $PATH) with arguments from +args in a subprocess with the supplied environment. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + return spawnvp(mode, file, args) + + def spawnlpe(mode, file, *args): + """spawnlpe(mode, file, *args, env) -> integer + +Execute file (which is looked for along $PATH) with arguments from +args in a subprocess with the supplied environment. +If mode == P_NOWAIT return the pid of the process. +If mode == P_WAIT return the process's exit code if it exits normally; +otherwise return -SIG, where SIG is the signal that killed it. """ + env = args[-1] + return spawnvpe(mode, file, args[:-1], env) + + + __all__.extend(["spawnvp", "spawnvpe", "spawnlp", "spawnlpe",]) + + +# Supply popen2 etc. (for Unix) +if _exists("fork"): + if not _exists("popen2"): + def popen2(cmd, mode="t", bufsize=-1): + """Execute the shell command 'cmd' in a sub-process. On UNIX, 'cmd' + may be a sequence, in which case arguments will be passed directly to + the program without shell intervention (as with os.spawnv()). If 'cmd' + is a string it will be passed to the shell (as with os.system()). If + 'bufsize' is specified, it sets the buffer size for the I/O pipes. The + file objects (child_stdin, child_stdout) are returned.""" + import warnings + msg = "os.popen2 is deprecated. Use the subprocess module." + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + import subprocess + PIPE = subprocess.PIPE + p = subprocess.Popen(cmd, shell=isinstance(cmd, basestring), + bufsize=bufsize, stdin=PIPE, stdout=PIPE, + close_fds=True) + return p.stdin, p.stdout + __all__.append("popen2") + + if not _exists("popen3"): + def popen3(cmd, mode="t", bufsize=-1): + """Execute the shell command 'cmd' in a sub-process. On UNIX, 'cmd' + may be a sequence, in which case arguments will be passed directly to + the program without shell intervention (as with os.spawnv()). If 'cmd' + is a string it will be passed to the shell (as with os.system()). If + 'bufsize' is specified, it sets the buffer size for the I/O pipes. The + file objects (child_stdin, child_stdout, child_stderr) are returned.""" + import warnings + msg = "os.popen3 is deprecated. Use the subprocess module." + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + import subprocess + PIPE = subprocess.PIPE + p = subprocess.Popen(cmd, shell=isinstance(cmd, basestring), + bufsize=bufsize, stdin=PIPE, stdout=PIPE, + stderr=PIPE, close_fds=True) + return p.stdin, p.stdout, p.stderr + __all__.append("popen3") + + if not _exists("popen4"): + def popen4(cmd, mode="t", bufsize=-1): + """Execute the shell command 'cmd' in a sub-process. On UNIX, 'cmd' + may be a sequence, in which case arguments will be passed directly to + the program without shell intervention (as with os.spawnv()). If 'cmd' + is a string it will be passed to the shell (as with os.system()). If + 'bufsize' is specified, it sets the buffer size for the I/O pipes. The + file objects (child_stdin, child_stdout_stderr) are returned.""" + import warnings + msg = "os.popen4 is deprecated. Use the subprocess module." + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + import subprocess + PIPE = subprocess.PIPE + p = subprocess.Popen(cmd, shell=isinstance(cmd, basestring), + bufsize=bufsize, stdin=PIPE, stdout=PIPE, + stderr=subprocess.STDOUT, close_fds=True) + return p.stdin, p.stdout + __all__.append("popen4") + +import copy_reg as _copy_reg + +def _make_stat_result(tup, dict): + return stat_result(tup, dict) + +def _pickle_stat_result(sr): + (type, args) = sr.__reduce__() + return (_make_stat_result, args) + +try: + _copy_reg.pickle(stat_result, _pickle_stat_result, _make_stat_result) +except NameError: # stat_result may not exist + pass + +def _make_statvfs_result(tup, dict): + return statvfs_result(tup, dict) + +def _pickle_statvfs_result(sr): + (type, args) = sr.__reduce__() + return (_make_statvfs_result, args) + +try: + _copy_reg.pickle(statvfs_result, _pickle_statvfs_result, + _make_statvfs_result) +except NameError: # statvfs_result may not exist + pass
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/posixpath.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,439 @@ +"""Common operations on Posix pathnames. + +Instead of importing this module directly, import os and refer to +this module as os.path. The "os.path" name is an alias for this +module on Posix systems; on other systems (e.g. Mac, Windows), +os.path provides the same operations in a manner specific to that +platform, and is an alias to another module (e.g. macpath, ntpath). + +Some of this can actually be useful on non-Posix systems too, e.g. +for manipulation of the pathname component of URLs. +""" + +import os +import sys +import stat +import genericpath +import warnings +from genericpath import * +from genericpath import _unicode + +__all__ = ["normcase","isabs","join","splitdrive","split","splitext", + "basename","dirname","commonprefix","getsize","getmtime", + "getatime","getctime","islink","exists","lexists","isdir","isfile", + "ismount","walk","expanduser","expandvars","normpath","abspath", + "samefile","sameopenfile","samestat", + "curdir","pardir","sep","pathsep","defpath","altsep","extsep", + "devnull","realpath","supports_unicode_filenames","relpath"] + +# strings representing various path-related bits and pieces +curdir = '.' +pardir = '..' +extsep = '.' +sep = '/' +pathsep = ':' +defpath = ':/bin:/usr/bin' +altsep = None +devnull = '/dev/null' + +# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. +# On MS-DOS this may also turn slashes into backslashes; however, other +# normalizations (such as optimizing '../' away) are not allowed +# (another function should be defined to do that). + +def normcase(s): + """Normalize case of pathname. Has no effect under Posix""" + return s + + +# Return whether a path is absolute. +# Trivial in Posix, harder on the Mac or MS-DOS. + +def isabs(s): + """Test whether a path is absolute""" + return s.startswith('/') + + +# Join pathnames. +# Ignore the previous parts if a part is absolute. +# Insert a '/' unless the first part is empty or already ends in '/'. + +def join(a, *p): + """Join two or more pathname components, inserting '/' as needed. + If any component is an absolute path, all previous path components + will be discarded. An empty last part will result in a path that + ends with a separator.""" + path = a + for b in p: + if b.startswith('/'): + path = b + elif path == '' or path.endswith('/'): + path += b + else: + path += '/' + b + return path + + +# Split a path in head (everything up to the last '/') and tail (the +# rest). If the path ends in '/', tail will be empty. If there is no +# '/' in the path, head will be empty. +# Trailing '/'es are stripped from head unless it is the root. + +def split(p): + """Split a pathname. Returns tuple "(head, tail)" where "tail" is + everything after the final slash. Either part may be empty.""" + i = p.rfind('/') + 1 + head, tail = p[:i], p[i:] + if head and head != '/'*len(head): + head = head.rstrip('/') + return head, tail + + +# Split a path in root and extension. +# The extension is everything starting at the last dot in the last +# pathname component; the root is everything before that. +# It is always true that root + ext == p. + +def splitext(p): + return genericpath._splitext(p, sep, altsep, extsep) +splitext.__doc__ = genericpath._splitext.__doc__ + +# Split a pathname into a drive specification and the rest of the +# path. Useful on DOS/Windows/NT; on Unix, the drive is always empty. + +def splitdrive(p): + """Split a pathname into drive and path. On Posix, drive is always + empty.""" + return '', p + + +# Return the tail (basename) part of a path, same as split(path)[1]. + +def basename(p): + """Returns the final component of a pathname""" + i = p.rfind('/') + 1 + return p[i:] + + +# Return the head (dirname) part of a path, same as split(path)[0]. + +def dirname(p): + """Returns the directory component of a pathname""" + i = p.rfind('/') + 1 + head = p[:i] + if head and head != '/'*len(head): + head = head.rstrip('/') + return head + + +# Is a path a symbolic link? +# This will always return false on systems where os.lstat doesn't exist. + +def islink(path): + """Test whether a path is a symbolic link""" + try: + st = os.lstat(path) + except (os.error, AttributeError): + return False + return stat.S_ISLNK(st.st_mode) + +# Being true for dangling symbolic links is also useful. + +def lexists(path): + """Test whether a path exists. Returns True for broken symbolic links""" + try: + os.lstat(path) + except os.error: + return False + return True + + +# Are two filenames really pointing to the same file? + +def samefile(f1, f2): + """Test whether two pathnames reference the same actual file""" + s1 = os.stat(f1) + s2 = os.stat(f2) + return samestat(s1, s2) + + +# Are two open files really referencing the same file? +# (Not necessarily the same file descriptor!) + +def sameopenfile(fp1, fp2): + """Test whether two open file objects reference the same file""" + s1 = os.fstat(fp1) + s2 = os.fstat(fp2) + return samestat(s1, s2) + + +# Are two stat buffers (obtained from stat, fstat or lstat) +# describing the same file? + +def samestat(s1, s2): + """Test whether two stat buffers reference the same file""" + return s1.st_ino == s2.st_ino and \ + s1.st_dev == s2.st_dev + + +# Is a path a mount point? +# (Does this work for all UNIXes? Is it even guaranteed to work by Posix?) + +def ismount(path): + """Test whether a path is a mount point""" + if islink(path): + # A symlink can never be a mount point + return False + try: + s1 = os.lstat(path) + s2 = os.lstat(join(path, '..')) + except os.error: + return False # It doesn't exist -- so not a mount point :-) + dev1 = s1.st_dev + dev2 = s2.st_dev + if dev1 != dev2: + return True # path/.. on a different device as path + ino1 = s1.st_ino + ino2 = s2.st_ino + if ino1 == ino2: + return True # path/.. is the same i-node as path + return False + + +# Directory tree walk. +# For each directory under top (including top itself, but excluding +# '.' and '..'), func(arg, dirname, filenames) is called, where +# dirname is the name of the directory and filenames is the list +# of files (and subdirectories etc.) in the directory. +# The func may modify the filenames list, to implement a filter, +# or to impose a different order of visiting. + +def walk(top, func, arg): + """Directory tree walk with callback function. + + For each directory in the directory tree rooted at top (including top + itself, but excluding '.' and '..'), call func(arg, dirname, fnames). + dirname is the name of the directory, and fnames a list of the names of + the files and subdirectories in dirname (excluding '.' and '..'). func + may modify the fnames list in-place (e.g. via del or slice assignment), + and walk will only recurse into the subdirectories whose names remain in + fnames; this can be used to implement a filter, or to impose a specific + order of visiting. No semantics are defined for, or required of, arg, + beyond that arg is always passed to func. It can be used, e.g., to pass + a filename pattern, or a mutable object designed to accumulate + statistics. Passing None for arg is common.""" + warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.", + stacklevel=2) + try: + names = os.listdir(top) + except os.error: + return + func(arg, top, names) + for name in names: + name = join(top, name) + try: + st = os.lstat(name) + except os.error: + continue + if stat.S_ISDIR(st.st_mode): + walk(name, func, arg) + + +# Expand paths beginning with '~' or '~user'. +# '~' means $HOME; '~user' means that user's home directory. +# If the path doesn't begin with '~', or if the user or $HOME is unknown, +# the path is returned unchanged (leaving error reporting to whatever +# function is called with the expanded path as argument). +# See also module 'glob' for expansion of *, ? and [...] in pathnames. +# (A function should also be defined to do full *sh-style environment +# variable expansion.) + +def expanduser(path): + """Expand ~ and ~user constructions. If user or $HOME is unknown, + do nothing.""" + if not path.startswith('~'): + return path + i = path.find('/', 1) + if i < 0: + i = len(path) + if i == 1: + if 'HOME' not in os.environ: + import pwd + userhome = pwd.getpwuid(os.getuid()).pw_dir + else: + userhome = os.environ['HOME'] + else: + import pwd + try: + pwent = pwd.getpwnam(path[1:i]) + except KeyError: + return path + userhome = pwent.pw_dir + userhome = userhome.rstrip('/') + return (userhome + path[i:]) or '/' + + +# Expand paths containing shell variable substitutions. +# This expands the forms $variable and ${variable} only. +# Non-existent variables are left unchanged. + +_varprog = None +_uvarprog = None + +def expandvars(path): + """Expand shell variables of form $var and ${var}. Unknown variables + are left unchanged.""" + global _varprog, _uvarprog + if '$' not in path: + return path + if isinstance(path, _unicode): + if not _uvarprog: + import re + _uvarprog = re.compile(ur'\$(\w+|\{[^}]*\})', re.UNICODE) + varprog = _uvarprog + encoding = sys.getfilesystemencoding() + else: + if not _varprog: + import re + _varprog = re.compile(r'\$(\w+|\{[^}]*\})') + varprog = _varprog + encoding = None + i = 0 + while True: + m = varprog.search(path, i) + if not m: + break + i, j = m.span(0) + name = m.group(1) + if name.startswith('{') and name.endswith('}'): + name = name[1:-1] + if encoding: + name = name.encode(encoding) + if name in os.environ: + tail = path[j:] + value = os.environ[name] + if encoding: + value = value.decode(encoding) + path = path[:i] + value + i = len(path) + path += tail + else: + i = j + return path + + +# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. +# It should be understood that this may change the meaning of the path +# if it contains symbolic links! + +def normpath(path): + """Normalize path, eliminating double slashes, etc.""" + # Preserve unicode (if path is unicode) + slash, dot = (u'/', u'.') if isinstance(path, _unicode) else ('/', '.') + if path == '': + return dot + initial_slashes = path.startswith('/') + # POSIX allows one or two initial slashes, but treats three or more + # as single slash. + if (initial_slashes and + path.startswith('//') and not path.startswith('///')): + initial_slashes = 2 + comps = path.split('/') + new_comps = [] + for comp in comps: + if comp in ('', '.'): + continue + if (comp != '..' or (not initial_slashes and not new_comps) or + (new_comps and new_comps[-1] == '..')): + new_comps.append(comp) + elif new_comps: + new_comps.pop() + comps = new_comps + path = slash.join(comps) + if initial_slashes: + path = slash*initial_slashes + path + return path or dot + + +def abspath(path): + """Return an absolute path.""" + if not isabs(path): + if isinstance(path, _unicode): + cwd = os.getcwdu() + else: + cwd = os.getcwd() + path = join(cwd, path) + return normpath(path) + + +# Return a canonical path (i.e. the absolute location of a file on the +# filesystem). + +def realpath(filename): + """Return the canonical path of the specified filename, eliminating any +symbolic links encountered in the path.""" + path, ok = _joinrealpath('', filename, {}) + return abspath(path) + +# Join two paths, normalizing and eliminating any symbolic links +# encountered in the second path. +def _joinrealpath(path, rest, seen): + if isabs(rest): + rest = rest[1:] + path = sep + + while rest: + name, _, rest = rest.partition(sep) + if not name or name == curdir: + # current dir + continue + if name == pardir: + # parent dir + if path: + path, name = split(path) + if name == pardir: + path = join(path, pardir, pardir) + else: + path = pardir + continue + newpath = join(path, name) + if not islink(newpath): + path = newpath + continue + # Resolve the symbolic link + if newpath in seen: + # Already seen this path + path = seen[newpath] + if path is not None: + # use cached value + continue + # The symlink is not resolved, so we must have a symlink loop. + # Return already resolved part + rest of the path unchanged. + return join(newpath, rest), False + seen[newpath] = None # not resolved symlink + path, ok = _joinrealpath(path, os.readlink(newpath), seen) + if not ok: + return join(path, rest), False + seen[newpath] = path # resolved symlink + + return path, True + + +supports_unicode_filenames = (sys.platform == 'darwin') + +def relpath(path, start=curdir): + """Return a relative version of a path""" + + if not path: + raise ValueError("no path specified") + + start_list = [x for x in abspath(start).split(sep) if x] + path_list = [x for x in abspath(path).split(sep) if x] + + # Work out how much of the filepath is shared by start and path. + i = len(commonprefix([start_list, path_list])) + + rel_list = [pardir] * (len(start_list)-i) + path_list[i:] + if not rel_list: + return curdir + return join(*rel_list)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/re.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,340 @@ +# +# Secret Labs' Regular Expression Engine +# +# re-compatible interface for the sre matching engine +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# This version of the SRE library can be redistributed under CNRI's +# Python 1.6 license. For any other use, please contact Secret Labs +# AB (info@pythonware.com). +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# + +r"""Support for regular expressions (RE). + +This module provides regular expression matching operations similar to +those found in Perl. It supports both 8-bit and Unicode strings; both +the pattern and the strings being processed can contain null bytes and +characters outside the US ASCII range. + +Regular expressions can contain both special and ordinary characters. +Most ordinary characters, like "A", "a", or "0", are the simplest +regular expressions; they simply match themselves. You can +concatenate ordinary characters, so last matches the string 'last'. + +The special characters are: + "." Matches any character except a newline. + "^" Matches the start of the string. + "$" Matches the end of the string or just before the newline at + the end of the string. + "*" Matches 0 or more (greedy) repetitions of the preceding RE. + Greedy means that it will match as many repetitions as possible. + "+" Matches 1 or more (greedy) repetitions of the preceding RE. + "?" Matches 0 or 1 (greedy) of the preceding RE. + *?,+?,?? Non-greedy versions of the previous three special characters. + {m,n} Matches from m to n repetitions of the preceding RE. + {m,n}? Non-greedy version of the above. + "\\" Either escapes special characters or signals a special sequence. + [] Indicates a set of characters. + A "^" as the first character indicates a complementing set. + "|" A|B, creates an RE that will match either A or B. + (...) Matches the RE inside the parentheses. + The contents can be retrieved or matched later in the string. + (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below). + (?:...) Non-grouping version of regular parentheses. + (?P<name>...) The substring matched by the group is accessible by name. + (?P=name) Matches the text matched earlier by the group named name. + (?#...) A comment; ignored. + (?=...) Matches if ... matches next, but doesn't consume the string. + (?!...) Matches if ... doesn't match next. + (?<=...) Matches if preceded by ... (must be fixed length). + (?<!...) Matches if not preceded by ... (must be fixed length). + (?(id/name)yes|no) Matches yes pattern if the group with id/name matched, + the (optional) no pattern otherwise. + +The special sequences consist of "\\" and a character from the list +below. If the ordinary character is not on the list, then the +resulting RE will match the second character. + \number Matches the contents of the group of the same number. + \A Matches only at the start of the string. + \Z Matches only at the end of the string. + \b Matches the empty string, but only at the start or end of a word. + \B Matches the empty string, but not at the start or end of a word. + \d Matches any decimal digit; equivalent to the set [0-9]. + \D Matches any non-digit character; equivalent to the set [^0-9]. + \s Matches any whitespace character; equivalent to [ \t\n\r\f\v]. + \S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v]. + \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]. + With LOCALE, it will match the set [0-9_] plus characters defined + as letters for the current locale. + \W Matches the complement of \w. + \\ Matches a literal backslash. + +This module exports the following functions: + match Match a regular expression pattern to the beginning of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string. + subn Same as sub, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a match object for each match. + compile Compile a pattern into a RegexObject. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics in a string. + +Some of the functions in this module takes flags as optional parameters: + I IGNORECASE Perform case-insensitive matching. + L LOCALE Make \w, \W, \b, \B, dependent on the current locale. + M MULTILINE "^" matches the beginning of lines (after a newline) + as well as the string. + "$" matches the end of lines (before a newline) as well + as the end of the string. + S DOTALL "." matches any character at all, including the newline. + X VERBOSE Ignore whitespace and comments for nicer looking RE's. + U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale. + +This module also defines an exception 'error'. + +""" + +import sys +import sre_compile +import sre_parse +try: + import _locale +except ImportError: + _locale = None + +# public symbols +__all__ = [ "match", "search", "sub", "subn", "split", "findall", + "compile", "purge", "template", "escape", "I", "L", "M", "S", "X", + "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", + "UNICODE", "error" ] + +__version__ = "2.2.1" + +# flags +I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case +L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale +U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale +M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline +S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline +X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments + +# sre extensions (experimental, don't rely on these) +T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking +DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation + +# sre exception +error = sre_compile.error + +# -------------------------------------------------------------------- +# public interface + +def match(pattern, string, flags=0): + """Try to apply the pattern at the start of the string, returning + a match object, or None if no match was found.""" + return _compile(pattern, flags).match(string) + +def search(pattern, string, flags=0): + """Scan through string looking for a match to the pattern, returning + a match object, or None if no match was found.""" + return _compile(pattern, flags).search(string) + +def sub(pattern, repl, string, count=0, flags=0): + """Return the string obtained by replacing the leftmost + non-overlapping occurrences of the pattern in string by the + replacement repl. repl can be either a string or a callable; + if a string, backslash escapes in it are processed. If it is + a callable, it's passed the match object and must return + a replacement string to be used.""" + return _compile(pattern, flags).sub(repl, string, count) + +def subn(pattern, repl, string, count=0, flags=0): + """Return a 2-tuple containing (new_string, number). + new_string is the string obtained by replacing the leftmost + non-overlapping occurrences of the pattern in the source + string by the replacement repl. number is the number of + substitutions that were made. repl can be either a string or a + callable; if a string, backslash escapes in it are processed. + If it is a callable, it's passed the match object and must + return a replacement string to be used.""" + return _compile(pattern, flags).subn(repl, string, count) + +def split(pattern, string, maxsplit=0, flags=0): + """Split the source string by the occurrences of the pattern, + returning a list containing the resulting substrings.""" + return _compile(pattern, flags).split(string, maxsplit) + +def findall(pattern, string, flags=0): + """Return a list of all non-overlapping matches in the string. + + If one or more groups are present in the pattern, return a + list of groups; this will be a list of tuples if the pattern + has more than one group. + + Empty matches are included in the result.""" + return _compile(pattern, flags).findall(string) + +if sys.hexversion >= 0x02020000: + __all__.append("finditer") + def finditer(pattern, string, flags=0): + """Return an iterator over all non-overlapping matches in the + string. For each match, the iterator returns a match object. + + Empty matches are included in the result.""" + return _compile(pattern, flags).finditer(string) + +def compile(pattern, flags=0): + "Compile a regular expression pattern, returning a pattern object." + return _compile(pattern, flags) + +def purge(): + "Clear the regular expression cache" + _cache.clear() + _cache_repl.clear() + +def template(pattern, flags=0): + "Compile a template pattern, returning a pattern object" + return _compile(pattern, flags|T) + +_alphanum = frozenset( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") + +def escape(pattern): + "Escape all non-alphanumeric characters in pattern." + s = list(pattern) + alphanum = _alphanum + for i, c in enumerate(pattern): + if c not in alphanum: + if c == "\000": + s[i] = "\\000" + else: + s[i] = "\\" + c + return pattern[:0].join(s) + +# -------------------------------------------------------------------- +# internals + +_cache = {} +_cache_repl = {} + +_pattern_type = type(sre_compile.compile("", 0)) + +_MAXCACHE = 100 + +def _compile(*key): + # internal: compile pattern + pattern, flags = key + bypass_cache = flags & DEBUG + if not bypass_cache: + cachekey = (type(key[0]),) + key + try: + p, loc = _cache[cachekey] + if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE): + return p + except KeyError: + pass + if isinstance(pattern, _pattern_type): + if flags: + raise ValueError('Cannot process flags argument with a compiled pattern') + return pattern + if not sre_compile.isstring(pattern): + raise TypeError, "first argument must be string or compiled pattern" + try: + p = sre_compile.compile(pattern, flags) + except error, v: + raise error, v # invalid expression + if not bypass_cache: + if len(_cache) >= _MAXCACHE: + _cache.clear() + if p.flags & LOCALE: + if not _locale: + return p + loc = _locale.setlocale(_locale.LC_CTYPE) + else: + loc = None + _cache[cachekey] = p, loc + return p + +def _compile_repl(*key): + # internal: compile replacement pattern + p = _cache_repl.get(key) + if p is not None: + return p + repl, pattern = key + try: + p = sre_parse.parse_template(repl, pattern) + except error, v: + raise error, v # invalid expression + if len(_cache_repl) >= _MAXCACHE: + _cache_repl.clear() + _cache_repl[key] = p + return p + +def _expand(pattern, match, template): + # internal: match.expand implementation hook + template = sre_parse.parse_template(template, pattern) + return sre_parse.expand_template(template, match) + +def _subx(pattern, template): + # internal: pattern.sub/subn implementation helper + template = _compile_repl(template, pattern) + if not template[0] and len(template[1]) == 1: + # literal replacement + return template[1][0] + def filter(match, template=template): + return sre_parse.expand_template(template, match) + return filter + +# register myself for pickling + +import copy_reg + +def _pickle(p): + return _compile, (p.pattern, p.flags) + +copy_reg.pickle(_pattern_type, _pickle, _compile) + +# -------------------------------------------------------------------- +# experimental stuff (see python-dev discussions for details) + +class Scanner: + def __init__(self, lexicon, flags=0): + from sre_constants import BRANCH, SUBPATTERN + self.lexicon = lexicon + # combine phrases into a compound pattern + p = [] + s = sre_parse.Pattern() + s.flags = flags + for phrase, action in lexicon: + p.append(sre_parse.SubPattern(s, [ + (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))), + ])) + s.groups = len(p)+1 + p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) + self.scanner = sre_compile.compile(p) + def scan(self, string): + result = [] + append = result.append + match = self.scanner.scanner(string).match + i = 0 + while 1: + m = match() + if not m: + break + j = m.end() + if i == j: + break + action = self.lexicon[m.lastindex-1][1] + if hasattr(action, '__call__'): + self.match = m + action = action(self, m.group()) + if action is not None: + append(action) + i = j + return result, string[i:]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/site.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,758 @@ +"""Append module search paths for third-party packages to sys.path. + +**************************************************************** +* This module is automatically imported during initialization. * +**************************************************************** + +In earlier versions of Python (up to 1.5a3), scripts or modules that +needed to use site-specific modules would place ``import site'' +somewhere near the top of their code. Because of the automatic +import, this is no longer necessary (but code that does it still +works). + +This will append site-specific paths to the module search path. On +Unix, it starts with sys.prefix and sys.exec_prefix (if different) and +appends lib/python<version>/site-packages as well as lib/site-python. +It also supports the Debian convention of +lib/python<version>/dist-packages. On other platforms (mainly Mac and +Windows), it uses just sys.prefix (and sys.exec_prefix, if different, +but this is unlikely). The resulting directories, if they exist, are +appended to sys.path, and also inspected for path configuration files. + +FOR DEBIAN, this sys.path is augmented with directories in /usr/local. +Local addons go into /usr/local/lib/python<version>/site-packages +(resp. /usr/local/lib/site-python), Debian addons install into +/usr/{lib,share}/python<version>/dist-packages. + +A path configuration file is a file whose name has the form +<package>.pth; its contents are additional directories (one per line) +to be added to sys.path. Non-existing directories (or +non-directories) are never added to sys.path; no directory is added to +sys.path more than once. Blank lines and lines beginning with +'#' are skipped. Lines starting with 'import' are executed. + +For example, suppose sys.prefix and sys.exec_prefix are set to +/usr/local and there is a directory /usr/local/lib/python2.X/site-packages +with three subdirectories, foo, bar and spam, and two path +configuration files, foo.pth and bar.pth. Assume foo.pth contains the +following: + + # foo package configuration + foo + bar + bletch + +and bar.pth contains: + + # bar package configuration + bar + +Then the following directories are added to sys.path, in this order: + + /usr/local/lib/python2.X/site-packages/bar + /usr/local/lib/python2.X/site-packages/foo + +Note that bletch is omitted because it doesn't exist; bar precedes foo +because bar.pth comes alphabetically before foo.pth; and spam is +omitted because it is not mentioned in either path configuration file. + +After these path manipulations, an attempt is made to import a module +named sitecustomize, which can perform arbitrary additional +site-specific customizations. If this import fails with an +ImportError exception, it is silently ignored. + +""" + +import sys +import os +try: + import __builtin__ as builtins +except ImportError: + import builtins +try: + set +except NameError: + from sets import Set as set + +# Prefixes for site-packages; add additional prefixes like /usr/local here +PREFIXES = [sys.prefix, sys.exec_prefix] +# Enable per user site-packages directory +# set it to False to disable the feature or True to force the feature +ENABLE_USER_SITE = None +# for distutils.commands.install +USER_SITE = None +USER_BASE = None + +_is_64bit = (getattr(sys, 'maxsize', None) or getattr(sys, 'maxint')) > 2**32 +_is_pypy = hasattr(sys, 'pypy_version_info') +_is_jython = sys.platform[:4] == 'java' +if _is_jython: + ModuleType = type(os) + +def makepath(*paths): + dir = os.path.join(*paths) + if _is_jython and (dir == '__classpath__' or + dir.startswith('__pyclasspath__')): + return dir, dir + dir = os.path.abspath(dir) + return dir, os.path.normcase(dir) + +def abs__file__(): + """Set all module' __file__ attribute to an absolute path""" + for m in sys.modules.values(): + if ((_is_jython and not isinstance(m, ModuleType)) or + hasattr(m, '__loader__')): + # only modules need the abspath in Jython. and don't mess + # with a PEP 302-supplied __file__ + continue + f = getattr(m, '__file__', None) + if f is None: + continue + m.__file__ = os.path.abspath(f) + +def removeduppaths(): + """ Remove duplicate entries from sys.path along with making them + absolute""" + # This ensures that the initial path provided by the interpreter contains + # only absolute pathnames, even if we're running from the build directory. + L = [] + known_paths = set() + for dir in sys.path: + # Filter out duplicate paths (on case-insensitive file systems also + # if they only differ in case); turn relative paths into absolute + # paths. + dir, dircase = makepath(dir) + if not dircase in known_paths: + L.append(dir) + known_paths.add(dircase) + sys.path[:] = L + return known_paths + +# XXX This should not be part of site.py, since it is needed even when +# using the -S option for Python. See http://www.python.org/sf/586680 +def addbuilddir(): + """Append ./build/lib.<platform> in case we're running in the build dir + (especially for Guido :-)""" + from distutils.util import get_platform + s = "build/lib.%s-%.3s" % (get_platform(), sys.version) + if hasattr(sys, 'gettotalrefcount'): + s += '-pydebug' + s = os.path.join(os.path.dirname(sys.path[-1]), s) + sys.path.append(s) + +def _init_pathinfo(): + """Return a set containing all existing directory entries from sys.path""" + d = set() + for dir in sys.path: + try: + if os.path.isdir(dir): + dir, dircase = makepath(dir) + d.add(dircase) + except TypeError: + continue + return d + +def addpackage(sitedir, name, known_paths): + """Add a new path to known_paths by combining sitedir and 'name' or execute + sitedir if it starts with 'import'""" + if known_paths is None: + _init_pathinfo() + reset = 1 + else: + reset = 0 + fullname = os.path.join(sitedir, name) + try: + f = open(fullname, "rU") + except IOError: + return + try: + for line in f: + if line.startswith("#"): + continue + if line.startswith("import"): + exec(line) + continue + line = line.rstrip() + dir, dircase = makepath(sitedir, line) + if not dircase in known_paths and os.path.exists(dir): + sys.path.append(dir) + known_paths.add(dircase) + finally: + f.close() + if reset: + known_paths = None + return known_paths + +def addsitedir(sitedir, known_paths=None): + """Add 'sitedir' argument to sys.path if missing and handle .pth files in + 'sitedir'""" + if known_paths is None: + known_paths = _init_pathinfo() + reset = 1 + else: + reset = 0 + sitedir, sitedircase = makepath(sitedir) + if not sitedircase in known_paths: + sys.path.append(sitedir) # Add path component + try: + names = os.listdir(sitedir) + except os.error: + return + names.sort() + for name in names: + if name.endswith(os.extsep + "pth"): + addpackage(sitedir, name, known_paths) + if reset: + known_paths = None + return known_paths + +def addsitepackages(known_paths, sys_prefix=sys.prefix, exec_prefix=sys.exec_prefix): + """Add site-packages (and possibly site-python) to sys.path""" + prefixes = [os.path.join(sys_prefix, "local"), sys_prefix] + if exec_prefix != sys_prefix: + prefixes.append(os.path.join(exec_prefix, "local")) + + for prefix in prefixes: + if prefix: + if sys.platform in ('os2emx', 'riscos') or _is_jython: + sitedirs = [os.path.join(prefix, "Lib", "site-packages")] + elif _is_pypy: + sitedirs = [os.path.join(prefix, 'site-packages')] + elif sys.platform == 'darwin' and prefix == sys_prefix: + + if prefix.startswith("/System/Library/Frameworks/"): # Apple's Python + + sitedirs = [os.path.join("/Library/Python", sys.version[:3], "site-packages"), + os.path.join(prefix, "Extras", "lib", "python")] + + else: # any other Python distros on OSX work this way + sitedirs = [os.path.join(prefix, "lib", + "python" + sys.version[:3], "site-packages")] + + elif os.sep == '/': + sitedirs = [os.path.join(prefix, + "lib", + "python" + sys.version[:3], + "site-packages"), + os.path.join(prefix, "lib", "site-python"), + os.path.join(prefix, "python" + sys.version[:3], "lib-dynload")] + lib64_dir = os.path.join(prefix, "lib64", "python" + sys.version[:3], "site-packages") + if (os.path.exists(lib64_dir) and + os.path.realpath(lib64_dir) not in [os.path.realpath(p) for p in sitedirs]): + if _is_64bit: + sitedirs.insert(0, lib64_dir) + else: + sitedirs.append(lib64_dir) + try: + # sys.getobjects only available in --with-pydebug build + sys.getobjects + sitedirs.insert(0, os.path.join(sitedirs[0], 'debug')) + except AttributeError: + pass + # Debian-specific dist-packages directories: + sitedirs.append(os.path.join(prefix, "local/lib", + "python" + sys.version[:3], + "dist-packages")) + if sys.version[0] == '2': + sitedirs.append(os.path.join(prefix, "lib", + "python" + sys.version[:3], + "dist-packages")) + else: + sitedirs.append(os.path.join(prefix, "lib", + "python" + sys.version[0], + "dist-packages")) + sitedirs.append(os.path.join(prefix, "lib", "dist-python")) + else: + sitedirs = [prefix, os.path.join(prefix, "lib", "site-packages")] + if sys.platform == 'darwin': + # for framework builds *only* we add the standard Apple + # locations. Currently only per-user, but /Library and + # /Network/Library could be added too + if 'Python.framework' in prefix: + home = os.environ.get('HOME') + if home: + sitedirs.append( + os.path.join(home, + 'Library', + 'Python', + sys.version[:3], + 'site-packages')) + for sitedir in sitedirs: + if os.path.isdir(sitedir): + addsitedir(sitedir, known_paths) + return None + +def check_enableusersite(): + """Check if user site directory is safe for inclusion + + The function tests for the command line flag (including environment var), + process uid/gid equal to effective uid/gid. + + None: Disabled for security reasons + False: Disabled by user (command line option) + True: Safe and enabled + """ + if hasattr(sys, 'flags') and getattr(sys.flags, 'no_user_site', False): + return False + + if hasattr(os, "getuid") and hasattr(os, "geteuid"): + # check process uid == effective uid + if os.geteuid() != os.getuid(): + return None + if hasattr(os, "getgid") and hasattr(os, "getegid"): + # check process gid == effective gid + if os.getegid() != os.getgid(): + return None + + return True + +def addusersitepackages(known_paths): + """Add a per user site-package to sys.path + + Each user has its own python directory with site-packages in the + home directory. + + USER_BASE is the root directory for all Python versions + + USER_SITE is the user specific site-packages directory + + USER_SITE/.. can be used for data. + """ + global USER_BASE, USER_SITE, ENABLE_USER_SITE + env_base = os.environ.get("PYTHONUSERBASE", None) + + def joinuser(*args): + return os.path.expanduser(os.path.join(*args)) + + #if sys.platform in ('os2emx', 'riscos'): + # # Don't know what to put here + # USER_BASE = '' + # USER_SITE = '' + if os.name == "nt": + base = os.environ.get("APPDATA") or "~" + if env_base: + USER_BASE = env_base + else: + USER_BASE = joinuser(base, "Python") + USER_SITE = os.path.join(USER_BASE, + "Python" + sys.version[0] + sys.version[2], + "site-packages") + else: + if env_base: + USER_BASE = env_base + else: + USER_BASE = joinuser("~", ".local") + USER_SITE = os.path.join(USER_BASE, "lib", + "python" + sys.version[:3], + "site-packages") + + if ENABLE_USER_SITE and os.path.isdir(USER_SITE): + addsitedir(USER_SITE, known_paths) + if ENABLE_USER_SITE: + for dist_libdir in ("lib", "local/lib"): + user_site = os.path.join(USER_BASE, dist_libdir, + "python" + sys.version[:3], + "dist-packages") + if os.path.isdir(user_site): + addsitedir(user_site, known_paths) + return known_paths + + + +def setBEGINLIBPATH(): + """The OS/2 EMX port has optional extension modules that do double duty + as DLLs (and must use the .DLL file extension) for other extensions. + The library search path needs to be amended so these will be found + during module import. Use BEGINLIBPATH so that these are at the start + of the library search path. + + """ + dllpath = os.path.join(sys.prefix, "Lib", "lib-dynload") + libpath = os.environ['BEGINLIBPATH'].split(';') + if libpath[-1]: + libpath.append(dllpath) + else: + libpath[-1] = dllpath + os.environ['BEGINLIBPATH'] = ';'.join(libpath) + + +def setquit(): + """Define new built-ins 'quit' and 'exit'. + These are simply strings that display a hint on how to exit. + + """ + if os.sep == ':': + eof = 'Cmd-Q' + elif os.sep == '\\': + eof = 'Ctrl-Z plus Return' + else: + eof = 'Ctrl-D (i.e. EOF)' + + class Quitter(object): + def __init__(self, name): + self.name = name + def __repr__(self): + return 'Use %s() or %s to exit' % (self.name, eof) + def __call__(self, code=None): + # Shells like IDLE catch the SystemExit, but listen when their + # stdin wrapper is closed. + try: + sys.stdin.close() + except: + pass + raise SystemExit(code) + builtins.quit = Quitter('quit') + builtins.exit = Quitter('exit') + + +class _Printer(object): + """interactive prompt objects for printing the license text, a list of + contributors and the copyright notice.""" + + MAXLINES = 23 + + def __init__(self, name, data, files=(), dirs=()): + self.__name = name + self.__data = data + self.__files = files + self.__dirs = dirs + self.__lines = None + + def __setup(self): + if self.__lines: + return + data = None + for dir in self.__dirs: + for filename in self.__files: + filename = os.path.join(dir, filename) + try: + fp = open(filename, "rU") + data = fp.read() + fp.close() + break + except IOError: + pass + if data: + break + if not data: + data = self.__data + self.__lines = data.split('\n') + self.__linecnt = len(self.__lines) + + def __repr__(self): + self.__setup() + if len(self.__lines) <= self.MAXLINES: + return "\n".join(self.__lines) + else: + return "Type %s() to see the full %s text" % ((self.__name,)*2) + + def __call__(self): + self.__setup() + prompt = 'Hit Return for more, or q (and Return) to quit: ' + lineno = 0 + while 1: + try: + for i in range(lineno, lineno + self.MAXLINES): + print(self.__lines[i]) + except IndexError: + break + else: + lineno += self.MAXLINES + key = None + while key is None: + try: + key = raw_input(prompt) + except NameError: + key = input(prompt) + if key not in ('', 'q'): + key = None + if key == 'q': + break + +def setcopyright(): + """Set 'copyright' and 'credits' in __builtin__""" + builtins.copyright = _Printer("copyright", sys.copyright) + if _is_jython: + builtins.credits = _Printer( + "credits", + "Jython is maintained by the Jython developers (www.jython.org).") + elif _is_pypy: + builtins.credits = _Printer( + "credits", + "PyPy is maintained by the PyPy developers: http://pypy.org/") + else: + builtins.credits = _Printer("credits", """\ + Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands + for supporting Python development. See www.python.org for more information.""") + here = os.path.dirname(os.__file__) + builtins.license = _Printer( + "license", "See http://www.python.org/%.3s/license.html" % sys.version, + ["LICENSE.txt", "LICENSE"], + [os.path.join(here, os.pardir), here, os.curdir]) + + +class _Helper(object): + """Define the built-in 'help'. + This is a wrapper around pydoc.help (with a twist). + + """ + + def __repr__(self): + return "Type help() for interactive help, " \ + "or help(object) for help about object." + def __call__(self, *args, **kwds): + import pydoc + return pydoc.help(*args, **kwds) + +def sethelper(): + builtins.help = _Helper() + +def aliasmbcs(): + """On Windows, some default encodings are not provided by Python, + while they are always available as "mbcs" in each locale. Make + them usable by aliasing to "mbcs" in such a case.""" + if sys.platform == 'win32': + import locale, codecs + enc = locale.getdefaultlocale()[1] + if enc.startswith('cp'): # "cp***" ? + try: + codecs.lookup(enc) + except LookupError: + import encodings + encodings._cache[enc] = encodings._unknown + encodings.aliases.aliases[enc] = 'mbcs' + +def setencoding(): + """Set the string encoding used by the Unicode implementation. The + default is 'ascii', but if you're willing to experiment, you can + change this.""" + encoding = "ascii" # Default value set by _PyUnicode_Init() + if 0: + # Enable to support locale aware default string encodings. + import locale + loc = locale.getdefaultlocale() + if loc[1]: + encoding = loc[1] + if 0: + # Enable to switch off string to Unicode coercion and implicit + # Unicode to string conversion. + encoding = "undefined" + if encoding != "ascii": + # On Non-Unicode builds this will raise an AttributeError... + sys.setdefaultencoding(encoding) # Needs Python Unicode build ! + + +def execsitecustomize(): + """Run custom site specific code, if available.""" + try: + import sitecustomize + except ImportError: + pass + +def virtual_install_main_packages(): + f = open(os.path.join(os.path.dirname(__file__), 'orig-prefix.txt')) + sys.real_prefix = f.read().strip() + f.close() + pos = 2 + hardcoded_relative_dirs = [] + if sys.path[0] == '': + pos += 1 + if _is_jython: + paths = [os.path.join(sys.real_prefix, 'Lib')] + elif _is_pypy: + if sys.version_info > (3, 2): + cpyver = '%d' % sys.version_info[0] + elif sys.pypy_version_info >= (1, 5): + cpyver = '%d.%d' % sys.version_info[:2] + else: + cpyver = '%d.%d.%d' % sys.version_info[:3] + paths = [os.path.join(sys.real_prefix, 'lib_pypy'), + os.path.join(sys.real_prefix, 'lib-python', cpyver)] + if sys.pypy_version_info < (1, 9): + paths.insert(1, os.path.join(sys.real_prefix, + 'lib-python', 'modified-%s' % cpyver)) + hardcoded_relative_dirs = paths[:] # for the special 'darwin' case below + # + # This is hardcoded in the Python executable, but relative to sys.prefix: + for path in paths[:]: + plat_path = os.path.join(path, 'plat-%s' % sys.platform) + if os.path.exists(plat_path): + paths.append(plat_path) + elif sys.platform == 'win32': + paths = [os.path.join(sys.real_prefix, 'Lib'), os.path.join(sys.real_prefix, 'DLLs')] + else: + paths = [os.path.join(sys.real_prefix, 'lib', 'python'+sys.version[:3])] + hardcoded_relative_dirs = paths[:] # for the special 'darwin' case below + lib64_path = os.path.join(sys.real_prefix, 'lib64', 'python'+sys.version[:3]) + if os.path.exists(lib64_path): + if _is_64bit: + paths.insert(0, lib64_path) + else: + paths.append(lib64_path) + # This is hardcoded in the Python executable, but relative to + # sys.prefix. Debian change: we need to add the multiarch triplet + # here, which is where the real stuff lives. As per PEP 421, in + # Python 3.3+, this lives in sys.implementation, while in Python 2.7 + # it lives in sys. + try: + arch = getattr(sys, 'implementation', sys)._multiarch + except AttributeError: + # This is a non-multiarch aware Python. Fallback to the old way. + arch = sys.platform + plat_path = os.path.join(sys.real_prefix, 'lib', + 'python'+sys.version[:3], + 'plat-%s' % arch) + if os.path.exists(plat_path): + paths.append(plat_path) + # This is hardcoded in the Python executable, but + # relative to sys.prefix, so we have to fix up: + for path in list(paths): + tk_dir = os.path.join(path, 'lib-tk') + if os.path.exists(tk_dir): + paths.append(tk_dir) + + # These are hardcoded in the Apple's Python executable, + # but relative to sys.prefix, so we have to fix them up: + if sys.platform == 'darwin': + hardcoded_paths = [os.path.join(relative_dir, module) + for relative_dir in hardcoded_relative_dirs + for module in ('plat-darwin', 'plat-mac', 'plat-mac/lib-scriptpackages')] + + for path in hardcoded_paths: + if os.path.exists(path): + paths.append(path) + + sys.path.extend(paths) + +def force_global_eggs_after_local_site_packages(): + """ + Force easy_installed eggs in the global environment to get placed + in sys.path after all packages inside the virtualenv. This + maintains the "least surprise" result that packages in the + virtualenv always mask global packages, never the other way + around. + + """ + egginsert = getattr(sys, '__egginsert', 0) + for i, path in enumerate(sys.path): + if i > egginsert and path.startswith(sys.prefix): + egginsert = i + sys.__egginsert = egginsert + 1 + +def virtual_addsitepackages(known_paths): + force_global_eggs_after_local_site_packages() + return addsitepackages(known_paths, sys_prefix=sys.real_prefix) + +def fixclasspath(): + """Adjust the special classpath sys.path entries for Jython. These + entries should follow the base virtualenv lib directories. + """ + paths = [] + classpaths = [] + for path in sys.path: + if path == '__classpath__' or path.startswith('__pyclasspath__'): + classpaths.append(path) + else: + paths.append(path) + sys.path = paths + sys.path.extend(classpaths) + +def execusercustomize(): + """Run custom user specific code, if available.""" + try: + import usercustomize + except ImportError: + pass + + +def main(): + global ENABLE_USER_SITE + virtual_install_main_packages() + abs__file__() + paths_in_sys = removeduppaths() + if (os.name == "posix" and sys.path and + os.path.basename(sys.path[-1]) == "Modules"): + addbuilddir() + if _is_jython: + fixclasspath() + GLOBAL_SITE_PACKAGES = not os.path.exists(os.path.join(os.path.dirname(__file__), 'no-global-site-packages.txt')) + if not GLOBAL_SITE_PACKAGES: + ENABLE_USER_SITE = False + if ENABLE_USER_SITE is None: + ENABLE_USER_SITE = check_enableusersite() + paths_in_sys = addsitepackages(paths_in_sys) + paths_in_sys = addusersitepackages(paths_in_sys) + if GLOBAL_SITE_PACKAGES: + paths_in_sys = virtual_addsitepackages(paths_in_sys) + if sys.platform == 'os2emx': + setBEGINLIBPATH() + setquit() + setcopyright() + sethelper() + aliasmbcs() + setencoding() + execsitecustomize() + if ENABLE_USER_SITE: + execusercustomize() + # Remove sys.setdefaultencoding() so that users cannot change the + # encoding after initialization. The test for presence is needed when + # this module is run as a script, because this code is executed twice. + if hasattr(sys, "setdefaultencoding"): + del sys.setdefaultencoding + +main() + +def _script(): + help = """\ + %s [--user-base] [--user-site] + + Without arguments print some useful information + With arguments print the value of USER_BASE and/or USER_SITE separated + by '%s'. + + Exit codes with --user-base or --user-site: + 0 - user site directory is enabled + 1 - user site directory is disabled by user + 2 - uses site directory is disabled by super user + or for security reasons + >2 - unknown error + """ + args = sys.argv[1:] + if not args: + print("sys.path = [") + for dir in sys.path: + print(" %r," % (dir,)) + print("]") + def exists(path): + if os.path.isdir(path): + return "exists" + else: + return "doesn't exist" + print("USER_BASE: %r (%s)" % (USER_BASE, exists(USER_BASE))) + print("USER_SITE: %r (%s)" % (USER_SITE, exists(USER_BASE))) + print("ENABLE_USER_SITE: %r" % ENABLE_USER_SITE) + sys.exit(0) + + buffer = [] + if '--user-base' in args: + buffer.append(USER_BASE) + if '--user-site' in args: + buffer.append(USER_SITE) + + if buffer: + print(os.pathsep.join(buffer)) + if ENABLE_USER_SITE: + sys.exit(0) + elif ENABLE_USER_SITE is False: + sys.exit(1) + elif ENABLE_USER_SITE is None: + sys.exit(2) + else: + sys.exit(3) + else: + import textwrap + print(textwrap.dedent(help % (sys.argv[0], os.pathsep))) + sys.exit(10) + +if __name__ == '__main__': + _script()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/sre.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,13 @@ +"""This file is only retained for backwards compatibility. +It will be removed in the future. sre was moved to re in version 2.5. +""" + +import warnings +warnings.warn("The sre module is deprecated, please import re.", + DeprecationWarning, 2) + +from re import * +from re import __all__ + +# old pickles expect the _compile() reconstructor in this module +from re import _compile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/sre_compile.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,596 @@ +# -*- coding: utf-8 -*- +# +# Secret Labs' Regular Expression Engine +# +# convert template to internal format +# +# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. +# +# See the sre.py file for information on usage and redistribution. +# + +"""Internal support module for sre""" + +import _sre, sys +import sre_parse +from sre_constants import * + +assert _sre.MAGIC == MAGIC, "SRE module mismatch" + +if _sre.CODESIZE == 2: + MAXCODE = 65535 +else: + MAXCODE = 0xFFFFFFFFL + +_LITERAL_CODES = set([LITERAL, NOT_LITERAL]) +_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT]) +_SUCCESS_CODES = set([SUCCESS, FAILURE]) +_ASSERT_CODES = set([ASSERT, ASSERT_NOT]) + +# Sets of lowercase characters which have the same uppercase. +_equivalences = ( + # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I + (0x69, 0x131), # iı + # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S + (0x73, 0x17f), # sÅ¿ + # MICRO SIGN, GREEK SMALL LETTER MU + (0xb5, 0x3bc), # µμ + # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI + (0x345, 0x3b9, 0x1fbe), # \u0345ιι + # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL + (0x3b2, 0x3d0), # Î²Ï + # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL + (0x3b5, 0x3f5), # εϵ + # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL + (0x3b8, 0x3d1), # θϑ + # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL + (0x3ba, 0x3f0), # κϰ + # GREEK SMALL LETTER PI, GREEK PI SYMBOL + (0x3c0, 0x3d6), # πϖ + # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL + (0x3c1, 0x3f1), # Ïϱ + # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA + (0x3c2, 0x3c3), # ςσ + # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL + (0x3c6, 0x3d5), # φϕ + # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE + (0x1e61, 0x1e9b), # ṡẛ +) + +# Maps the lowercase code to lowercase codes which have the same uppercase. +_ignorecase_fixes = {i: tuple(j for j in t if i != j) + for t in _equivalences for i in t} + +def _compile(code, pattern, flags): + # internal: compile a (sub)pattern + emit = code.append + _len = len + LITERAL_CODES = _LITERAL_CODES + REPEATING_CODES = _REPEATING_CODES + SUCCESS_CODES = _SUCCESS_CODES + ASSERT_CODES = _ASSERT_CODES + if (flags & SRE_FLAG_IGNORECASE and + not (flags & SRE_FLAG_LOCALE) and + flags & SRE_FLAG_UNICODE): + fixes = _ignorecase_fixes + else: + fixes = None + for op, av in pattern: + if op in LITERAL_CODES: + if flags & SRE_FLAG_IGNORECASE: + lo = _sre.getlower(av, flags) + if fixes and lo in fixes: + emit(OPCODES[IN_IGNORE]) + skip = _len(code); emit(0) + if op is NOT_LITERAL: + emit(OPCODES[NEGATE]) + for k in (lo,) + fixes[lo]: + emit(OPCODES[LITERAL]) + emit(k) + emit(OPCODES[FAILURE]) + code[skip] = _len(code) - skip + else: + emit(OPCODES[OP_IGNORE[op]]) + emit(lo) + else: + emit(OPCODES[op]) + emit(av) + elif op is IN: + if flags & SRE_FLAG_IGNORECASE: + emit(OPCODES[OP_IGNORE[op]]) + def fixup(literal, flags=flags): + return _sre.getlower(literal, flags) + else: + emit(OPCODES[op]) + fixup = None + skip = _len(code); emit(0) + _compile_charset(av, flags, code, fixup, fixes) + code[skip] = _len(code) - skip + elif op is ANY: + if flags & SRE_FLAG_DOTALL: + emit(OPCODES[ANY_ALL]) + else: + emit(OPCODES[ANY]) + elif op in REPEATING_CODES: + if flags & SRE_FLAG_TEMPLATE: + raise error, "internal: unsupported template operator" + emit(OPCODES[REPEAT]) + skip = _len(code); emit(0) + emit(av[0]) + emit(av[1]) + _compile(code, av[2], flags) + emit(OPCODES[SUCCESS]) + code[skip] = _len(code) - skip + elif _simple(av) and op is not REPEAT: + if op is MAX_REPEAT: + emit(OPCODES[REPEAT_ONE]) + else: + emit(OPCODES[MIN_REPEAT_ONE]) + skip = _len(code); emit(0) + emit(av[0]) + emit(av[1]) + _compile(code, av[2], flags) + emit(OPCODES[SUCCESS]) + code[skip] = _len(code) - skip + else: + emit(OPCODES[REPEAT]) + skip = _len(code); emit(0) + emit(av[0]) + emit(av[1]) + _compile(code, av[2], flags) + code[skip] = _len(code) - skip + if op is MAX_REPEAT: + emit(OPCODES[MAX_UNTIL]) + else: + emit(OPCODES[MIN_UNTIL]) + elif op is SUBPATTERN: + if av[0]: + emit(OPCODES[MARK]) + emit((av[0]-1)*2) + # _compile_info(code, av[1], flags) + _compile(code, av[1], flags) + if av[0]: + emit(OPCODES[MARK]) + emit((av[0]-1)*2+1) + elif op in SUCCESS_CODES: + emit(OPCODES[op]) + elif op in ASSERT_CODES: + emit(OPCODES[op]) + skip = _len(code); emit(0) + if av[0] >= 0: + emit(0) # look ahead + else: + lo, hi = av[1].getwidth() + if lo != hi: + raise error, "look-behind requires fixed-width pattern" + emit(lo) # look behind + _compile(code, av[1], flags) + emit(OPCODES[SUCCESS]) + code[skip] = _len(code) - skip + elif op is CALL: + emit(OPCODES[op]) + skip = _len(code); emit(0) + _compile(code, av, flags) + emit(OPCODES[SUCCESS]) + code[skip] = _len(code) - skip + elif op is AT: + emit(OPCODES[op]) + if flags & SRE_FLAG_MULTILINE: + av = AT_MULTILINE.get(av, av) + if flags & SRE_FLAG_LOCALE: + av = AT_LOCALE.get(av, av) + elif flags & SRE_FLAG_UNICODE: + av = AT_UNICODE.get(av, av) + emit(ATCODES[av]) + elif op is BRANCH: + emit(OPCODES[op]) + tail = [] + tailappend = tail.append + for av in av[1]: + skip = _len(code); emit(0) + # _compile_info(code, av, flags) + _compile(code, av, flags) + emit(OPCODES[JUMP]) + tailappend(_len(code)); emit(0) + code[skip] = _len(code) - skip + emit(0) # end of branch + for tail in tail: + code[tail] = _len(code) - tail + elif op is CATEGORY: + emit(OPCODES[op]) + if flags & SRE_FLAG_LOCALE: + av = CH_LOCALE[av] + elif flags & SRE_FLAG_UNICODE: + av = CH_UNICODE[av] + emit(CHCODES[av]) + elif op is GROUPREF: + if flags & SRE_FLAG_IGNORECASE: + emit(OPCODES[OP_IGNORE[op]]) + else: + emit(OPCODES[op]) + emit(av-1) + elif op is GROUPREF_EXISTS: + emit(OPCODES[op]) + emit(av[0]-1) + skipyes = _len(code); emit(0) + _compile(code, av[1], flags) + if av[2]: + emit(OPCODES[JUMP]) + skipno = _len(code); emit(0) + code[skipyes] = _len(code) - skipyes + 1 + _compile(code, av[2], flags) + code[skipno] = _len(code) - skipno + else: + code[skipyes] = _len(code) - skipyes + 1 + else: + raise ValueError, ("unsupported operand type", op) + +def _compile_charset(charset, flags, code, fixup=None, fixes=None): + # compile charset subprogram + emit = code.append + for op, av in _optimize_charset(charset, fixup, fixes, + flags & SRE_FLAG_UNICODE): + emit(OPCODES[op]) + if op is NEGATE: + pass + elif op is LITERAL: + emit(av) + elif op is RANGE: + emit(av[0]) + emit(av[1]) + elif op is CHARSET: + code.extend(av) + elif op is BIGCHARSET: + code.extend(av) + elif op is CATEGORY: + if flags & SRE_FLAG_LOCALE: + emit(CHCODES[CH_LOCALE[av]]) + elif flags & SRE_FLAG_UNICODE: + emit(CHCODES[CH_UNICODE[av]]) + else: + emit(CHCODES[av]) + else: + raise error, "internal: unsupported set operator" + emit(OPCODES[FAILURE]) + +def _optimize_charset(charset, fixup, fixes, isunicode): + # internal: optimize character set + out = [] + tail = [] + charmap = bytearray(256) + for op, av in charset: + while True: + try: + if op is LITERAL: + if fixup: + i = fixup(av) + charmap[i] = 1 + if fixes and i in fixes: + for k in fixes[i]: + charmap[k] = 1 + else: + charmap[av] = 1 + elif op is RANGE: + r = range(av[0], av[1]+1) + if fixup: + r = map(fixup, r) + if fixup and fixes: + for i in r: + charmap[i] = 1 + if i in fixes: + for k in fixes[i]: + charmap[k] = 1 + else: + for i in r: + charmap[i] = 1 + elif op is NEGATE: + out.append((op, av)) + else: + tail.append((op, av)) + except IndexError: + if len(charmap) == 256: + # character set contains non-UCS1 character codes + charmap += b'\0' * 0xff00 + continue + # character set contains non-BMP character codes + if fixup and isunicode and op is RANGE: + lo, hi = av + ranges = [av] + # There are only two ranges of cased astral characters: + # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi). + _fixup_range(max(0x10000, lo), min(0x11fff, hi), + ranges, fixup) + for lo, hi in ranges: + if lo == hi: + tail.append((LITERAL, hi)) + else: + tail.append((RANGE, (lo, hi))) + else: + tail.append((op, av)) + break + + # compress character map + runs = [] + q = 0 + while True: + p = charmap.find(b'\1', q) + if p < 0: + break + if len(runs) >= 2: + runs = None + break + q = charmap.find(b'\0', p) + if q < 0: + runs.append((p, len(charmap))) + break + runs.append((p, q)) + if runs is not None: + # use literal/range + for p, q in runs: + if q - p == 1: + out.append((LITERAL, p)) + else: + out.append((RANGE, (p, q - 1))) + out += tail + # if the case was changed or new representation is more compact + if fixup or len(out) < len(charset): + return out + # else original character set is good enough + return charset + + # use bitmap + if len(charmap) == 256: + data = _mk_bitmap(charmap) + out.append((CHARSET, data)) + out += tail + return out + + # To represent a big charset, first a bitmap of all characters in the + # set is constructed. Then, this bitmap is sliced into chunks of 256 + # characters, duplicate chunks are eliminated, and each chunk is + # given a number. In the compiled expression, the charset is + # represented by a 32-bit word sequence, consisting of one word for + # the number of different chunks, a sequence of 256 bytes (64 words) + # of chunk numbers indexed by their original chunk position, and a + # sequence of 256-bit chunks (8 words each). + + # Compression is normally good: in a typical charset, large ranges of + # Unicode will be either completely excluded (e.g. if only cyrillic + # letters are to be matched), or completely included (e.g. if large + # subranges of Kanji match). These ranges will be represented by + # chunks of all one-bits or all zero-bits. + + # Matching can be also done efficiently: the more significant byte of + # the Unicode character is an index into the chunk number, and the + # less significant byte is a bit index in the chunk (just like the + # CHARSET matching). + + # In UCS-4 mode, the BIGCHARSET opcode still supports only subsets + # of the basic multilingual plane; an efficient representation + # for all of Unicode has not yet been developed. + + charmap = bytes(charmap) # should be hashable + comps = {} + mapping = bytearray(256) + block = 0 + data = bytearray() + for i in range(0, 65536, 256): + chunk = charmap[i: i + 256] + if chunk in comps: + mapping[i // 256] = comps[chunk] + else: + mapping[i // 256] = comps[chunk] = block + block += 1 + data += chunk + data = _mk_bitmap(data) + data[0:0] = [block] + _bytes_to_codes(mapping) + out.append((BIGCHARSET, data)) + out += tail + return out + +def _fixup_range(lo, hi, ranges, fixup): + for i in map(fixup, range(lo, hi+1)): + for k, (lo, hi) in enumerate(ranges): + if i < lo: + if l == lo - 1: + ranges[k] = (i, hi) + else: + ranges.insert(k, (i, i)) + break + elif i > hi: + if i == hi + 1: + ranges[k] = (lo, i) + break + else: + break + else: + ranges.append((i, i)) + +_CODEBITS = _sre.CODESIZE * 8 +_BITS_TRANS = b'0' + b'1' * 255 +def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int): + s = bytes(bits).translate(_BITS_TRANS)[::-1] + return [_int(s[i - _CODEBITS: i], 2) + for i in range(len(s), 0, -_CODEBITS)] + +def _bytes_to_codes(b): + # Convert block indices to word array + import array + if _sre.CODESIZE == 2: + code = 'H' + else: + code = 'I' + a = array.array(code, bytes(b)) + assert a.itemsize == _sre.CODESIZE + assert len(a) * a.itemsize == len(b) + return a.tolist() + +def _simple(av): + # check if av is a "simple" operator + lo, hi = av[2].getwidth() + return lo == hi == 1 and av[2][0][0] != SUBPATTERN + +def _compile_info(code, pattern, flags): + # internal: compile an info block. in the current version, + # this contains min/max pattern width, and an optional literal + # prefix or a character map + lo, hi = pattern.getwidth() + if lo == 0: + return # not worth it + # look for a literal prefix + prefix = [] + prefixappend = prefix.append + prefix_skip = 0 + charset = [] # not used + charsetappend = charset.append + if not (flags & SRE_FLAG_IGNORECASE): + # look for literal prefix + for op, av in pattern.data: + if op is LITERAL: + if len(prefix) == prefix_skip: + prefix_skip = prefix_skip + 1 + prefixappend(av) + elif op is SUBPATTERN and len(av[1]) == 1: + op, av = av[1][0] + if op is LITERAL: + prefixappend(av) + else: + break + else: + break + # if no prefix, look for charset prefix + if not prefix and pattern.data: + op, av = pattern.data[0] + if op is SUBPATTERN and av[1]: + op, av = av[1][0] + if op is LITERAL: + charsetappend((op, av)) + elif op is BRANCH: + c = [] + cappend = c.append + for p in av[1]: + if not p: + break + op, av = p[0] + if op is LITERAL: + cappend((op, av)) + else: + break + else: + charset = c + elif op is BRANCH: + c = [] + cappend = c.append + for p in av[1]: + if not p: + break + op, av = p[0] + if op is LITERAL: + cappend((op, av)) + else: + break + else: + charset = c + elif op is IN: + charset = av +## if prefix: +## print "*** PREFIX", prefix, prefix_skip +## if charset: +## print "*** CHARSET", charset + # add an info block + emit = code.append + emit(OPCODES[INFO]) + skip = len(code); emit(0) + # literal flag + mask = 0 + if prefix: + mask = SRE_INFO_PREFIX + if len(prefix) == prefix_skip == len(pattern.data): + mask = mask + SRE_INFO_LITERAL + elif charset: + mask = mask + SRE_INFO_CHARSET + emit(mask) + # pattern length + if lo < MAXCODE: + emit(lo) + else: + emit(MAXCODE) + prefix = prefix[:MAXCODE] + if hi < MAXCODE: + emit(hi) + else: + emit(0) + # add literal prefix + if prefix: + emit(len(prefix)) # length + emit(prefix_skip) # skip + code.extend(prefix) + # generate overlap table + table = [-1] + ([0]*len(prefix)) + for i in xrange(len(prefix)): + table[i+1] = table[i]+1 + while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: + table[i+1] = table[table[i+1]-1]+1 + code.extend(table[1:]) # don't store first entry + elif charset: + _compile_charset(charset, flags, code) + code[skip] = len(code) - skip + +try: + unicode +except NameError: + STRING_TYPES = (type(""),) +else: + STRING_TYPES = (type(""), type(unicode(""))) + +def isstring(obj): + for tp in STRING_TYPES: + if isinstance(obj, tp): + return 1 + return 0 + +def _code(p, flags): + + flags = p.pattern.flags | flags + code = [] + + # compile info block + _compile_info(code, p, flags) + + # compile the pattern + _compile(code, p.data, flags) + + code.append(OPCODES[SUCCESS]) + + return code + +def compile(p, flags=0): + # internal: convert pattern list to internal format + + if isstring(p): + pattern = p + p = sre_parse.parse(p, flags) + else: + pattern = None + + code = _code(p, flags) + + # print code + + # XXX: <fl> get rid of this limitation! + if p.pattern.groups > 100: + raise AssertionError( + "sorry, but this version only supports 100 named groups" + ) + + # map in either direction + groupindex = p.pattern.groupdict + indexgroup = [None] * p.pattern.groups + for k, i in groupindex.items(): + indexgroup[i] = k + + return _sre.compile( + pattern, flags | p.pattern.flags, code, + p.pattern.groups-1, + groupindex, indexgroup + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/sre_constants.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,263 @@ +# +# Secret Labs' Regular Expression Engine +# +# various symbols used by the regular expression engine. +# run this script to update the _sre include files! +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# See the sre.py file for information on usage and redistribution. +# + +"""Internal support module for sre""" + +# update when constants are added or removed + +MAGIC = 20031017 + +try: + from _sre import MAXREPEAT +except ImportError: + import _sre + MAXREPEAT = _sre.MAXREPEAT = 65535 + +# SRE standard exception (access as sre.error) +# should this really be here? + +class error(Exception): + pass + +# operators + +FAILURE = "failure" +SUCCESS = "success" + +ANY = "any" +ANY_ALL = "any_all" +ASSERT = "assert" +ASSERT_NOT = "assert_not" +AT = "at" +BIGCHARSET = "bigcharset" +BRANCH = "branch" +CALL = "call" +CATEGORY = "category" +CHARSET = "charset" +GROUPREF = "groupref" +GROUPREF_IGNORE = "groupref_ignore" +GROUPREF_EXISTS = "groupref_exists" +IN = "in" +IN_IGNORE = "in_ignore" +INFO = "info" +JUMP = "jump" +LITERAL = "literal" +LITERAL_IGNORE = "literal_ignore" +MARK = "mark" +MAX_REPEAT = "max_repeat" +MAX_UNTIL = "max_until" +MIN_REPEAT = "min_repeat" +MIN_UNTIL = "min_until" +NEGATE = "negate" +NOT_LITERAL = "not_literal" +NOT_LITERAL_IGNORE = "not_literal_ignore" +RANGE = "range" +REPEAT = "repeat" +REPEAT_ONE = "repeat_one" +SUBPATTERN = "subpattern" +MIN_REPEAT_ONE = "min_repeat_one" + +# positions +AT_BEGINNING = "at_beginning" +AT_BEGINNING_LINE = "at_beginning_line" +AT_BEGINNING_STRING = "at_beginning_string" +AT_BOUNDARY = "at_boundary" +AT_NON_BOUNDARY = "at_non_boundary" +AT_END = "at_end" +AT_END_LINE = "at_end_line" +AT_END_STRING = "at_end_string" +AT_LOC_BOUNDARY = "at_loc_boundary" +AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" +AT_UNI_BOUNDARY = "at_uni_boundary" +AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" + +# categories +CATEGORY_DIGIT = "category_digit" +CATEGORY_NOT_DIGIT = "category_not_digit" +CATEGORY_SPACE = "category_space" +CATEGORY_NOT_SPACE = "category_not_space" +CATEGORY_WORD = "category_word" +CATEGORY_NOT_WORD = "category_not_word" +CATEGORY_LINEBREAK = "category_linebreak" +CATEGORY_NOT_LINEBREAK = "category_not_linebreak" +CATEGORY_LOC_WORD = "category_loc_word" +CATEGORY_LOC_NOT_WORD = "category_loc_not_word" +CATEGORY_UNI_DIGIT = "category_uni_digit" +CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" +CATEGORY_UNI_SPACE = "category_uni_space" +CATEGORY_UNI_NOT_SPACE = "category_uni_not_space" +CATEGORY_UNI_WORD = "category_uni_word" +CATEGORY_UNI_NOT_WORD = "category_uni_not_word" +CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" +CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" + +OPCODES = [ + + # failure=0 success=1 (just because it looks better that way :-) + FAILURE, SUCCESS, + + ANY, ANY_ALL, + ASSERT, ASSERT_NOT, + AT, + BRANCH, + CALL, + CATEGORY, + CHARSET, BIGCHARSET, + GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, + IN, IN_IGNORE, + INFO, + JUMP, + LITERAL, LITERAL_IGNORE, + MARK, + MAX_UNTIL, + MIN_UNTIL, + NOT_LITERAL, NOT_LITERAL_IGNORE, + NEGATE, + RANGE, + REPEAT, + REPEAT_ONE, + SUBPATTERN, + MIN_REPEAT_ONE + +] + +ATCODES = [ + AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, + AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, + AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, + AT_UNI_NON_BOUNDARY +] + +CHCODES = [ + CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE, + CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD, + CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD, + CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, + CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, + CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, + CATEGORY_UNI_NOT_LINEBREAK +] + +def makedict(list): + d = {} + i = 0 + for item in list: + d[item] = i + i = i + 1 + return d + +OPCODES = makedict(OPCODES) +ATCODES = makedict(ATCODES) +CHCODES = makedict(CHCODES) + +# replacement operations for "ignore case" mode +OP_IGNORE = { + GROUPREF: GROUPREF_IGNORE, + IN: IN_IGNORE, + LITERAL: LITERAL_IGNORE, + NOT_LITERAL: NOT_LITERAL_IGNORE +} + +AT_MULTILINE = { + AT_BEGINNING: AT_BEGINNING_LINE, + AT_END: AT_END_LINE +} + +AT_LOCALE = { + AT_BOUNDARY: AT_LOC_BOUNDARY, + AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY +} + +AT_UNICODE = { + AT_BOUNDARY: AT_UNI_BOUNDARY, + AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY +} + +CH_LOCALE = { + CATEGORY_DIGIT: CATEGORY_DIGIT, + CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT, + CATEGORY_SPACE: CATEGORY_SPACE, + CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, + CATEGORY_WORD: CATEGORY_LOC_WORD, + CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, + CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, + CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK +} + +CH_UNICODE = { + CATEGORY_DIGIT: CATEGORY_UNI_DIGIT, + CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT, + CATEGORY_SPACE: CATEGORY_UNI_SPACE, + CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE, + CATEGORY_WORD: CATEGORY_UNI_WORD, + CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, + CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, + CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK +} + +# flags +SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking) +SRE_FLAG_IGNORECASE = 2 # case insensitive +SRE_FLAG_LOCALE = 4 # honour system locale +SRE_FLAG_MULTILINE = 8 # treat target as multiline string +SRE_FLAG_DOTALL = 16 # treat target as a single string +SRE_FLAG_UNICODE = 32 # use unicode locale +SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments +SRE_FLAG_DEBUG = 128 # debugging + +# flags for INFO primitive +SRE_INFO_PREFIX = 1 # has prefix +SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) +SRE_INFO_CHARSET = 4 # pattern starts with character from given set + +if __name__ == "__main__": + def dump(f, d, prefix): + items = d.items() + items.sort(key=lambda a: a[1]) + for k, v in items: + f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) + f = open("sre_constants.h", "w") + f.write("""\ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * NOTE: This file is generated by sre_constants.py. If you need + * to change anything in here, edit sre_constants.py and run it. + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * See the _sre.c file for information on usage and redistribution. + */ + +""") + + f.write("#define SRE_MAGIC %d\n" % MAGIC) + + dump(f, OPCODES, "SRE_OP") + dump(f, ATCODES, "SRE") + dump(f, CHCODES, "SRE") + + f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE) + f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE) + f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE) + f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE) + f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL) + f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE) + f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE) + + f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX) + f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL) + f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET) + + f.close() + print "done"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/sre_parse.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,834 @@ +# +# Secret Labs' Regular Expression Engine +# +# convert re-style regular expression to sre pattern +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# See the sre.py file for information on usage and redistribution. +# + +"""Internal support module for sre""" + +# XXX: show string offset and offending character for all errors + +import sys + +from sre_constants import * + +SPECIAL_CHARS = ".\\[{()*+?^$|" +REPEAT_CHARS = "*+?{" + +DIGITS = set("0123456789") + +OCTDIGITS = set("01234567") +HEXDIGITS = set("0123456789abcdefABCDEF") + +WHITESPACE = set(" \t\n\r\v\f") + +ESCAPES = { + r"\a": (LITERAL, ord("\a")), + r"\b": (LITERAL, ord("\b")), + r"\f": (LITERAL, ord("\f")), + r"\n": (LITERAL, ord("\n")), + r"\r": (LITERAL, ord("\r")), + r"\t": (LITERAL, ord("\t")), + r"\v": (LITERAL, ord("\v")), + r"\\": (LITERAL, ord("\\")) +} + +CATEGORIES = { + r"\A": (AT, AT_BEGINNING_STRING), # start of string + r"\b": (AT, AT_BOUNDARY), + r"\B": (AT, AT_NON_BOUNDARY), + r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), + r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), + r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), + r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), + r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), + r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), + r"\Z": (AT, AT_END_STRING), # end of string +} + +FLAGS = { + # standard flags + "i": SRE_FLAG_IGNORECASE, + "L": SRE_FLAG_LOCALE, + "m": SRE_FLAG_MULTILINE, + "s": SRE_FLAG_DOTALL, + "x": SRE_FLAG_VERBOSE, + # extensions + "t": SRE_FLAG_TEMPLATE, + "u": SRE_FLAG_UNICODE, +} + +class Pattern: + # master pattern object. keeps track of global attributes + def __init__(self): + self.flags = 0 + self.open = [] + self.groups = 1 + self.groupdict = {} + self.lookbehind = 0 + + def opengroup(self, name=None): + gid = self.groups + self.groups = gid + 1 + if name is not None: + ogid = self.groupdict.get(name, None) + if ogid is not None: + raise error, ("redefinition of group name %s as group %d; " + "was group %d" % (repr(name), gid, ogid)) + self.groupdict[name] = gid + self.open.append(gid) + return gid + def closegroup(self, gid): + self.open.remove(gid) + def checkgroup(self, gid): + return gid < self.groups and gid not in self.open + +class SubPattern: + # a subpattern, in intermediate form + def __init__(self, pattern, data=None): + self.pattern = pattern + if data is None: + data = [] + self.data = data + self.width = None + def dump(self, level=0): + seqtypes = (tuple, list) + for op, av in self.data: + print level*" " + op, + if op == IN: + # member sublanguage + print + for op, a in av: + print (level+1)*" " + op, a + elif op == BRANCH: + print + for i, a in enumerate(av[1]): + if i: + print level*" " + "or" + a.dump(level+1) + elif op == GROUPREF_EXISTS: + condgroup, item_yes, item_no = av + print condgroup + item_yes.dump(level+1) + if item_no: + print level*" " + "else" + item_no.dump(level+1) + elif isinstance(av, seqtypes): + nl = 0 + for a in av: + if isinstance(a, SubPattern): + if not nl: + print + a.dump(level+1) + nl = 1 + else: + print a, + nl = 0 + if not nl: + print + else: + print av + def __repr__(self): + return repr(self.data) + def __len__(self): + return len(self.data) + def __delitem__(self, index): + del self.data[index] + def __getitem__(self, index): + if isinstance(index, slice): + return SubPattern(self.pattern, self.data[index]) + return self.data[index] + def __setitem__(self, index, code): + self.data[index] = code + def insert(self, index, code): + self.data.insert(index, code) + def append(self, code): + self.data.append(code) + def getwidth(self): + # determine the width (min, max) for this subpattern + if self.width: + return self.width + lo = hi = 0 + UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY) + REPEATCODES = (MIN_REPEAT, MAX_REPEAT) + for op, av in self.data: + if op is BRANCH: + i = MAXREPEAT - 1 + j = 0 + for av in av[1]: + l, h = av.getwidth() + i = min(i, l) + j = max(j, h) + lo = lo + i + hi = hi + j + elif op is CALL: + i, j = av.getwidth() + lo = lo + i + hi = hi + j + elif op is SUBPATTERN: + i, j = av[1].getwidth() + lo = lo + i + hi = hi + j + elif op in REPEATCODES: + i, j = av[2].getwidth() + lo = lo + i * av[0] + hi = hi + j * av[1] + elif op in UNITCODES: + lo = lo + 1 + hi = hi + 1 + elif op == SUCCESS: + break + self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) + return self.width + +class Tokenizer: + def __init__(self, string): + self.string = string + self.index = 0 + self.__next() + def __next(self): + if self.index >= len(self.string): + self.next = None + return + char = self.string[self.index] + if char[0] == "\\": + try: + c = self.string[self.index + 1] + except IndexError: + raise error, "bogus escape (end of line)" + char = char + c + self.index = self.index + len(char) + self.next = char + def match(self, char, skip=1): + if char == self.next: + if skip: + self.__next() + return 1 + return 0 + def get(self): + this = self.next + self.__next() + return this + def tell(self): + return self.index, self.next + def seek(self, index): + self.index, self.next = index + +def isident(char): + return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" + +def isdigit(char): + return "0" <= char <= "9" + +def isname(name): + # check that group name is a valid string + if not isident(name[0]): + return False + for char in name[1:]: + if not isident(char) and not isdigit(char): + return False + return True + +def _class_escape(source, escape): + # handle escape code inside character class + code = ESCAPES.get(escape) + if code: + return code + code = CATEGORIES.get(escape) + if code and code[0] == IN: + return code + try: + c = escape[1:2] + if c == "x": + # hexadecimal escape (exactly two digits) + while source.next in HEXDIGITS and len(escape) < 4: + escape = escape + source.get() + escape = escape[2:] + if len(escape) != 2: + raise error, "bogus escape: %s" % repr("\\" + escape) + return LITERAL, int(escape, 16) & 0xff + elif c in OCTDIGITS: + # octal escape (up to three digits) + while source.next in OCTDIGITS and len(escape) < 4: + escape = escape + source.get() + escape = escape[1:] + return LITERAL, int(escape, 8) & 0xff + elif c in DIGITS: + raise error, "bogus escape: %s" % repr(escape) + if len(escape) == 2: + return LITERAL, ord(escape[1]) + except ValueError: + pass + raise error, "bogus escape: %s" % repr(escape) + +def _escape(source, escape, state): + # handle escape code in expression + code = CATEGORIES.get(escape) + if code: + return code + code = ESCAPES.get(escape) + if code: + return code + try: + c = escape[1:2] + if c == "x": + # hexadecimal escape + while source.next in HEXDIGITS and len(escape) < 4: + escape = escape + source.get() + if len(escape) != 4: + raise ValueError + return LITERAL, int(escape[2:], 16) & 0xff + elif c == "0": + # octal escape + while source.next in OCTDIGITS and len(escape) < 4: + escape = escape + source.get() + return LITERAL, int(escape[1:], 8) & 0xff + elif c in DIGITS: + # octal escape *or* decimal group reference (sigh) + if source.next in DIGITS: + escape = escape + source.get() + if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and + source.next in OCTDIGITS): + # got three octal digits; this is an octal escape + escape = escape + source.get() + return LITERAL, int(escape[1:], 8) & 0xff + # not an octal escape, so this is a group reference + group = int(escape[1:]) + if group < state.groups: + if not state.checkgroup(group): + raise error, "cannot refer to open group" + if state.lookbehind: + import warnings + warnings.warn('group references in lookbehind ' + 'assertions are not supported', + RuntimeWarning) + return GROUPREF, group + raise ValueError + if len(escape) == 2: + return LITERAL, ord(escape[1]) + except ValueError: + pass + raise error, "bogus escape: %s" % repr(escape) + +def _parse_sub(source, state, nested=1): + # parse an alternation: a|b|c + + items = [] + itemsappend = items.append + sourcematch = source.match + while 1: + itemsappend(_parse(source, state)) + if sourcematch("|"): + continue + if not nested: + break + if not source.next or sourcematch(")", 0): + break + else: + raise error, "pattern not properly closed" + + if len(items) == 1: + return items[0] + + subpattern = SubPattern(state) + subpatternappend = subpattern.append + + # check if all items share a common prefix + while 1: + prefix = None + for item in items: + if not item: + break + if prefix is None: + prefix = item[0] + elif item[0] != prefix: + break + else: + # all subitems start with a common "prefix". + # move it out of the branch + for item in items: + del item[0] + subpatternappend(prefix) + continue # check next one + break + + # check if the branch can be replaced by a character set + for item in items: + if len(item) != 1 or item[0][0] != LITERAL: + break + else: + # we can store this as a character set instead of a + # branch (the compiler may optimize this even more) + set = [] + setappend = set.append + for item in items: + setappend(item[0]) + subpatternappend((IN, set)) + return subpattern + + subpattern.append((BRANCH, (None, items))) + return subpattern + +def _parse_sub_cond(source, state, condgroup): + item_yes = _parse(source, state) + if source.match("|"): + item_no = _parse(source, state) + if source.match("|"): + raise error, "conditional backref with more than two branches" + else: + item_no = None + if source.next and not source.match(")", 0): + raise error, "pattern not properly closed" + subpattern = SubPattern(state) + subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) + return subpattern + +_PATTERNENDERS = set("|)") +_ASSERTCHARS = set("=!<") +_LOOKBEHINDASSERTCHARS = set("=!") +_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT]) + +def _parse(source, state): + # parse a simple pattern + subpattern = SubPattern(state) + + # precompute constants into local variables + subpatternappend = subpattern.append + sourceget = source.get + sourcematch = source.match + _len = len + PATTERNENDERS = _PATTERNENDERS + ASSERTCHARS = _ASSERTCHARS + LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS + REPEATCODES = _REPEATCODES + + while 1: + + if source.next in PATTERNENDERS: + break # end of subpattern + this = sourceget() + if this is None: + break # end of pattern + + if state.flags & SRE_FLAG_VERBOSE: + # skip whitespace and comments + if this in WHITESPACE: + continue + if this == "#": + while 1: + this = sourceget() + if this in (None, "\n"): + break + continue + + if this and this[0] not in SPECIAL_CHARS: + subpatternappend((LITERAL, ord(this))) + + elif this == "[": + # character set + set = [] + setappend = set.append +## if sourcematch(":"): +## pass # handle character classes + if sourcematch("^"): + setappend((NEGATE, None)) + # check remaining characters + start = set[:] + while 1: + this = sourceget() + if this == "]" and set != start: + break + elif this and this[0] == "\\": + code1 = _class_escape(source, this) + elif this: + code1 = LITERAL, ord(this) + else: + raise error, "unexpected end of regular expression" + if sourcematch("-"): + # potential range + this = sourceget() + if this == "]": + if code1[0] is IN: + code1 = code1[1][0] + setappend(code1) + setappend((LITERAL, ord("-"))) + break + elif this: + if this[0] == "\\": + code2 = _class_escape(source, this) + else: + code2 = LITERAL, ord(this) + if code1[0] != LITERAL or code2[0] != LITERAL: + raise error, "bad character range" + lo = code1[1] + hi = code2[1] + if hi < lo: + raise error, "bad character range" + setappend((RANGE, (lo, hi))) + else: + raise error, "unexpected end of regular expression" + else: + if code1[0] is IN: + code1 = code1[1][0] + setappend(code1) + + # XXX: <fl> should move set optimization to compiler! + if _len(set)==1 and set[0][0] is LITERAL: + subpatternappend(set[0]) # optimization + elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: + subpatternappend((NOT_LITERAL, set[1][1])) # optimization + else: + # XXX: <fl> should add charmap optimization here + subpatternappend((IN, set)) + + elif this and this[0] in REPEAT_CHARS: + # repeat previous item + if this == "?": + min, max = 0, 1 + elif this == "*": + min, max = 0, MAXREPEAT + + elif this == "+": + min, max = 1, MAXREPEAT + elif this == "{": + if source.next == "}": + subpatternappend((LITERAL, ord(this))) + continue + here = source.tell() + min, max = 0, MAXREPEAT + lo = hi = "" + while source.next in DIGITS: + lo = lo + source.get() + if sourcematch(","): + while source.next in DIGITS: + hi = hi + sourceget() + else: + hi = lo + if not sourcematch("}"): + subpatternappend((LITERAL, ord(this))) + source.seek(here) + continue + if lo: + min = int(lo) + if min >= MAXREPEAT: + raise OverflowError("the repetition number is too large") + if hi: + max = int(hi) + if max >= MAXREPEAT: + raise OverflowError("the repetition number is too large") + if max < min: + raise error("bad repeat interval") + else: + raise error, "not supported" + # figure out which item to repeat + if subpattern: + item = subpattern[-1:] + else: + item = None + if not item or (_len(item) == 1 and item[0][0] == AT): + raise error, "nothing to repeat" + if item[0][0] in REPEATCODES: + raise error, "multiple repeat" + if sourcematch("?"): + subpattern[-1] = (MIN_REPEAT, (min, max, item)) + else: + subpattern[-1] = (MAX_REPEAT, (min, max, item)) + + elif this == ".": + subpatternappend((ANY, None)) + + elif this == "(": + group = 1 + name = None + condgroup = None + if sourcematch("?"): + group = 0 + # options + if sourcematch("P"): + # python extensions + if sourcematch("<"): + # named group: skip forward to end of name + name = "" + while 1: + char = sourceget() + if char is None: + raise error, "unterminated name" + if char == ">": + break + name = name + char + group = 1 + if not name: + raise error("missing group name") + if not isname(name): + raise error("bad character in group name %r" % + name) + elif sourcematch("="): + # named backreference + name = "" + while 1: + char = sourceget() + if char is None: + raise error, "unterminated name" + if char == ")": + break + name = name + char + if not name: + raise error("missing group name") + if not isname(name): + raise error("bad character in backref group name " + "%r" % name) + gid = state.groupdict.get(name) + if gid is None: + msg = "unknown group name: {0!r}".format(name) + raise error(msg) + if state.lookbehind: + import warnings + warnings.warn('group references in lookbehind ' + 'assertions are not supported', + RuntimeWarning) + subpatternappend((GROUPREF, gid)) + continue + else: + char = sourceget() + if char is None: + raise error, "unexpected end of pattern" + raise error, "unknown specifier: ?P%s" % char + elif sourcematch(":"): + # non-capturing group + group = 2 + elif sourcematch("#"): + # comment + while 1: + if source.next is None or source.next == ")": + break + sourceget() + if not sourcematch(")"): + raise error, "unbalanced parenthesis" + continue + elif source.next in ASSERTCHARS: + # lookahead assertions + char = sourceget() + dir = 1 + if char == "<": + if source.next not in LOOKBEHINDASSERTCHARS: + raise error, "syntax error" + dir = -1 # lookbehind + char = sourceget() + state.lookbehind += 1 + p = _parse_sub(source, state) + if dir < 0: + state.lookbehind -= 1 + if not sourcematch(")"): + raise error, "unbalanced parenthesis" + if char == "=": + subpatternappend((ASSERT, (dir, p))) + else: + subpatternappend((ASSERT_NOT, (dir, p))) + continue + elif sourcematch("("): + # conditional backreference group + condname = "" + while 1: + char = sourceget() + if char is None: + raise error, "unterminated name" + if char == ")": + break + condname = condname + char + group = 2 + if not condname: + raise error("missing group name") + if isname(condname): + condgroup = state.groupdict.get(condname) + if condgroup is None: + msg = "unknown group name: {0!r}".format(condname) + raise error(msg) + else: + try: + condgroup = int(condname) + except ValueError: + raise error, "bad character in group name" + if state.lookbehind: + import warnings + warnings.warn('group references in lookbehind ' + 'assertions are not supported', + RuntimeWarning) + else: + # flags + if not source.next in FLAGS: + raise error, "unexpected end of pattern" + while source.next in FLAGS: + state.flags = state.flags | FLAGS[sourceget()] + if group: + # parse group contents + if group == 2: + # anonymous group + group = None + else: + group = state.opengroup(name) + if condgroup: + p = _parse_sub_cond(source, state, condgroup) + else: + p = _parse_sub(source, state) + if not sourcematch(")"): + raise error, "unbalanced parenthesis" + if group is not None: + state.closegroup(group) + subpatternappend((SUBPATTERN, (group, p))) + else: + while 1: + char = sourceget() + if char is None: + raise error, "unexpected end of pattern" + if char == ")": + break + raise error, "unknown extension" + + elif this == "^": + subpatternappend((AT, AT_BEGINNING)) + + elif this == "$": + subpattern.append((AT, AT_END)) + + elif this and this[0] == "\\": + code = _escape(source, this, state) + subpatternappend(code) + + else: + raise error, "parser error" + + return subpattern + +def parse(str, flags=0, pattern=None): + # parse 're' pattern into list of (opcode, argument) tuples + + source = Tokenizer(str) + + if pattern is None: + pattern = Pattern() + pattern.flags = flags + pattern.str = str + + p = _parse_sub(source, pattern, 0) + + tail = source.get() + if tail == ")": + raise error, "unbalanced parenthesis" + elif tail: + raise error, "bogus characters at end of regular expression" + + if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: + # the VERBOSE flag was switched on inside the pattern. to be + # on the safe side, we'll parse the whole thing again... + return parse(str, p.pattern.flags) + + if flags & SRE_FLAG_DEBUG: + p.dump() + + return p + +def parse_template(source, pattern): + # parse 're' replacement string into list of literals and + # group references + s = Tokenizer(source) + sget = s.get + p = [] + a = p.append + def literal(literal, p=p, pappend=a): + if p and p[-1][0] is LITERAL: + p[-1] = LITERAL, p[-1][1] + literal + else: + pappend((LITERAL, literal)) + sep = source[:0] + if type(sep) is type(""): + makechar = chr + else: + makechar = unichr + while 1: + this = sget() + if this is None: + break # end of replacement string + if this and this[0] == "\\": + # group + c = this[1:2] + if c == "g": + name = "" + if s.match("<"): + while 1: + char = sget() + if char is None: + raise error, "unterminated group name" + if char == ">": + break + name = name + char + if not name: + raise error, "missing group name" + try: + index = int(name) + if index < 0: + raise error, "negative group number" + except ValueError: + if not isname(name): + raise error, "bad character in group name" + try: + index = pattern.groupindex[name] + except KeyError: + msg = "unknown group name: {0!r}".format(name) + raise IndexError(msg) + a((MARK, index)) + elif c == "0": + if s.next in OCTDIGITS: + this = this + sget() + if s.next in OCTDIGITS: + this = this + sget() + literal(makechar(int(this[1:], 8) & 0xff)) + elif c in DIGITS: + isoctal = False + if s.next in DIGITS: + this = this + sget() + if (c in OCTDIGITS and this[2] in OCTDIGITS and + s.next in OCTDIGITS): + this = this + sget() + isoctal = True + literal(makechar(int(this[1:], 8) & 0xff)) + if not isoctal: + a((MARK, int(this[1:]))) + else: + try: + this = makechar(ESCAPES[this][1]) + except KeyError: + pass + literal(this) + else: + literal(this) + # convert template to groups and literals lists + i = 0 + groups = [] + groupsappend = groups.append + literals = [None] * len(p) + for c, s in p: + if c is MARK: + groupsappend((i, s)) + # literal[i] is already None + else: + literals[i] = s + i = i + 1 + return groups, literals + +def expand_template(template, match): + g = match.group + sep = match.string[:0] + groups, literals = template + literals = literals[:] + try: + for index, group in groups: + literals[index] = s = g(group) + if s is None: + raise error, "unmatched group" + except IndexError: + raise error, "invalid group reference" + return sep.join(literals)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/stat.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,96 @@ +"""Constants/functions for interpreting results of os.stat() and os.lstat(). + +Suggested usage: from stat import * +""" + +# Indices for stat struct members in the tuple returned by os.stat() + +ST_MODE = 0 +ST_INO = 1 +ST_DEV = 2 +ST_NLINK = 3 +ST_UID = 4 +ST_GID = 5 +ST_SIZE = 6 +ST_ATIME = 7 +ST_MTIME = 8 +ST_CTIME = 9 + +# Extract bits from the mode + +def S_IMODE(mode): + return mode & 07777 + +def S_IFMT(mode): + return mode & 0170000 + +# Constants used as S_IFMT() for various file types +# (not all are implemented on all systems) + +S_IFDIR = 0040000 +S_IFCHR = 0020000 +S_IFBLK = 0060000 +S_IFREG = 0100000 +S_IFIFO = 0010000 +S_IFLNK = 0120000 +S_IFSOCK = 0140000 + +# Functions to test for each file type + +def S_ISDIR(mode): + return S_IFMT(mode) == S_IFDIR + +def S_ISCHR(mode): + return S_IFMT(mode) == S_IFCHR + +def S_ISBLK(mode): + return S_IFMT(mode) == S_IFBLK + +def S_ISREG(mode): + return S_IFMT(mode) == S_IFREG + +def S_ISFIFO(mode): + return S_IFMT(mode) == S_IFIFO + +def S_ISLNK(mode): + return S_IFMT(mode) == S_IFLNK + +def S_ISSOCK(mode): + return S_IFMT(mode) == S_IFSOCK + +# Names for permission bits + +S_ISUID = 04000 +S_ISGID = 02000 +S_ENFMT = S_ISGID +S_ISVTX = 01000 +S_IREAD = 00400 +S_IWRITE = 00200 +S_IEXEC = 00100 +S_IRWXU = 00700 +S_IRUSR = 00400 +S_IWUSR = 00200 +S_IXUSR = 00100 +S_IRWXG = 00070 +S_IRGRP = 00040 +S_IWGRP = 00020 +S_IXGRP = 00010 +S_IRWXO = 00007 +S_IROTH = 00004 +S_IWOTH = 00002 +S_IXOTH = 00001 + +# Names for file flags + +UF_NODUMP = 0x00000001 +UF_IMMUTABLE = 0x00000002 +UF_APPEND = 0x00000004 +UF_OPAQUE = 0x00000008 +UF_NOUNLINK = 0x00000010 +UF_COMPRESSED = 0x00000020 # OS X: file is hfs-compressed +UF_HIDDEN = 0x00008000 # OS X: file should not be displayed +SF_ARCHIVED = 0x00010000 +SF_IMMUTABLE = 0x00020000 +SF_APPEND = 0x00040000 +SF_NOUNLINK = 0x00100000 +SF_SNAPSHOT = 0x00200000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/types.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,86 @@ +"""Define names for all type symbols known in the standard interpreter. + +Types that are part of optional modules (e.g. array) are not listed. +""" +import sys + +# Iterators in Python aren't a matter of type but of protocol. A large +# and changing number of builtin types implement *some* flavor of +# iterator. Don't check the type! Use hasattr to check for both +# "__iter__" and "next" attributes instead. + +NoneType = type(None) +TypeType = type +ObjectType = object + +IntType = int +LongType = long +FloatType = float +BooleanType = bool +try: + ComplexType = complex +except NameError: + pass + +StringType = str + +# StringTypes is already outdated. Instead of writing "type(x) in +# types.StringTypes", you should use "isinstance(x, basestring)". But +# we keep around for compatibility with Python 2.2. +try: + UnicodeType = unicode + StringTypes = (StringType, UnicodeType) +except NameError: + StringTypes = (StringType,) + +BufferType = buffer + +TupleType = tuple +ListType = list +DictType = DictionaryType = dict + +def _f(): pass +FunctionType = type(_f) +LambdaType = type(lambda: None) # Same as FunctionType +CodeType = type(_f.func_code) + +def _g(): + yield 1 +GeneratorType = type(_g()) + +class _C: + def _m(self): pass +ClassType = type(_C) +UnboundMethodType = type(_C._m) # Same as MethodType +_x = _C() +InstanceType = type(_x) +MethodType = type(_x._m) + +BuiltinFunctionType = type(len) +BuiltinMethodType = type([].append) # Same as BuiltinFunctionType + +ModuleType = type(sys) +FileType = file +XRangeType = xrange + +try: + raise TypeError +except TypeError: + tb = sys.exc_info()[2] + TracebackType = type(tb) + FrameType = type(tb.tb_frame) + del tb + +SliceType = slice +EllipsisType = type(Ellipsis) + +DictProxyType = type(TypeType.__dict__) +NotImplementedType = type(NotImplemented) + +# For Jython, the following two types are identical +GetSetDescriptorType = type(FunctionType.func_code) +MemberDescriptorType = type(FunctionType.func_globals) + +del sys, _f, _g, _C, _x # Not for export + +__all__ = list(n for n in globals() if n[:1] != '_')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/lib/python2.7/warnings.py Thu May 18 18:37:28 2017 -0400 @@ -0,0 +1,422 @@ +"""Python part of the warnings subsystem.""" + +# Note: function level imports should *not* be used +# in this module as it may cause import lock deadlock. +# See bug 683658. +import linecache +import sys +import types + +__all__ = ["warn", "warn_explicit", "showwarning", + "formatwarning", "filterwarnings", "simplefilter", + "resetwarnings", "catch_warnings"] + + +def warnpy3k(message, category=None, stacklevel=1): + """Issue a deprecation warning for Python 3.x related changes. + + Warnings are omitted unless Python is started with the -3 option. + """ + if sys.py3kwarning: + if category is None: + category = DeprecationWarning + warn(message, category, stacklevel+1) + +def _show_warning(message, category, filename, lineno, file=None, line=None): + """Hook to write a warning to a file; replace if you like.""" + if file is None: + file = sys.stderr + if file is None: + # sys.stderr is None - warnings get lost + return + try: + file.write(formatwarning(message, category, filename, lineno, line)) + except (IOError, UnicodeError): + pass # the file (probably stderr) is invalid - this warning gets lost. +# Keep a working version around in case the deprecation of the old API is +# triggered. +showwarning = _show_warning + +def formatwarning(message, category, filename, lineno, line=None): + """Function to format a warning the standard way.""" + try: + unicodetype = unicode + except NameError: + unicodetype = () + try: + message = str(message) + except UnicodeEncodeError: + pass + s = "%s: %s: %s\n" % (lineno, category.__name__, message) + line = linecache.getline(filename, lineno) if line is None else line + if line: + line = line.strip() + if isinstance(s, unicodetype) and isinstance(line, str): + line = unicode(line, 'latin1') + s += " %s\n" % line + if isinstance(s, unicodetype) and isinstance(filename, str): + enc = sys.getfilesystemencoding() + if enc: + try: + filename = unicode(filename, enc) + except UnicodeDecodeError: + pass + s = "%s:%s" % (filename, s) + return s + +def filterwarnings(action, message="", category=Warning, module="", lineno=0, + append=0): + """Insert an entry into the list of warnings filters (at the front). + + 'action' -- one of "error", "ignore", "always", "default", "module", + or "once" + 'message' -- a regex that the warning message must match + 'category' -- a class that the warning must be a subclass of + 'module' -- a regex that the module name must match + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + import re + assert action in ("error", "ignore", "always", "default", "module", + "once"), "invalid action: %r" % (action,) + assert isinstance(message, basestring), "message must be a string" + assert isinstance(category, (type, types.ClassType)), \ + "category must be a class" + assert issubclass(category, Warning), "category must be a Warning subclass" + assert isinstance(module, basestring), "module must be a string" + assert isinstance(lineno, int) and lineno >= 0, \ + "lineno must be an int >= 0" + item = (action, re.compile(message, re.I), category, + re.compile(module), lineno) + if append: + filters.append(item) + else: + filters.insert(0, item) + +def simplefilter(action, category=Warning, lineno=0, append=0): + """Insert a simple entry into the list of warnings filters (at the front). + + A simple filter matches all modules and messages. + 'action' -- one of "error", "ignore", "always", "default", "module", + or "once" + 'category' -- a class that the warning must be a subclass of + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + assert action in ("error", "ignore", "always", "default", "module", + "once"), "invalid action: %r" % (action,) + assert isinstance(lineno, int) and lineno >= 0, \ + "lineno must be an int >= 0" + item = (action, None, category, None, lineno) + if append: + filters.append(item) + else: + filters.insert(0, item) + +def resetwarnings(): + """Clear the list of warning filters, so that no filters are active.""" + filters[:] = [] + +class _OptionError(Exception): + """Exception used by option processing helpers.""" + pass + +# Helper to process -W options passed via sys.warnoptions +def _processoptions(args): + for arg in args: + try: + _setoption(arg) + except _OptionError, msg: + print >>sys.stderr, "Invalid -W option ignored:", msg + +# Helper for _processoptions() +def _setoption(arg): + import re + parts = arg.split(':') + if len(parts) > 5: + raise _OptionError("too many fields (max 5): %r" % (arg,)) + while len(parts) < 5: + parts.append('') + action, message, category, module, lineno = [s.strip() + for s in parts] + action = _getaction(action) + message = re.escape(message) + category = _getcategory(category) + module = re.escape(module) + if module: + module = module + '$' + if lineno: + try: + lineno = int(lineno) + if lineno < 0: + raise ValueError + except (ValueError, OverflowError): + raise _OptionError("invalid lineno %r" % (lineno,)) + else: + lineno = 0 + filterwarnings(action, message, category, module, lineno) + +# Helper for _setoption() +def _getaction(action): + if not action: + return "default" + if action == "all": return "always" # Alias + for a in ('default', 'always', 'ignore', 'module', 'once', 'error'): + if a.startswith(action): + return a + raise _OptionError("invalid action: %r" % (action,)) + +# Helper for _setoption() +def _getcategory(category): + import re + if not category: + return Warning + if re.match("^[a-zA-Z0-9_]+$", category): + try: + cat = eval(category) + except NameError: + raise _OptionError("unknown warning category: %r" % (category,)) + else: + i = category.rfind(".") + module = category[:i] + klass = category[i+1:] + try: + m = __import__(module, None, None, [klass]) + except ImportError: + raise _OptionError("invalid module name: %r" % (module,)) + try: + cat = getattr(m, klass) + except AttributeError: + raise _OptionError("unknown warning category: %r" % (category,)) + if not issubclass(cat, Warning): + raise _OptionError("invalid warning category: %r" % (category,)) + return cat + + +# Code typically replaced by _warnings +def warn(message, category=None, stacklevel=1): + """Issue a warning, or maybe ignore it or raise an exception.""" + # Check if message is already a Warning object + if isinstance(message, Warning): + category = message.__class__ + # Check category argument + if category is None: + category = UserWarning + assert issubclass(category, Warning) + # Get context information + try: + caller = sys._getframe(stacklevel) + except ValueError: + globals = sys.__dict__ + lineno = 1 + else: + globals = caller.f_globals + lineno = caller.f_lineno + if '__name__' in globals: + module = globals['__name__'] + else: + module = "<string>" + filename = globals.get('__file__') + if filename: + fnl = filename.lower() + if fnl.endswith((".pyc", ".pyo")): + filename = filename[:-1] + else: + if module == "__main__": + try: + filename = sys.argv[0] + except AttributeError: + # embedded interpreters don't have sys.argv, see bug #839151 + filename = '__main__' + if not filename: + filename = module + registry = globals.setdefault("__warningregistry__", {}) + warn_explicit(message, category, filename, lineno, module, registry, + globals) + +def warn_explicit(message, category, filename, lineno, + module=None, registry=None, module_globals=None): + lineno = int(lineno) + if module is None: + module = filename or "<unknown>" + if module[-3:].lower() == ".py": + module = module[:-3] # XXX What about leading pathname? + if registry is None: + registry = {} + if isinstance(message, Warning): + text = str(message) + category = message.__class__ + else: + text = message + message = category(message) + key = (text, category, lineno) + # Quick test for common case + if registry.get(key): + return + # Search the filters + for item in filters: + action, msg, cat, mod, ln = item + if ((msg is None or msg.match(text)) and + issubclass(category, cat) and + (mod is None or mod.match(module)) and + (ln == 0 or lineno == ln)): + break + else: + action = defaultaction + # Early exit actions + if action == "ignore": + registry[key] = 1 + return + + # Prime the linecache for formatting, in case the + # "file" is actually in a zipfile or something. + linecache.getlines(filename, module_globals) + + if action == "error": + raise message + # Other actions + if action == "once": + registry[key] = 1 + oncekey = (text, category) + if onceregistry.get(oncekey): + return + onceregistry[oncekey] = 1 + elif action == "always": + pass + elif action == "module": + registry[key] = 1 + altkey = (text, category, 0) + if registry.get(altkey): + return + registry[altkey] = 1 + elif action == "default": + registry[key] = 1 + else: + # Unrecognized actions are errors + raise RuntimeError( + "Unrecognized action (%r) in warnings.filters:\n %s" % + (action, item)) + # Print message and context + showwarning(message, category, filename, lineno) + + +class WarningMessage(object): + + """Holds the result of a single showwarning() call.""" + + _WARNING_DETAILS = ("message", "category", "filename", "lineno", "file", + "line") + + def __init__(self, message, category, filename, lineno, file=None, + line=None): + local_values = locals() + for attr in self._WARNING_DETAILS: + setattr(self, attr, local_values[attr]) + self._category_name = category.__name__ if category else None + + def __str__(self): + return ("{message : %r, category : %r, filename : %r, lineno : %s, " + "line : %r}" % (self.message, self._category_name, + self.filename, self.lineno, self.line)) + + +class catch_warnings(object): + + """A context manager that copies and restores the warnings filter upon + exiting the context. + + The 'record' argument specifies whether warnings should be captured by a + custom implementation of warnings.showwarning() and be appended to a list + returned by the context manager. Otherwise None is returned by the context + manager. The objects appended to the list are arguments whose attributes + mirror the arguments to showwarning(). + + The 'module' argument is to specify an alternative module to the module + named 'warnings' and imported under that name. This argument is only useful + when testing the warnings module itself. + + """ + + def __init__(self, record=False, module=None): + """Specify whether to record warnings and if an alternative module + should be used other than sys.modules['warnings']. + + For compatibility with Python 3.0, please consider all arguments to be + keyword-only. + + """ + self._record = record + self._module = sys.modules['warnings'] if module is None else module + self._entered = False + + def __repr__(self): + args = [] + if self._record: + args.append("record=True") + if self._module is not sys.modules['warnings']: + args.append("module=%r" % self._module) + name = type(self).__name__ + return "%s(%s)" % (name, ", ".join(args)) + + def __enter__(self): + if self._entered: + raise RuntimeError("Cannot enter %r twice" % self) + self._entered = True + self._filters = self._module.filters + self._module.filters = self._filters[:] + self._showwarning = self._module.showwarning + if self._record: + log = [] + def showwarning(*args, **kwargs): + log.append(WarningMessage(*args, **kwargs)) + self._module.showwarning = showwarning + return log + else: + return None + + def __exit__(self, *exc_info): + if not self._entered: + raise RuntimeError("Cannot exit %r without entering first" % self) + self._module.filters = self._filters + self._module.showwarning = self._showwarning + + +# filters contains a sequence of filter 5-tuples +# The components of the 5-tuple are: +# - an action: error, ignore, always, default, module, or once +# - a compiled regex that must match the warning message +# - a class representing the warning category +# - a compiled regex that must match the module that is being warned +# - a line number for the line being warning, or 0 to mean any line +# If either if the compiled regexs are None, match anything. +_warnings_defaults = False +try: + from _warnings import (filters, default_action, once_registry, + warn, warn_explicit) + defaultaction = default_action + onceregistry = once_registry + _warnings_defaults = True +except ImportError: + filters = [] + defaultaction = "default" + onceregistry = {} + + +# Module initialization +_processoptions(sys.warnoptions) +if not _warnings_defaults: + silence = [ImportWarning, PendingDeprecationWarning] + # Don't silence DeprecationWarning if -3 or -Q was used. + if not sys.py3kwarning and not sys.flags.division_warning: + silence.append(DeprecationWarning) + for cls in silence: + simplefilter("ignore", category=cls) + bytes_warning = sys.flags.bytes_warning + if bytes_warning > 1: + bytes_action = "error" + elif bytes_warning: + bytes_action = "default" + else: + bytes_action = "ignore" + simplefilter(bytes_action, category=BytesWarning, append=1) +del _warnings_defaults
--- a/tool_dependencies.xml Thu May 18 17:49:58 2017 -0400 +++ b/tool_dependencies.xml Thu May 18 18:37:28 2017 -0400 @@ -68,17 +68,13 @@ <package name="jbrowse_tools" version="1.12.1"> <install version="1.0"> <actions_group> - <action type="setup_perl_environment"> - <repository name="package_perl_5_18" owner="iuc"> - <package name="perl" version="5.18.1" /> - </repository> - <package>XML::Parser</package> - <package>http://search.cpan.org/CPAN/authors/id/C/CJ/CJFIELDS/BioPerl-1.6.922.tar.gz</package> - </action> <actions architecture="x86_64" os="linux"> <action type="download_by_url">http://jbrowse.org/wordpress/wp-content/plugins/download-monitor/download.php?id=105</action> <action type="make_directory">$INSTALL_DIR/jbrowse</action> <action type="shell_command"> + if [ -d "$TMP_WORK_DIR" ] ; then export HOME=$TMP_WORK_DIR ; else export HOME=`mktemp -d` ; fi ; export PATH=$INSTALL_DIR/bin/:$PATH; perl Makefile.PL --bootstrap=$INSTALL_DIR/local-lib/ --no-manpages + </action> + <action type="shell_command"> ./setup.sh </action> <action type="move_directory_files">