comparison test/lib/python2.7/encodings/utf_8_sig.py @ 3:7d1a9a91b989 draft

planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
author yating-l
date Thu, 18 May 2017 18:37:28 -0400
parents
children
comparison
equal deleted inserted replaced
2:3e2160197902 3:7d1a9a91b989
1 """ Python 'utf-8-sig' Codec
2 This works similarly to UTF-8, with the following changes:
3
4 * On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
5 first three bytes.
6
7 * On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
8 bytes will be skipped.
9 """
10 import codecs
11
12 ### Codec APIs
13
def encode(input, errors='strict'):
    """Stateless encoder: UTF-8 encode *input* and prepend the UTF-8 BOM.

    Returns a ``(data, length)`` tuple where ``data`` is the BOM followed by
    the UTF-8 encoding of *input*, and ``length`` is ``len(input)``.
    *errors* is handed unchanged to ``codecs.utf_8_encode``.
    """
    encoded, _ = codecs.utf_8_encode(input, errors)
    return (codecs.BOM_UTF8 + encoded, len(input))
16
def decode(input, errors='strict'):
    """Stateless decoder: UTF-8 decode *input*, skipping a leading BOM.

    Returns ``(text, consumed)``.  When the first three bytes are the UTF-8
    BOM they are dropped from the decoded text but still counted in
    ``consumed``.  The input is always decoded as final (complete) data.
    """
    if input[:3] == codecs.BOM_UTF8:
        # Decode everything after the signature, then account for its 3 bytes.
        text, used = codecs.utf_8_decode(input[3:], errors, True)
        return (text, used + 3)
    return codecs.utf_8_decode(input, errors, True)
24
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-8 encoder that emits the BOM once, before any data.

    ``self.first`` is 1 until the first ``encode()`` call after construction
    or ``reset()``, and 0 afterwards.  It is also the value exposed through
    ``getstate()`` / ``setstate()``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        self.first = 1

    def encode(self, input, final=False):
        """Encode *input*; the first call's output is prefixed with the BOM."""
        emit_bom = self.first
        if emit_bom:
            # Clear the flag before encoding, so it stays cleared even if the
            # encode call below raises.
            self.first = 0
        encoded = codecs.utf_8_encode(input, self.errors)[0]
        if emit_bom:
            return codecs.BOM_UTF8 + encoded
        return encoded

    def reset(self):
        """Return to the initial state: the next chunk gets a BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return the "BOM still pending" flag."""
        return self.first

    def setstate(self, state):
        """Restore the "BOM still pending" flag from *state*."""
        self.first = state
46
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-8 decoder that skips a BOM at the start of the stream.

    ``self.first`` is 1 while the decoder has not yet decided whether the
    stream starts with a BOM, and 0 once that decision has been made.  The
    flag is part of the decoder state returned by ``getstate()`` so that a
    saved state can be restored without forgetting (or repeating) the BOM
    check.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.first = 1

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, returning ``(text, bytes_consumed)``.

        Until the BOM decision is made, fewer than three bytes that could
        still be the start of a BOM are left unconsumed so the check can be
        repeated once more data has arrived.
        """
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    # NOTE: this branch is taken regardless of ``final``.
                    return (u"", 0)
                else:
                    # Cannot be a BOM: decode it as ordinary UTF-8 below.
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = \
                        codecs.utf_8_decode(input[3:], errors, final)
                    # The three skipped BOM bytes count as consumed.
                    return (output, consumed + 3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Return to the initial state: look for a BOM again."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return ``(buffered_input, flag)``; flag is 1 while the BOM check
        is still pending and 0 afterwards."""
        state = codecs.BufferedIncrementalDecoder.getstate(self)
        # The base class only supplies the buffered bytes; its second item
        # is replaced with the BOM flag.
        return (state[0], self.first)

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``."""
        # The base class restores the byte buffer from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        self.first = state[1]
71
class StreamWriter(codecs.StreamWriter):
    """Stream writer whose first encoded chunk is prefixed with the BOM.

    The class-level ``encode`` handles only the first chunk: it shadows
    itself with an instance attribute bound to ``codecs.utf_8_encode`` so
    later chunks are encoded without a BOM.  ``reset()`` removes that
    instance attribute again.
    """

    def reset(self):
        """Reset the writer so the next chunk is written with a BOM."""
        codecs.StreamWriter.reset(self)
        # Drop the per-instance override (if any) so the class-level
        # ``encode`` below becomes visible again.
        self.__dict__.pop('encode', None)

    def encode(self, input, errors='strict'):
        """Encode the first chunk with a BOM and switch to plain UTF-8."""
        # Install the override first, so it stays in place even if the
        # module-level encode() call raises.
        self.encode = codecs.utf_8_encode
        return encode(input, errors)
83
class StreamReader(codecs.StreamReader):
    """Stream reader that skips a UTF-8 BOM at the start of the data.

    The class-level ``decode`` runs only until it has decided whether the
    data starts with a BOM; it then shadows itself with an instance
    attribute bound to ``codecs.utf_8_decode``.  ``reset()`` removes that
    instance attribute so the BOM check runs again.
    """

    def reset(self):
        """Reset the reader so the next data is checked for a BOM again."""
        codecs.StreamReader.reset(self)
        # Drop the per-instance override (if any) so the class-level
        # ``decode`` below becomes visible again.
        self.__dict__.pop('decode', None)

    def decode(self, input, errors='strict'):
        """Decode *input*, skipping a leading BOM; return (text, consumed)."""
        bom = codecs.BOM_UTF8
        too_short = len(input) < 3
        if too_short and bom.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return (u"", 0)
        # The BOM question is settled either way: later calls go straight
        # to the plain UTF-8 decoder.
        self.decode = codecs.utf_8_decode
        if not too_short and input[:3] == bom:
            text, used = codecs.utf_8_decode(input[3:], errors)
            # The three skipped BOM bytes count as consumed.
            return (text, used + 3)
        # no BOM present
        return codecs.utf_8_decode(input, errors)
105
106 ### encodings module API
107
def getregentry():
    """Return the ``codecs.CodecInfo`` entry describing the 'utf-8-sig' codec.

    The entry bundles this module's stateless ``encode``/``decode``
    functions with its incremental and stream classes.
    """
    entry = codecs.CodecInfo(
        name='utf-8-sig',
        encode=encode,
        decode=decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
    )
    return entry