Mercurial > repos > yating-l > jbrowsearchivecreator
comparison test/lib/python2.7/encodings/utf_8_sig.py @ 3:7d1a9a91b989 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
| author | yating-l |
|---|---|
| date | Thu, 18 May 2017 18:37:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:3e2160197902 | 3:7d1a9a91b989 |
|---|---|
| 1 """ Python 'utf-8-sig' Codec | |
| 2 This works similarly to UTF-8, with the following changes: | |
| 3 | |
| 4 * On encoding/writing a UTF-8 encoded BOM will be prepended/written as the | |
| 5 first three bytes. | |
| 6 | |
| 7 * On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these | |
| 8 bytes will be skipped. | |
| 9 """ | |
| 10 import codecs | |
| 11 | |
| 12 ### Codec APIs | |
| 13 | |
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 preceded by the UTF-8 byte order mark.

    Returns a ``(data, length)`` tuple where ``data`` is
    ``codecs.BOM_UTF8`` followed by the UTF-8 encoding of *input*, and
    ``length`` is ``len(input)``.  *errors* is passed straight through to
    ``codecs.utf_8_encode``.
    """
    payload = codecs.utf_8_encode(input, errors)[0]
    return (codecs.BOM_UTF8 + payload, len(input))
| 16 | |
def decode(input, errors='strict'):
    """Decode UTF-8 *input*, skipping a leading UTF-8 BOM when present.

    Returns ``(text, consumed)``.  ``consumed`` counts the three BOM bytes
    when they were skipped.  The data is always decoded as final, so a
    truncated trailing sequence is handled according to *errors*.
    """
    has_bom = input[:3] == codecs.BOM_UTF8
    skipped = 3 if has_bom else 0
    body = input[3:] if has_bom else input
    text, used = codecs.utf_8_decode(body, errors, True)
    return (text, used + skipped)
| 24 | |
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental 'utf-8-sig' encoder.

    The first chunk produced after construction or ``reset()`` is prefixed
    with ``codecs.BOM_UTF8``; later chunks are plain UTF-8.  ``first`` is 1
    while the BOM is still pending and 0 once it has been emitted.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        self.first = 1

    def encode(self, input, final=False):
        """Return the UTF-8 bytes for *input*, with the BOM on the first call."""
        bom_pending = self.first
        if bom_pending:
            # Clear the flag before encoding, so it stays cleared even if
            # the encoding step raises.
            self.first = 0
        data = codecs.utf_8_encode(input, self.errors)[0]
        if bom_pending:
            return codecs.BOM_UTF8 + data
        return data

    def reset(self):
        """Return to the initial state: the next chunk gets a BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return the BOM-pending flag as the encoder state."""
        return self.first

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``."""
        self.first = state
| 46 | |
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental 'utf-8-sig' decoder that drops one leading UTF-8 BOM.

    ``first`` is 1 until the decoder has seen enough data to decide whether
    the stream starts with a BOM, and 0 afterwards.  The flag is part of
    the state returned by ``getstate()``, so a decoder restored with
    ``setstate()`` does not strip a second BOM from the middle of a stream.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.first = 1

    def _buffer_decode(self, input, errors, final):
        """Decode buffered *input* and return ``(text, bytes_consumed)``.

        *input* is the still-undecoded buffer plus the newly supplied data,
        as assembled by ``BufferedIncrementalDecoder.decode``.
        """
        if self.first:
            if len(input) < 3:
                # A short prefix of the BOM is ambiguous, so wait for more
                # data -- unless this is the final call with leftover bytes,
                # in which case there is no "next call" and the truncated
                # sequence must go through the normal error handling
                # instead of being silently dropped.
                if codecs.BOM_UTF8.startswith(input) and not (final and input):
                    return (u"", 0)
                self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(
                        input[3:], errors, final)
                    return (output, consumed + 3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Return to the initial state: a leading BOM will be skipped again."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return ``(buffered_bytes, first)`` as the decoder state."""
        state = codecs.BufferedIncrementalDecoder.getstate(self)
        # The base class only reports the buffer; the second slot carries
        # the BOM-pending flag so it survives a getstate()/setstate() trip.
        return (state[0], self.first)

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``."""
        # The base class restores only the buffer from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        self.first = 1 if state[1] else 0
| 71 | |
class StreamWriter(codecs.StreamWriter):
    """Stream writer that emits a UTF-8 BOM before the first data written.

    The first ``encode`` call replaces itself on the instance with
    ``codecs.utf_8_encode``, so later writes are plain UTF-8.  ``reset()``
    removes that override so the next write emits a BOM again.
    """

    def reset(self):
        codecs.StreamWriter.reset(self)
        # Remove the instance-level ``encode`` override, if one was
        # installed, so the class-level BOM-writing method is used again.
        self.__dict__.pop('encode', None)

    def encode(self, input, errors='strict'):
        # Only reached for the first write: switch this instance to plain
        # UTF-8 for later calls, then emit BOM + data via the module-level
        # ``encode``.
        self.encode = codecs.utf_8_encode
        return encode(input, errors)
| 83 | |
class StreamReader(codecs.StreamReader):
    """Stream reader that skips a UTF-8 BOM at the start of the stream.

    Once ``decode`` has seen enough data to decide whether a BOM is
    present, it replaces itself on the instance with
    ``codecs.utf_8_decode``.  ``reset()`` removes that override so BOM
    detection runs again.
    """

    def reset(self):
        codecs.StreamReader.reset(self)
        # Remove the instance-level ``decode`` override, if one was
        # installed, so the class-level BOM-detecting method is used again.
        self.__dict__.pop('decode', None)

    def decode(self, input, errors='strict'):
        bom = codecs.BOM_UTF8
        is_short = len(input) < 3
        if is_short and bom.startswith(input):
            # Not enough data to decide if this is a BOM
            # => consume nothing and try again on the next call.
            return (u"", 0)
        # The decision can be made now; later calls go straight to UTF-8.
        self.decode = codecs.utf_8_decode
        if not is_short and input[:3] == bom:
            text, used = codecs.utf_8_decode(input[3:], errors)
            return (text, used + 3)
        # No BOM present.
        return codecs.utf_8_decode(input, errors)
| 105 | |
| 106 ### encodings module API | |
| 107 | |
def getregentry():
    """Return the ``codecs.CodecInfo`` record that registers this module's
    functions and classes under the codec name 'utf-8-sig'."""
    codec_info = codecs.CodecInfo(
        encode,
        decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='utf-8-sig',
    )
    return codec_info
