Mercurial > repos > yating-l > jbrowsearchivecreator
comparison test/lib/python2.7/encodings/punycode.py @ 3:7d1a9a91b989 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
| author | yating-l |
|---|---|
| date | Thu, 18 May 2017 18:37:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:3e2160197902 | 3:7d1a9a91b989 |
|---|---|
| 1 # -*- coding: iso-8859-1 -*- | |
| 2 """ Codec for the Punycode encoding, as specified in RFC 3492 | |
| 3 | |
| 4 Written by Martin v. Löwis. | |
| 5 """ | |
| 6 | |
| 7 import codecs | |
| 8 | |
| 9 ##################### Encoding ##################################### | |
| 10 | |
def segregate(str):
    """3.1 Basic code point segregation.

    Split *str* into its basic (ordinal < 128) and extended characters.

    Returns a pair ``(base, extended)``: *base* holds the basic characters
    of *str* in their original order, joined and encoded as ASCII;
    *extended* is a sorted list of the distinct remaining characters.
    """
    base = []
    extended = set()
    for c in str:
        if ord(c) < 128:
            base.append(c)
        else:
            extended.add(c)
    # sorted() yields the ordered list directly instead of depending on
    # dict.keys() returning a list that can be sorted in place.
    return "".join(base).encode("ascii"), sorted(extended)
| 23 | |
def selective_len(str, max):
    """Return the length of str, considering only characters below max.

    A character is counted when its ordinal is strictly less than *max*.
    """
    return sum(1 for c in str if ord(c) < max)
| 31 | |
def selective_find(str, char, index, pos):
    """Find the next occurrence of *char* in *str* after position *pos*.

    The search starts at ``pos + 1``.  *index* is the running count that
    goes with *pos*: it is advanced by one for every character passed over
    that compares less than *char*, and once more for the match itself.

    Returns a pair ``(index, pos)`` for the occurrence found, where *pos*
    is its position in the full string, or ``(-1, -1)`` when there is no
    further occurrence.
    """
    # Bounding the scan by the string length also covers a start position
    # that is already at or past the end, which yields "not found".
    for pos in range(pos + 1, len(str)):
        c = str[pos]
        if c == char:
            return index + 1, pos
        if c < char:
            index += 1
    return (-1, -1)
| 49 | |
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    Walk the characters of *extended* in the order given and, for every
    occurrence of each one in *str*, append one delta to the returned list.
    """
    deltas = []
    prev_char = 0x80
    prev_index = -1
    for ch in extended:
        code = ord(ch)
        # Initial delta for this character: (count of characters below it,
        # plus one) for each code point skipped since the previous one.
        delta = (selective_len(str, code) + 1) * (code - prev_char)
        index, pos = selective_find(str, ch, -1, -1)
        while index != -1:
            delta += index - prev_index
            deltas.append(delta - 1)
            prev_index = index
            delta = 0
            index, pos = selective_find(str, ch, index, pos)
        prev_char = code

    return deltas
| 71 | |
def T(j, bias):
    """Return the threshold for digit position *j* under *bias*.

    The value ``36 * (j + 1) - bias`` is clamped to the range 1..26.
    """
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    return min(26, max(1, 36 * (j + 1) - bias))
| 78 | |
# Digit alphabet: values 0-25 map to a-z, values 26-35 map to 0-9.
digits = "abcdefghijklmnopqrstuvwxyz0123456789"


def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Return the list of digit characters that encode the integer *N*, with
    the threshold for each digit position taken from ``T(j, bias)``.
    """
    encoded = []
    j = 0
    t = T(j, bias)
    while N >= t:
        # Emit one non-final digit and carry the quotient forward.
        N, remainder = divmod(N - t, 36 - t)
        encoded.append(digits[t + remainder])
        j += 1
        t = T(j, bias)
    # A value below the threshold is the final digit.
    encoded.append(digits[N])
    return encoded
| 92 | |
def adapt(delta, first, numchars):
    """Compute the next bias from the *delta* that was just processed.

    *first* selects the divisor applied to the delta (700 when true,
    2 otherwise); *numchars* is the divisor for the proportional increase
    that follows.
    """
    scaled = delta // (700 if first else 2)
    scaled += scaled // numchars
    steps = 0
    # ((base - tmin) * tmax) // 2 == 455
    while scaled > 455:
        scaled //= 35  # base - tmin
        steps += 1
    # Each reduction step contributes one full base (36) to the bias.
    return 36 * steps + (36 * scaled // (scaled + 38))
| 106 | |
| 107 | |
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode every delta in *deltas* as a generalized variable-length
    integer, re-adapting the bias after each one, and return the
    concatenated digits as a single string.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    bias = 72
    pieces = []
    for handled, delta in enumerate(deltas):
        pieces.extend(generate_generalized_integer(delta, bias))
        is_first = handled == 0
        bias = adapt(delta, is_first, baselen + handled + 1)
    return "".join(pieces)
| 118 | |
def punycode_encode(text):
    """Encode *text* as Punycode.

    The result is the basic characters of *text*, followed by "-" and the
    encoded deltas; when *text* has no basic characters, only the encoded
    deltas are returned.
    """
    # segregate() already returns the basic characters ASCII-encoded, so
    # they are not encoded a second time here.
    base, extended = segregate(text)
    deltas = insertion_unsort(text, extended)
    extended = generate_integers(len(base), deltas)
    if base:
        return base + "-" + extended
    return extended
| 127 | |
| 128 ##################### Decoding ##################################### | |
| 129 | |
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one generalized variable-length integer from *extended*,
    starting at index *extpos*.  Only the digits ``A``-``Z`` and
    ``0``-``9`` are accepted.

    Returns a pair ``(newpos, value)`` where *newpos* is the index just
    past the consumed input.  When *errors* is not "strict" and the input
    is truncated or contains an invalid digit, *value* is None.

    Raises UnicodeError for truncated or invalid input when *errors* is
    "strict".
    """
    result = 0
    w = 1
    j = 0
    while 1:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punicode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A:  # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:  # 0-9
            digit = char - 22  # 0x30-26
        elif errors == "strict":
            # extpos has already been advanced past the offending
            # character, so it lives at extpos - 1.
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos - 1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            # A digit below the threshold terminates the number.
            return extpos, result
        w = w * (36 - t)
        j += 1
| 158 | |
| 159 | |
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in *extended* one at a time and insert the resulting
    characters into *base*.  Returns the resulting string; if a delta
    cannot be decoded, the string built so far is returned.

    Raises UnicodeError when *errors* is "strict" and a decoded code point
    exceeds U+10FFFF.
    """
    code_point = 0x80
    insert_at = -1
    bias = 72
    extpos = 0
    while extpos < len(extended):
        newpos, delta = decode_generalized_number(extended, extpos,
                                                  bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        insert_at += delta + 1
        # Each full pass over the len(base) + 1 insertion slots advances
        # the code point by one; the remainder is the insertion position.
        wraps, insert_at = divmod(insert_at, len(base) + 1)
        code_point += wraps
        if code_point > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % code_point)
            code_point = ord('?')
        base = base[:insert_at] + unichr(code_point) + base[insert_at:]
        bias = adapt(delta, (extpos == 0), len(base))
        extpos = newpos
    return base
| 184 | |
def punycode_decode(text, errors):
    """Decode the Punycode string *text*.

    Everything before the last "-" is the basic part and everything after
    it is the extended part; without a "-" the whole input is the extended
    part.  *errors* is passed on to the ASCII decoding of the basic part
    and to the extended-part decoder.
    """
    delimiter = text.rfind("-")
    if delimiter == -1:
        base, extended = "", text
    else:
        base, extended = text[:delimiter], text[delimiter + 1:]
    # The digit decoder only recognises upper-case letters.
    return insertion_sort(unicode(base, "ascii", errors),
                          extended.upper(), errors)
| 196 | |
| 197 ### Codec APIs | |
| 198 | |
class Codec(codecs.Codec):
    """Punycode encoder/decoder pair."""

    def encode(self, input, errors='strict'):
        """Return ``(encoded, len(input))``; *errors* is not consulted."""
        return punycode_encode(input), len(input)

    def decode(self, input, errors='strict'):
        """Return ``(decoded, len(input))``.

        Raises UnicodeError when *errors* is not one of 'strict',
        'replace' or 'ignore'.
        """
        if errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling " + errors)
        return punycode_decode(input, errors), len(input)
| 210 | |
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that encodes each input chunk independently."""

    def encode(self, input, final=False):
        """Return the Punycode encoding of *input*; *final* is not used."""
        encoded = punycode_encode(input)
        return encoded
| 214 | |
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that decodes each input chunk independently."""

    def decode(self, input, final=False):
        """Return the decoded form of *input*; *final* is not used.

        Raises UnicodeError when ``self.errors`` is not one of 'strict',
        'replace' or 'ignore'.
        """
        handler = self.errors
        if handler not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling " + handler)
        return punycode_decode(input, handler)
| 220 | |
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that takes its encode/decode methods from Codec."""
| 223 | |
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that takes its encode/decode methods from Codec."""
| 226 | |
| 227 ### encodings module API | |
| 228 | |
def getregentry():
    """Return the codecs.CodecInfo entry describing the punycode codec."""
    entry = dict(
        name='punycode',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return codecs.CodecInfo(**entry)
