Mercurial > repos > bcclaywell > argo_navis
annotate venv/lib/python2.7/site-packages/requests/packages/chardet/chardistribution.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
rev | line source |
---|---|
0
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
1 ######################## BEGIN LICENSE BLOCK ######################## |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
2 # The Original Code is Mozilla Communicator client code. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
3 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
4 # The Initial Developer of the Original Code is |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
5 # Netscape Communications Corporation. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
6 # Portions created by the Initial Developer are Copyright (C) 1998 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
7 # the Initial Developer. All Rights Reserved. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
8 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
9 # Contributor(s): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
10 # Mark Pilgrim - port to Python |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
11 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
12 # This library is free software; you can redistribute it and/or |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
13 # modify it under the terms of the GNU Lesser General Public |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
14 # License as published by the Free Software Foundation; either |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
15 # version 2.1 of the License, or (at your option) any later version. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
16 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
17 # This library is distributed in the hope that it will be useful, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
20 # Lesser General Public License for more details. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
21 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
22 # You should have received a copy of the GNU Lesser General Public |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
23 # License along with this library; if not, write to the Free Software |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
25 # 02110-1301 USA |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
26 ######################### END LICENSE BLOCK ######################### |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
27 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
28 from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
29 EUCTW_TYPICAL_DISTRIBUTION_RATIO) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
30 from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
31 EUCKR_TYPICAL_DISTRIBUTION_RATIO) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
32 from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
33 GB2312_TYPICAL_DISTRIBUTION_RATIO) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
34 from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
35 BIG5_TYPICAL_DISTRIBUTION_RATIO) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
36 from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
37 JIS_TYPICAL_DISTRIBUTION_RATIO) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
38 from .compat import wrap_ord |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
39 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
40 ENOUGH_DATA_THRESHOLD = 1024 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
41 SURE_YES = 0.99 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
42 SURE_NO = 0.01 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
43 MINIMUM_DATA_THRESHOLD = 3 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
44 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
45 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
46 class CharDistributionAnalysis: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
47 def __init__(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
48 # Mapping table to get frequency order from char order (get from |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
49 # GetOrder()) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
50 self._mCharToFreqOrder = None |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
51 self._mTableSize = None # Size of above table |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
52 # This is a constant value which varies from language to language, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
53 # used in calculating confidence. See |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
54 # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
55 # for further detail. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
56 self._mTypicalDistributionRatio = None |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
57 self.reset() |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
58 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
59 def reset(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
60 """reset analyser, clear any state""" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
61 # If this flag is set to True, detection is done and conclusion has |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
62 # been made |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
63 self._mDone = False |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
64 self._mTotalChars = 0 # Total characters encountered |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
65 # The number of characters whose frequency order is less than 512 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
66 self._mFreqChars = 0 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
67 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
68 def feed(self, aBuf, aCharLen): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
69 """feed a character with known length""" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
70 if aCharLen == 2: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
71 # we only care about 2-bytes character in our distribution analysis |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
72 order = self.get_order(aBuf) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
73 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
74 order = -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
75 if order >= 0: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
76 self._mTotalChars += 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
77 # order is valid |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
78 if order < self._mTableSize: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
79 if 512 > self._mCharToFreqOrder[order]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
80 self._mFreqChars += 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
81 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
82 def get_confidence(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
83 """return confidence based on existing data""" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
84 # if we didn't receive any character in our consideration range, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
85 # return negative answer |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
86 if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
87 return SURE_NO |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
88 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
89 if self._mTotalChars != self._mFreqChars: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
90 r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
91 * self._mTypicalDistributionRatio)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
92 if r < SURE_YES: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
93 return r |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
94 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
95 # normalize confidence (we don't want to be 100% sure) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
96 return SURE_YES |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
97 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
98 def got_enough_data(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
99 # It is not necessary to receive all data to draw conclusion. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
100 # For charset detection, certain amount of data is enough |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
101 return self._mTotalChars > ENOUGH_DATA_THRESHOLD |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
102 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
103 def get_order(self, aBuf): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
104 # We do not handle characters based on the original encoding string, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
105 # but convert this encoding string to a number, here called order. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
106 # This allows multiple encodings of a language to share one frequency |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
107 # table. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
108 return -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
109 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
110 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
111 class EUCTWDistributionAnalysis(CharDistributionAnalysis): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
112 def __init__(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
113 CharDistributionAnalysis.__init__(self) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
114 self._mCharToFreqOrder = EUCTWCharToFreqOrder |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
115 self._mTableSize = EUCTW_TABLE_SIZE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
116 self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
117 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
118 def get_order(self, aBuf): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
119 # for euc-TW encoding, we are interested |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
120 # first byte range: 0xc4 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
121 # second byte range: 0xa1 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
122 # no validation needed here. State machine has done that |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
123 first_char = wrap_ord(aBuf[0]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
124 if first_char >= 0xC4: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
125 return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
126 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
127 return -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
128 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
129 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
130 class EUCKRDistributionAnalysis(CharDistributionAnalysis): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
131 def __init__(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
132 CharDistributionAnalysis.__init__(self) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
133 self._mCharToFreqOrder = EUCKRCharToFreqOrder |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
134 self._mTableSize = EUCKR_TABLE_SIZE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
135 self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
136 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
137 def get_order(self, aBuf): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
138 # for euc-KR encoding, we are interested |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
139 # first byte range: 0xb0 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
140 # second byte range: 0xa1 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
141 # no validation needed here. State machine has done that |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
142 first_char = wrap_ord(aBuf[0]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
143 if first_char >= 0xB0: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
144 return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
145 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
146 return -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
147 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
148 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
149 class GB2312DistributionAnalysis(CharDistributionAnalysis): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
150 def __init__(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
151 CharDistributionAnalysis.__init__(self) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
152 self._mCharToFreqOrder = GB2312CharToFreqOrder |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
153 self._mTableSize = GB2312_TABLE_SIZE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
154 self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
155 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
156 def get_order(self, aBuf): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
157 # for GB2312 encoding, we are interested |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
158 # first byte range: 0xb0 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
159 # second byte range: 0xa1 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
160 # no validation needed here. State machine has done that |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
161 first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
162 if (first_char >= 0xB0) and (second_char >= 0xA1): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
163 return 94 * (first_char - 0xB0) + second_char - 0xA1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
164 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
165 return -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
166 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
167 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
168 class Big5DistributionAnalysis(CharDistributionAnalysis): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
169 def __init__(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
170 CharDistributionAnalysis.__init__(self) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
171 self._mCharToFreqOrder = Big5CharToFreqOrder |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
172 self._mTableSize = BIG5_TABLE_SIZE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
173 self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
174 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
175 def get_order(self, aBuf): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
176 # for big5 encoding, we are interested |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
177 # first byte range: 0xa4 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
178 # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
179 # no validation needed here. State machine has done that |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
180 first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
181 if first_char >= 0xA4: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
182 if second_char >= 0xA1: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
183 return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
184 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
185 return 157 * (first_char - 0xA4) + second_char - 0x40 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
186 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
187 return -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
188 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
189 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
190 class SJISDistributionAnalysis(CharDistributionAnalysis): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
191 def __init__(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
192 CharDistributionAnalysis.__init__(self) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
193 self._mCharToFreqOrder = JISCharToFreqOrder |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
194 self._mTableSize = JIS_TABLE_SIZE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
195 self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
196 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
197 def get_order(self, aBuf): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
198 # for sjis encoding, we are interested |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
199 # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
200 # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
201 # no validation needed here. State machine has done that |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
202 first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
203 if (first_char >= 0x81) and (first_char <= 0x9F): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
204 order = 188 * (first_char - 0x81) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
205 elif (first_char >= 0xE0) and (first_char <= 0xEF): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
206 order = 188 * (first_char - 0xE0 + 31) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
207 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
208 return -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
209 order = order + second_char - 0x40 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
210 if second_char > 0x7F: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
211 order = -1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
212 return order |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
213 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
214 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
215 class EUCJPDistributionAnalysis(CharDistributionAnalysis): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
216 def __init__(self): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
217 CharDistributionAnalysis.__init__(self) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
218 self._mCharToFreqOrder = JISCharToFreqOrder |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
219 self._mTableSize = JIS_TABLE_SIZE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
220 self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
221 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
222 def get_order(self, aBuf): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
223 # for euc-JP encoding, we are interested |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
224 # first byte range: 0xa0 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
225 # second byte range: 0xa1 -- 0xfe |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
226 # no validation needed here. State machine has done that |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
227 char = wrap_ord(aBuf[0]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
228 if char >= 0xA0: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
229 return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
230 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
231 return -1 |