summaryrefslogtreecommitdiff
path: root/src/python/stdlib/test/test_normalization.py
diff options
context:
space:
mode:
author Lieuwe <lieuwemo@gmail.com> 2011-03-18 16:04:51 (GMT)
committer Lieuwe <lieuwemo@gmail.com> 2011-03-18 16:04:51 (GMT)
commit 9c39875ef09d439ef51716dba091188f72977f5e (patch)
tree fb0fd27a1bcd3e54a1913852c23aa137ee5eb30b /src/python/stdlib/test/test_normalization.py
parent bc8af4e2101ac93bdc51b59ee64fa43ecb2442f3 (diff)
download powder-9c39875ef09d439ef51716dba091188f72977f5e.zip
powder-9c39875ef09d439ef51716dba091188f72977f5e.tar.gz
...
Diffstat (limited to 'src/python/stdlib/test/test_normalization.py')
-rw-r--r-- src/python/stdlib/test/test_normalization.py 105
1 files changed, 105 insertions, 0 deletions
diff --git a/src/python/stdlib/test/test_normalization.py b/src/python/stdlib/test/test_normalization.py
new file mode 100644
index 0000000..2c49720
--- /dev/null
+++ b/src/python/stdlib/test/test_normalization.py
@@ -0,0 +1,105 @@
+from test.test_support import run_unittest, open_urlresource
+import unittest
+
+from httplib import HTTPException
+import sys
+import os
+from unicodedata import normalize, unidata_version
+
+TESTDATAFILE = "NormalizationTest.txt"  # file name of the normalization test data
+TESTDATAURL = "http://www.unicode.org/Public/" + unidata_version + "/ucd/" + TESTDATAFILE  # URL of that file under the directory named after unicodedata.unidata_version
+
+def check_version(testfile):  # Return True when the first line read from testfile contains unidata_version.
+ hdr = testfile.readline()  # consumes exactly one line from the file object
+ return unidata_version in hdr  # plain substring test against that header line
+
+class RangeError(Exception):  # Raised by unistr() when a code point is greater than sys.maxunicode.
+ pass
+
+def NFC(str):  # Shorthand for unicodedata.normalize with form "NFC"; note the parameter shadows the builtin str.
+ return normalize("NFC", str)
+
+def NFKC(str):  # Shorthand for unicodedata.normalize with form "NFKC"; note the parameter shadows the builtin str.
+ return normalize("NFKC", str)
+
+def NFD(str):  # Shorthand for unicodedata.normalize with form "NFD"; note the parameter shadows the builtin str.
+ return normalize("NFD", str)
+
+def NFKD(str):  # Shorthand for unicodedata.normalize with form "NFKD"; note the parameter shadows the builtin str.
+ return normalize("NFKD", str)
+
+def unistr(data):  # Convert a string of space-separated hexadecimal code points into a unicode string; raises RangeError if any value exceeds sys.maxunicode.
+ data = [int(x, 16) for x in data.split(" ")]  # parse every field as a base-16 integer (rebinds data to a list of ints)
+ for x in data:
+ if x > sys.maxunicode:  # value is above the largest code point this interpreter reports
+ raise RangeError
+ return u"".join([unichr(x) for x in data])  # one character per code point, concatenated in order
+
+class NormalizationTest(unittest.TestCase):  # Checks unicodedata.normalize against the data file fetched from TESTDATAURL.
+ def test_main(self):  # Check each data line's five columns against NFC/NFD/NFKC/NFKD, then check the remaining code points.
+ part = None  # current "@Part..." marker; stays None until one is read
+ part1_data = {}  # c1 strings seen while part == "@Part1"; used as a set (values are always 1)
+ # Hit the exception early
+ try:
+ testdata = open_urlresource(TESTDATAURL, check_version)  # check_version is handed over as the second argument -- presumably a validity check on the file; confirm against test_support
+ except (IOError, HTTPException):
+ self.skipTest("Could not retrieve " + TESTDATAURL)  # report a skip rather than a failure when the data cannot be obtained
+ for line in testdata:
+ if '#' in line:
+ line = line.split('#')[0]  # keep only the text before the first '#'
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith("@Part"):
+ part = line.split()[0]  # first whitespace-separated word, compared below against "@Part1" / "@Part3"
+ continue
+ if part == "@Part3":
+ # XXX we don't support PRI #29 yet, so skip these tests for now
+ continue
+ try:
+ c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]  # every ';'-separated field except the last; unpacking requires exactly five
+ except RangeError:
+ # Skip unsupported characters;
+ # try at least adding c1 if we are in part1
+ if part == "@Part1":
+ try:
+ c1 = unistr(line.split(';')[0])  # retry with the first field only
+ except RangeError:
+ pass
+ else:
+ part1_data[c1] = 1
+ continue
+
+ # Perform tests
+ self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)  # line is passed as the failure message
+ self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
+ self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
+ self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
+ self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
+ NFKC(c3) == NFKC(c4) == NFKC(c5),
+ line)
+ self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
+ NFKD(c3) == NFKD(c4) == NFKD(c5),
+ line)
+
+ # Record part 1 data
+ if part == "@Part1":
+ part1_data[c1] = 1
+
+ # Perform tests for all other data
+ for c in range(sys.maxunicode+1):  # every code point from 0 through sys.maxunicode inclusive
+ X = unichr(c)
+ if X in part1_data:  # already recorded as a c1 while in @Part1
+ continue
+ self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)  # the character must be unchanged by all four forms; c is the failure message
+
+ def test_bug_834676(self):  # Only verifies that this normalize() call completes; the result is not inspected.
+ # Check for bug 834676
+ normalize('NFC', u'\ud55c\uae00')
+
+
+def test_main():  # Module-level entry point: runs NormalizationTest through run_unittest.
+ run_unittest(NormalizationTest)
+
+if __name__ == "__main__":  # run the tests when the file is executed as a script
+ test_main()