mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-25 01:02:24 +00:00
Implement Unicode normalisation.
A new module in 'utils' computes NFC and NFD, via a new set of data tables generated by read_ucd.py. The new module comes with a new test program, which can read the NormalizationTest.txt that appears in the Unicode Character Database. All the tests pass, as of Unicode 15.
This commit is contained in:
parent
4cb429e3f4
commit
b35d23f699
@ -85,6 +85,11 @@ add_executable(test_decode_utf8
|
||||
target_compile_definitions(test_decode_utf8 PRIVATE TEST)
|
||||
target_link_libraries(test_decode_utf8 utils ${platform_libraries})
|
||||
|
||||
add_executable(test_unicode_norm
|
||||
utils/unicode-norm.c)
|
||||
target_compile_definitions(test_unicode_norm PRIVATE TEST)
|
||||
target_link_libraries(test_unicode_norm utils ${platform_libraries})
|
||||
|
||||
add_executable(test_tree234
|
||||
utils/tree234.c)
|
||||
target_compile_definitions(test_tree234 PRIVATE TEST)
|
||||
|
3
misc.h
3
misc.h
@ -265,6 +265,9 @@ unsigned decode_utf8(BinarySource *src);
|
||||
* number written. */
|
||||
size_t decode_utf8_to_wchar(BinarySource *src, wchar_t *out);
|
||||
|
||||
/* Normalise a UTF-8 string into Normalisation Form C. */
|
||||
strbuf *utf8_to_nfc(ptrlen input);
|
||||
|
||||
/* Write a string out in C string-literal format. */
|
||||
void write_c_string_literal(FILE *fp, ptrlen str);
|
||||
|
||||
|
950
unicode/canonical_comp.h
Normal file
950
unicode/canonical_comp.h
Normal file
@ -0,0 +1,950 @@
|
||||
/*
|
||||
* Autogenerated by read_ucd.py from The Unicode Standard 15.0.0
|
||||
*
|
||||
* List the pairs of Unicode characters that canonically recompose to a
|
||||
* single character in NFC.
|
||||
*
|
||||
* Used by utils/unicode-norm.c.
|
||||
*/
|
||||
|
||||
{0x003c, 0x0338, 0x226e},
|
||||
{0x003d, 0x0338, 0x2260},
|
||||
{0x003e, 0x0338, 0x226f},
|
||||
{0x0041, 0x0300, 0x00c0},
|
||||
{0x0041, 0x0301, 0x00c1},
|
||||
{0x0041, 0x0302, 0x00c2},
|
||||
{0x0041, 0x0303, 0x00c3},
|
||||
{0x0041, 0x0304, 0x0100},
|
||||
{0x0041, 0x0306, 0x0102},
|
||||
{0x0041, 0x0307, 0x0226},
|
||||
{0x0041, 0x0308, 0x00c4},
|
||||
{0x0041, 0x0309, 0x1ea2},
|
||||
{0x0041, 0x030a, 0x00c5},
|
||||
{0x0041, 0x030c, 0x01cd},
|
||||
{0x0041, 0x030f, 0x0200},
|
||||
{0x0041, 0x0311, 0x0202},
|
||||
{0x0041, 0x0323, 0x1ea0},
|
||||
{0x0041, 0x0325, 0x1e00},
|
||||
{0x0041, 0x0328, 0x0104},
|
||||
{0x0042, 0x0307, 0x1e02},
|
||||
{0x0042, 0x0323, 0x1e04},
|
||||
{0x0042, 0x0331, 0x1e06},
|
||||
{0x0043, 0x0301, 0x0106},
|
||||
{0x0043, 0x0302, 0x0108},
|
||||
{0x0043, 0x0307, 0x010a},
|
||||
{0x0043, 0x030c, 0x010c},
|
||||
{0x0043, 0x0327, 0x00c7},
|
||||
{0x0044, 0x0307, 0x1e0a},
|
||||
{0x0044, 0x030c, 0x010e},
|
||||
{0x0044, 0x0323, 0x1e0c},
|
||||
{0x0044, 0x0327, 0x1e10},
|
||||
{0x0044, 0x032d, 0x1e12},
|
||||
{0x0044, 0x0331, 0x1e0e},
|
||||
{0x0045, 0x0300, 0x00c8},
|
||||
{0x0045, 0x0301, 0x00c9},
|
||||
{0x0045, 0x0302, 0x00ca},
|
||||
{0x0045, 0x0303, 0x1ebc},
|
||||
{0x0045, 0x0304, 0x0112},
|
||||
{0x0045, 0x0306, 0x0114},
|
||||
{0x0045, 0x0307, 0x0116},
|
||||
{0x0045, 0x0308, 0x00cb},
|
||||
{0x0045, 0x0309, 0x1eba},
|
||||
{0x0045, 0x030c, 0x011a},
|
||||
{0x0045, 0x030f, 0x0204},
|
||||
{0x0045, 0x0311, 0x0206},
|
||||
{0x0045, 0x0323, 0x1eb8},
|
||||
{0x0045, 0x0327, 0x0228},
|
||||
{0x0045, 0x0328, 0x0118},
|
||||
{0x0045, 0x032d, 0x1e18},
|
||||
{0x0045, 0x0330, 0x1e1a},
|
||||
{0x0046, 0x0307, 0x1e1e},
|
||||
{0x0047, 0x0301, 0x01f4},
|
||||
{0x0047, 0x0302, 0x011c},
|
||||
{0x0047, 0x0304, 0x1e20},
|
||||
{0x0047, 0x0306, 0x011e},
|
||||
{0x0047, 0x0307, 0x0120},
|
||||
{0x0047, 0x030c, 0x01e6},
|
||||
{0x0047, 0x0327, 0x0122},
|
||||
{0x0048, 0x0302, 0x0124},
|
||||
{0x0048, 0x0307, 0x1e22},
|
||||
{0x0048, 0x0308, 0x1e26},
|
||||
{0x0048, 0x030c, 0x021e},
|
||||
{0x0048, 0x0323, 0x1e24},
|
||||
{0x0048, 0x0327, 0x1e28},
|
||||
{0x0048, 0x032e, 0x1e2a},
|
||||
{0x0049, 0x0300, 0x00cc},
|
||||
{0x0049, 0x0301, 0x00cd},
|
||||
{0x0049, 0x0302, 0x00ce},
|
||||
{0x0049, 0x0303, 0x0128},
|
||||
{0x0049, 0x0304, 0x012a},
|
||||
{0x0049, 0x0306, 0x012c},
|
||||
{0x0049, 0x0307, 0x0130},
|
||||
{0x0049, 0x0308, 0x00cf},
|
||||
{0x0049, 0x0309, 0x1ec8},
|
||||
{0x0049, 0x030c, 0x01cf},
|
||||
{0x0049, 0x030f, 0x0208},
|
||||
{0x0049, 0x0311, 0x020a},
|
||||
{0x0049, 0x0323, 0x1eca},
|
||||
{0x0049, 0x0328, 0x012e},
|
||||
{0x0049, 0x0330, 0x1e2c},
|
||||
{0x004a, 0x0302, 0x0134},
|
||||
{0x004b, 0x0301, 0x1e30},
|
||||
{0x004b, 0x030c, 0x01e8},
|
||||
{0x004b, 0x0323, 0x1e32},
|
||||
{0x004b, 0x0327, 0x0136},
|
||||
{0x004b, 0x0331, 0x1e34},
|
||||
{0x004c, 0x0301, 0x0139},
|
||||
{0x004c, 0x030c, 0x013d},
|
||||
{0x004c, 0x0323, 0x1e36},
|
||||
{0x004c, 0x0327, 0x013b},
|
||||
{0x004c, 0x032d, 0x1e3c},
|
||||
{0x004c, 0x0331, 0x1e3a},
|
||||
{0x004d, 0x0301, 0x1e3e},
|
||||
{0x004d, 0x0307, 0x1e40},
|
||||
{0x004d, 0x0323, 0x1e42},
|
||||
{0x004e, 0x0300, 0x01f8},
|
||||
{0x004e, 0x0301, 0x0143},
|
||||
{0x004e, 0x0303, 0x00d1},
|
||||
{0x004e, 0x0307, 0x1e44},
|
||||
{0x004e, 0x030c, 0x0147},
|
||||
{0x004e, 0x0323, 0x1e46},
|
||||
{0x004e, 0x0327, 0x0145},
|
||||
{0x004e, 0x032d, 0x1e4a},
|
||||
{0x004e, 0x0331, 0x1e48},
|
||||
{0x004f, 0x0300, 0x00d2},
|
||||
{0x004f, 0x0301, 0x00d3},
|
||||
{0x004f, 0x0302, 0x00d4},
|
||||
{0x004f, 0x0303, 0x00d5},
|
||||
{0x004f, 0x0304, 0x014c},
|
||||
{0x004f, 0x0306, 0x014e},
|
||||
{0x004f, 0x0307, 0x022e},
|
||||
{0x004f, 0x0308, 0x00d6},
|
||||
{0x004f, 0x0309, 0x1ece},
|
||||
{0x004f, 0x030b, 0x0150},
|
||||
{0x004f, 0x030c, 0x01d1},
|
||||
{0x004f, 0x030f, 0x020c},
|
||||
{0x004f, 0x0311, 0x020e},
|
||||
{0x004f, 0x031b, 0x01a0},
|
||||
{0x004f, 0x0323, 0x1ecc},
|
||||
{0x004f, 0x0328, 0x01ea},
|
||||
{0x0050, 0x0301, 0x1e54},
|
||||
{0x0050, 0x0307, 0x1e56},
|
||||
{0x0052, 0x0301, 0x0154},
|
||||
{0x0052, 0x0307, 0x1e58},
|
||||
{0x0052, 0x030c, 0x0158},
|
||||
{0x0052, 0x030f, 0x0210},
|
||||
{0x0052, 0x0311, 0x0212},
|
||||
{0x0052, 0x0323, 0x1e5a},
|
||||
{0x0052, 0x0327, 0x0156},
|
||||
{0x0052, 0x0331, 0x1e5e},
|
||||
{0x0053, 0x0301, 0x015a},
|
||||
{0x0053, 0x0302, 0x015c},
|
||||
{0x0053, 0x0307, 0x1e60},
|
||||
{0x0053, 0x030c, 0x0160},
|
||||
{0x0053, 0x0323, 0x1e62},
|
||||
{0x0053, 0x0326, 0x0218},
|
||||
{0x0053, 0x0327, 0x015e},
|
||||
{0x0054, 0x0307, 0x1e6a},
|
||||
{0x0054, 0x030c, 0x0164},
|
||||
{0x0054, 0x0323, 0x1e6c},
|
||||
{0x0054, 0x0326, 0x021a},
|
||||
{0x0054, 0x0327, 0x0162},
|
||||
{0x0054, 0x032d, 0x1e70},
|
||||
{0x0054, 0x0331, 0x1e6e},
|
||||
{0x0055, 0x0300, 0x00d9},
|
||||
{0x0055, 0x0301, 0x00da},
|
||||
{0x0055, 0x0302, 0x00db},
|
||||
{0x0055, 0x0303, 0x0168},
|
||||
{0x0055, 0x0304, 0x016a},
|
||||
{0x0055, 0x0306, 0x016c},
|
||||
{0x0055, 0x0308, 0x00dc},
|
||||
{0x0055, 0x0309, 0x1ee6},
|
||||
{0x0055, 0x030a, 0x016e},
|
||||
{0x0055, 0x030b, 0x0170},
|
||||
{0x0055, 0x030c, 0x01d3},
|
||||
{0x0055, 0x030f, 0x0214},
|
||||
{0x0055, 0x0311, 0x0216},
|
||||
{0x0055, 0x031b, 0x01af},
|
||||
{0x0055, 0x0323, 0x1ee4},
|
||||
{0x0055, 0x0324, 0x1e72},
|
||||
{0x0055, 0x0328, 0x0172},
|
||||
{0x0055, 0x032d, 0x1e76},
|
||||
{0x0055, 0x0330, 0x1e74},
|
||||
{0x0056, 0x0303, 0x1e7c},
|
||||
{0x0056, 0x0323, 0x1e7e},
|
||||
{0x0057, 0x0300, 0x1e80},
|
||||
{0x0057, 0x0301, 0x1e82},
|
||||
{0x0057, 0x0302, 0x0174},
|
||||
{0x0057, 0x0307, 0x1e86},
|
||||
{0x0057, 0x0308, 0x1e84},
|
||||
{0x0057, 0x0323, 0x1e88},
|
||||
{0x0058, 0x0307, 0x1e8a},
|
||||
{0x0058, 0x0308, 0x1e8c},
|
||||
{0x0059, 0x0300, 0x1ef2},
|
||||
{0x0059, 0x0301, 0x00dd},
|
||||
{0x0059, 0x0302, 0x0176},
|
||||
{0x0059, 0x0303, 0x1ef8},
|
||||
{0x0059, 0x0304, 0x0232},
|
||||
{0x0059, 0x0307, 0x1e8e},
|
||||
{0x0059, 0x0308, 0x0178},
|
||||
{0x0059, 0x0309, 0x1ef6},
|
||||
{0x0059, 0x0323, 0x1ef4},
|
||||
{0x005a, 0x0301, 0x0179},
|
||||
{0x005a, 0x0302, 0x1e90},
|
||||
{0x005a, 0x0307, 0x017b},
|
||||
{0x005a, 0x030c, 0x017d},
|
||||
{0x005a, 0x0323, 0x1e92},
|
||||
{0x005a, 0x0331, 0x1e94},
|
||||
{0x0061, 0x0300, 0x00e0},
|
||||
{0x0061, 0x0301, 0x00e1},
|
||||
{0x0061, 0x0302, 0x00e2},
|
||||
{0x0061, 0x0303, 0x00e3},
|
||||
{0x0061, 0x0304, 0x0101},
|
||||
{0x0061, 0x0306, 0x0103},
|
||||
{0x0061, 0x0307, 0x0227},
|
||||
{0x0061, 0x0308, 0x00e4},
|
||||
{0x0061, 0x0309, 0x1ea3},
|
||||
{0x0061, 0x030a, 0x00e5},
|
||||
{0x0061, 0x030c, 0x01ce},
|
||||
{0x0061, 0x030f, 0x0201},
|
||||
{0x0061, 0x0311, 0x0203},
|
||||
{0x0061, 0x0323, 0x1ea1},
|
||||
{0x0061, 0x0325, 0x1e01},
|
||||
{0x0061, 0x0328, 0x0105},
|
||||
{0x0062, 0x0307, 0x1e03},
|
||||
{0x0062, 0x0323, 0x1e05},
|
||||
{0x0062, 0x0331, 0x1e07},
|
||||
{0x0063, 0x0301, 0x0107},
|
||||
{0x0063, 0x0302, 0x0109},
|
||||
{0x0063, 0x0307, 0x010b},
|
||||
{0x0063, 0x030c, 0x010d},
|
||||
{0x0063, 0x0327, 0x00e7},
|
||||
{0x0064, 0x0307, 0x1e0b},
|
||||
{0x0064, 0x030c, 0x010f},
|
||||
{0x0064, 0x0323, 0x1e0d},
|
||||
{0x0064, 0x0327, 0x1e11},
|
||||
{0x0064, 0x032d, 0x1e13},
|
||||
{0x0064, 0x0331, 0x1e0f},
|
||||
{0x0065, 0x0300, 0x00e8},
|
||||
{0x0065, 0x0301, 0x00e9},
|
||||
{0x0065, 0x0302, 0x00ea},
|
||||
{0x0065, 0x0303, 0x1ebd},
|
||||
{0x0065, 0x0304, 0x0113},
|
||||
{0x0065, 0x0306, 0x0115},
|
||||
{0x0065, 0x0307, 0x0117},
|
||||
{0x0065, 0x0308, 0x00eb},
|
||||
{0x0065, 0x0309, 0x1ebb},
|
||||
{0x0065, 0x030c, 0x011b},
|
||||
{0x0065, 0x030f, 0x0205},
|
||||
{0x0065, 0x0311, 0x0207},
|
||||
{0x0065, 0x0323, 0x1eb9},
|
||||
{0x0065, 0x0327, 0x0229},
|
||||
{0x0065, 0x0328, 0x0119},
|
||||
{0x0065, 0x032d, 0x1e19},
|
||||
{0x0065, 0x0330, 0x1e1b},
|
||||
{0x0066, 0x0307, 0x1e1f},
|
||||
{0x0067, 0x0301, 0x01f5},
|
||||
{0x0067, 0x0302, 0x011d},
|
||||
{0x0067, 0x0304, 0x1e21},
|
||||
{0x0067, 0x0306, 0x011f},
|
||||
{0x0067, 0x0307, 0x0121},
|
||||
{0x0067, 0x030c, 0x01e7},
|
||||
{0x0067, 0x0327, 0x0123},
|
||||
{0x0068, 0x0302, 0x0125},
|
||||
{0x0068, 0x0307, 0x1e23},
|
||||
{0x0068, 0x0308, 0x1e27},
|
||||
{0x0068, 0x030c, 0x021f},
|
||||
{0x0068, 0x0323, 0x1e25},
|
||||
{0x0068, 0x0327, 0x1e29},
|
||||
{0x0068, 0x032e, 0x1e2b},
|
||||
{0x0068, 0x0331, 0x1e96},
|
||||
{0x0069, 0x0300, 0x00ec},
|
||||
{0x0069, 0x0301, 0x00ed},
|
||||
{0x0069, 0x0302, 0x00ee},
|
||||
{0x0069, 0x0303, 0x0129},
|
||||
{0x0069, 0x0304, 0x012b},
|
||||
{0x0069, 0x0306, 0x012d},
|
||||
{0x0069, 0x0308, 0x00ef},
|
||||
{0x0069, 0x0309, 0x1ec9},
|
||||
{0x0069, 0x030c, 0x01d0},
|
||||
{0x0069, 0x030f, 0x0209},
|
||||
{0x0069, 0x0311, 0x020b},
|
||||
{0x0069, 0x0323, 0x1ecb},
|
||||
{0x0069, 0x0328, 0x012f},
|
||||
{0x0069, 0x0330, 0x1e2d},
|
||||
{0x006a, 0x0302, 0x0135},
|
||||
{0x006a, 0x030c, 0x01f0},
|
||||
{0x006b, 0x0301, 0x1e31},
|
||||
{0x006b, 0x030c, 0x01e9},
|
||||
{0x006b, 0x0323, 0x1e33},
|
||||
{0x006b, 0x0327, 0x0137},
|
||||
{0x006b, 0x0331, 0x1e35},
|
||||
{0x006c, 0x0301, 0x013a},
|
||||
{0x006c, 0x030c, 0x013e},
|
||||
{0x006c, 0x0323, 0x1e37},
|
||||
{0x006c, 0x0327, 0x013c},
|
||||
{0x006c, 0x032d, 0x1e3d},
|
||||
{0x006c, 0x0331, 0x1e3b},
|
||||
{0x006d, 0x0301, 0x1e3f},
|
||||
{0x006d, 0x0307, 0x1e41},
|
||||
{0x006d, 0x0323, 0x1e43},
|
||||
{0x006e, 0x0300, 0x01f9},
|
||||
{0x006e, 0x0301, 0x0144},
|
||||
{0x006e, 0x0303, 0x00f1},
|
||||
{0x006e, 0x0307, 0x1e45},
|
||||
{0x006e, 0x030c, 0x0148},
|
||||
{0x006e, 0x0323, 0x1e47},
|
||||
{0x006e, 0x0327, 0x0146},
|
||||
{0x006e, 0x032d, 0x1e4b},
|
||||
{0x006e, 0x0331, 0x1e49},
|
||||
{0x006f, 0x0300, 0x00f2},
|
||||
{0x006f, 0x0301, 0x00f3},
|
||||
{0x006f, 0x0302, 0x00f4},
|
||||
{0x006f, 0x0303, 0x00f5},
|
||||
{0x006f, 0x0304, 0x014d},
|
||||
{0x006f, 0x0306, 0x014f},
|
||||
{0x006f, 0x0307, 0x022f},
|
||||
{0x006f, 0x0308, 0x00f6},
|
||||
{0x006f, 0x0309, 0x1ecf},
|
||||
{0x006f, 0x030b, 0x0151},
|
||||
{0x006f, 0x030c, 0x01d2},
|
||||
{0x006f, 0x030f, 0x020d},
|
||||
{0x006f, 0x0311, 0x020f},
|
||||
{0x006f, 0x031b, 0x01a1},
|
||||
{0x006f, 0x0323, 0x1ecd},
|
||||
{0x006f, 0x0328, 0x01eb},
|
||||
{0x0070, 0x0301, 0x1e55},
|
||||
{0x0070, 0x0307, 0x1e57},
|
||||
{0x0072, 0x0301, 0x0155},
|
||||
{0x0072, 0x0307, 0x1e59},
|
||||
{0x0072, 0x030c, 0x0159},
|
||||
{0x0072, 0x030f, 0x0211},
|
||||
{0x0072, 0x0311, 0x0213},
|
||||
{0x0072, 0x0323, 0x1e5b},
|
||||
{0x0072, 0x0327, 0x0157},
|
||||
{0x0072, 0x0331, 0x1e5f},
|
||||
{0x0073, 0x0301, 0x015b},
|
||||
{0x0073, 0x0302, 0x015d},
|
||||
{0x0073, 0x0307, 0x1e61},
|
||||
{0x0073, 0x030c, 0x0161},
|
||||
{0x0073, 0x0323, 0x1e63},
|
||||
{0x0073, 0x0326, 0x0219},
|
||||
{0x0073, 0x0327, 0x015f},
|
||||
{0x0074, 0x0307, 0x1e6b},
|
||||
{0x0074, 0x0308, 0x1e97},
|
||||
{0x0074, 0x030c, 0x0165},
|
||||
{0x0074, 0x0323, 0x1e6d},
|
||||
{0x0074, 0x0326, 0x021b},
|
||||
{0x0074, 0x0327, 0x0163},
|
||||
{0x0074, 0x032d, 0x1e71},
|
||||
{0x0074, 0x0331, 0x1e6f},
|
||||
{0x0075, 0x0300, 0x00f9},
|
||||
{0x0075, 0x0301, 0x00fa},
|
||||
{0x0075, 0x0302, 0x00fb},
|
||||
{0x0075, 0x0303, 0x0169},
|
||||
{0x0075, 0x0304, 0x016b},
|
||||
{0x0075, 0x0306, 0x016d},
|
||||
{0x0075, 0x0308, 0x00fc},
|
||||
{0x0075, 0x0309, 0x1ee7},
|
||||
{0x0075, 0x030a, 0x016f},
|
||||
{0x0075, 0x030b, 0x0171},
|
||||
{0x0075, 0x030c, 0x01d4},
|
||||
{0x0075, 0x030f, 0x0215},
|
||||
{0x0075, 0x0311, 0x0217},
|
||||
{0x0075, 0x031b, 0x01b0},
|
||||
{0x0075, 0x0323, 0x1ee5},
|
||||
{0x0075, 0x0324, 0x1e73},
|
||||
{0x0075, 0x0328, 0x0173},
|
||||
{0x0075, 0x032d, 0x1e77},
|
||||
{0x0075, 0x0330, 0x1e75},
|
||||
{0x0076, 0x0303, 0x1e7d},
|
||||
{0x0076, 0x0323, 0x1e7f},
|
||||
{0x0077, 0x0300, 0x1e81},
|
||||
{0x0077, 0x0301, 0x1e83},
|
||||
{0x0077, 0x0302, 0x0175},
|
||||
{0x0077, 0x0307, 0x1e87},
|
||||
{0x0077, 0x0308, 0x1e85},
|
||||
{0x0077, 0x030a, 0x1e98},
|
||||
{0x0077, 0x0323, 0x1e89},
|
||||
{0x0078, 0x0307, 0x1e8b},
|
||||
{0x0078, 0x0308, 0x1e8d},
|
||||
{0x0079, 0x0300, 0x1ef3},
|
||||
{0x0079, 0x0301, 0x00fd},
|
||||
{0x0079, 0x0302, 0x0177},
|
||||
{0x0079, 0x0303, 0x1ef9},
|
||||
{0x0079, 0x0304, 0x0233},
|
||||
{0x0079, 0x0307, 0x1e8f},
|
||||
{0x0079, 0x0308, 0x00ff},
|
||||
{0x0079, 0x0309, 0x1ef7},
|
||||
{0x0079, 0x030a, 0x1e99},
|
||||
{0x0079, 0x0323, 0x1ef5},
|
||||
{0x007a, 0x0301, 0x017a},
|
||||
{0x007a, 0x0302, 0x1e91},
|
||||
{0x007a, 0x0307, 0x017c},
|
||||
{0x007a, 0x030c, 0x017e},
|
||||
{0x007a, 0x0323, 0x1e93},
|
||||
{0x007a, 0x0331, 0x1e95},
|
||||
{0x00a8, 0x0300, 0x1fed},
|
||||
{0x00a8, 0x0301, 0x0385},
|
||||
{0x00a8, 0x0342, 0x1fc1},
|
||||
{0x00c2, 0x0300, 0x1ea6},
|
||||
{0x00c2, 0x0301, 0x1ea4},
|
||||
{0x00c2, 0x0303, 0x1eaa},
|
||||
{0x00c2, 0x0309, 0x1ea8},
|
||||
{0x00c4, 0x0304, 0x01de},
|
||||
{0x00c5, 0x0301, 0x01fa},
|
||||
{0x00c6, 0x0301, 0x01fc},
|
||||
{0x00c6, 0x0304, 0x01e2},
|
||||
{0x00c7, 0x0301, 0x1e08},
|
||||
{0x00ca, 0x0300, 0x1ec0},
|
||||
{0x00ca, 0x0301, 0x1ebe},
|
||||
{0x00ca, 0x0303, 0x1ec4},
|
||||
{0x00ca, 0x0309, 0x1ec2},
|
||||
{0x00cf, 0x0301, 0x1e2e},
|
||||
{0x00d4, 0x0300, 0x1ed2},
|
||||
{0x00d4, 0x0301, 0x1ed0},
|
||||
{0x00d4, 0x0303, 0x1ed6},
|
||||
{0x00d4, 0x0309, 0x1ed4},
|
||||
{0x00d5, 0x0301, 0x1e4c},
|
||||
{0x00d5, 0x0304, 0x022c},
|
||||
{0x00d5, 0x0308, 0x1e4e},
|
||||
{0x00d6, 0x0304, 0x022a},
|
||||
{0x00d8, 0x0301, 0x01fe},
|
||||
{0x00dc, 0x0300, 0x01db},
|
||||
{0x00dc, 0x0301, 0x01d7},
|
||||
{0x00dc, 0x0304, 0x01d5},
|
||||
{0x00dc, 0x030c, 0x01d9},
|
||||
{0x00e2, 0x0300, 0x1ea7},
|
||||
{0x00e2, 0x0301, 0x1ea5},
|
||||
{0x00e2, 0x0303, 0x1eab},
|
||||
{0x00e2, 0x0309, 0x1ea9},
|
||||
{0x00e4, 0x0304, 0x01df},
|
||||
{0x00e5, 0x0301, 0x01fb},
|
||||
{0x00e6, 0x0301, 0x01fd},
|
||||
{0x00e6, 0x0304, 0x01e3},
|
||||
{0x00e7, 0x0301, 0x1e09},
|
||||
{0x00ea, 0x0300, 0x1ec1},
|
||||
{0x00ea, 0x0301, 0x1ebf},
|
||||
{0x00ea, 0x0303, 0x1ec5},
|
||||
{0x00ea, 0x0309, 0x1ec3},
|
||||
{0x00ef, 0x0301, 0x1e2f},
|
||||
{0x00f4, 0x0300, 0x1ed3},
|
||||
{0x00f4, 0x0301, 0x1ed1},
|
||||
{0x00f4, 0x0303, 0x1ed7},
|
||||
{0x00f4, 0x0309, 0x1ed5},
|
||||
{0x00f5, 0x0301, 0x1e4d},
|
||||
{0x00f5, 0x0304, 0x022d},
|
||||
{0x00f5, 0x0308, 0x1e4f},
|
||||
{0x00f6, 0x0304, 0x022b},
|
||||
{0x00f8, 0x0301, 0x01ff},
|
||||
{0x00fc, 0x0300, 0x01dc},
|
||||
{0x00fc, 0x0301, 0x01d8},
|
||||
{0x00fc, 0x0304, 0x01d6},
|
||||
{0x00fc, 0x030c, 0x01da},
|
||||
{0x0102, 0x0300, 0x1eb0},
|
||||
{0x0102, 0x0301, 0x1eae},
|
||||
{0x0102, 0x0303, 0x1eb4},
|
||||
{0x0102, 0x0309, 0x1eb2},
|
||||
{0x0103, 0x0300, 0x1eb1},
|
||||
{0x0103, 0x0301, 0x1eaf},
|
||||
{0x0103, 0x0303, 0x1eb5},
|
||||
{0x0103, 0x0309, 0x1eb3},
|
||||
{0x0112, 0x0300, 0x1e14},
|
||||
{0x0112, 0x0301, 0x1e16},
|
||||
{0x0113, 0x0300, 0x1e15},
|
||||
{0x0113, 0x0301, 0x1e17},
|
||||
{0x014c, 0x0300, 0x1e50},
|
||||
{0x014c, 0x0301, 0x1e52},
|
||||
{0x014d, 0x0300, 0x1e51},
|
||||
{0x014d, 0x0301, 0x1e53},
|
||||
{0x015a, 0x0307, 0x1e64},
|
||||
{0x015b, 0x0307, 0x1e65},
|
||||
{0x0160, 0x0307, 0x1e66},
|
||||
{0x0161, 0x0307, 0x1e67},
|
||||
{0x0168, 0x0301, 0x1e78},
|
||||
{0x0169, 0x0301, 0x1e79},
|
||||
{0x016a, 0x0308, 0x1e7a},
|
||||
{0x016b, 0x0308, 0x1e7b},
|
||||
{0x017f, 0x0307, 0x1e9b},
|
||||
{0x01a0, 0x0300, 0x1edc},
|
||||
{0x01a0, 0x0301, 0x1eda},
|
||||
{0x01a0, 0x0303, 0x1ee0},
|
||||
{0x01a0, 0x0309, 0x1ede},
|
||||
{0x01a0, 0x0323, 0x1ee2},
|
||||
{0x01a1, 0x0300, 0x1edd},
|
||||
{0x01a1, 0x0301, 0x1edb},
|
||||
{0x01a1, 0x0303, 0x1ee1},
|
||||
{0x01a1, 0x0309, 0x1edf},
|
||||
{0x01a1, 0x0323, 0x1ee3},
|
||||
{0x01af, 0x0300, 0x1eea},
|
||||
{0x01af, 0x0301, 0x1ee8},
|
||||
{0x01af, 0x0303, 0x1eee},
|
||||
{0x01af, 0x0309, 0x1eec},
|
||||
{0x01af, 0x0323, 0x1ef0},
|
||||
{0x01b0, 0x0300, 0x1eeb},
|
||||
{0x01b0, 0x0301, 0x1ee9},
|
||||
{0x01b0, 0x0303, 0x1eef},
|
||||
{0x01b0, 0x0309, 0x1eed},
|
||||
{0x01b0, 0x0323, 0x1ef1},
|
||||
{0x01b7, 0x030c, 0x01ee},
|
||||
{0x01ea, 0x0304, 0x01ec},
|
||||
{0x01eb, 0x0304, 0x01ed},
|
||||
{0x0226, 0x0304, 0x01e0},
|
||||
{0x0227, 0x0304, 0x01e1},
|
||||
{0x0228, 0x0306, 0x1e1c},
|
||||
{0x0229, 0x0306, 0x1e1d},
|
||||
{0x022e, 0x0304, 0x0230},
|
||||
{0x022f, 0x0304, 0x0231},
|
||||
{0x0292, 0x030c, 0x01ef},
|
||||
{0x0391, 0x0300, 0x1fba},
|
||||
{0x0391, 0x0301, 0x0386},
|
||||
{0x0391, 0x0304, 0x1fb9},
|
||||
{0x0391, 0x0306, 0x1fb8},
|
||||
{0x0391, 0x0313, 0x1f08},
|
||||
{0x0391, 0x0314, 0x1f09},
|
||||
{0x0391, 0x0345, 0x1fbc},
|
||||
{0x0395, 0x0300, 0x1fc8},
|
||||
{0x0395, 0x0301, 0x0388},
|
||||
{0x0395, 0x0313, 0x1f18},
|
||||
{0x0395, 0x0314, 0x1f19},
|
||||
{0x0397, 0x0300, 0x1fca},
|
||||
{0x0397, 0x0301, 0x0389},
|
||||
{0x0397, 0x0313, 0x1f28},
|
||||
{0x0397, 0x0314, 0x1f29},
|
||||
{0x0397, 0x0345, 0x1fcc},
|
||||
{0x0399, 0x0300, 0x1fda},
|
||||
{0x0399, 0x0301, 0x038a},
|
||||
{0x0399, 0x0304, 0x1fd9},
|
||||
{0x0399, 0x0306, 0x1fd8},
|
||||
{0x0399, 0x0308, 0x03aa},
|
||||
{0x0399, 0x0313, 0x1f38},
|
||||
{0x0399, 0x0314, 0x1f39},
|
||||
{0x039f, 0x0300, 0x1ff8},
|
||||
{0x039f, 0x0301, 0x038c},
|
||||
{0x039f, 0x0313, 0x1f48},
|
||||
{0x039f, 0x0314, 0x1f49},
|
||||
{0x03a1, 0x0314, 0x1fec},
|
||||
{0x03a5, 0x0300, 0x1fea},
|
||||
{0x03a5, 0x0301, 0x038e},
|
||||
{0x03a5, 0x0304, 0x1fe9},
|
||||
{0x03a5, 0x0306, 0x1fe8},
|
||||
{0x03a5, 0x0308, 0x03ab},
|
||||
{0x03a5, 0x0314, 0x1f59},
|
||||
{0x03a9, 0x0300, 0x1ffa},
|
||||
{0x03a9, 0x0301, 0x038f},
|
||||
{0x03a9, 0x0313, 0x1f68},
|
||||
{0x03a9, 0x0314, 0x1f69},
|
||||
{0x03a9, 0x0345, 0x1ffc},
|
||||
{0x03ac, 0x0345, 0x1fb4},
|
||||
{0x03ae, 0x0345, 0x1fc4},
|
||||
{0x03b1, 0x0300, 0x1f70},
|
||||
{0x03b1, 0x0301, 0x03ac},
|
||||
{0x03b1, 0x0304, 0x1fb1},
|
||||
{0x03b1, 0x0306, 0x1fb0},
|
||||
{0x03b1, 0x0313, 0x1f00},
|
||||
{0x03b1, 0x0314, 0x1f01},
|
||||
{0x03b1, 0x0342, 0x1fb6},
|
||||
{0x03b1, 0x0345, 0x1fb3},
|
||||
{0x03b5, 0x0300, 0x1f72},
|
||||
{0x03b5, 0x0301, 0x03ad},
|
||||
{0x03b5, 0x0313, 0x1f10},
|
||||
{0x03b5, 0x0314, 0x1f11},
|
||||
{0x03b7, 0x0300, 0x1f74},
|
||||
{0x03b7, 0x0301, 0x03ae},
|
||||
{0x03b7, 0x0313, 0x1f20},
|
||||
{0x03b7, 0x0314, 0x1f21},
|
||||
{0x03b7, 0x0342, 0x1fc6},
|
||||
{0x03b7, 0x0345, 0x1fc3},
|
||||
{0x03b9, 0x0300, 0x1f76},
|
||||
{0x03b9, 0x0301, 0x03af},
|
||||
{0x03b9, 0x0304, 0x1fd1},
|
||||
{0x03b9, 0x0306, 0x1fd0},
|
||||
{0x03b9, 0x0308, 0x03ca},
|
||||
{0x03b9, 0x0313, 0x1f30},
|
||||
{0x03b9, 0x0314, 0x1f31},
|
||||
{0x03b9, 0x0342, 0x1fd6},
|
||||
{0x03bf, 0x0300, 0x1f78},
|
||||
{0x03bf, 0x0301, 0x03cc},
|
||||
{0x03bf, 0x0313, 0x1f40},
|
||||
{0x03bf, 0x0314, 0x1f41},
|
||||
{0x03c1, 0x0313, 0x1fe4},
|
||||
{0x03c1, 0x0314, 0x1fe5},
|
||||
{0x03c5, 0x0300, 0x1f7a},
|
||||
{0x03c5, 0x0301, 0x03cd},
|
||||
{0x03c5, 0x0304, 0x1fe1},
|
||||
{0x03c5, 0x0306, 0x1fe0},
|
||||
{0x03c5, 0x0308, 0x03cb},
|
||||
{0x03c5, 0x0313, 0x1f50},
|
||||
{0x03c5, 0x0314, 0x1f51},
|
||||
{0x03c5, 0x0342, 0x1fe6},
|
||||
{0x03c9, 0x0300, 0x1f7c},
|
||||
{0x03c9, 0x0301, 0x03ce},
|
||||
{0x03c9, 0x0313, 0x1f60},
|
||||
{0x03c9, 0x0314, 0x1f61},
|
||||
{0x03c9, 0x0342, 0x1ff6},
|
||||
{0x03c9, 0x0345, 0x1ff3},
|
||||
{0x03ca, 0x0300, 0x1fd2},
|
||||
{0x03ca, 0x0301, 0x0390},
|
||||
{0x03ca, 0x0342, 0x1fd7},
|
||||
{0x03cb, 0x0300, 0x1fe2},
|
||||
{0x03cb, 0x0301, 0x03b0},
|
||||
{0x03cb, 0x0342, 0x1fe7},
|
||||
{0x03ce, 0x0345, 0x1ff4},
|
||||
{0x03d2, 0x0301, 0x03d3},
|
||||
{0x03d2, 0x0308, 0x03d4},
|
||||
{0x0406, 0x0308, 0x0407},
|
||||
{0x0410, 0x0306, 0x04d0},
|
||||
{0x0410, 0x0308, 0x04d2},
|
||||
{0x0413, 0x0301, 0x0403},
|
||||
{0x0415, 0x0300, 0x0400},
|
||||
{0x0415, 0x0306, 0x04d6},
|
||||
{0x0415, 0x0308, 0x0401},
|
||||
{0x0416, 0x0306, 0x04c1},
|
||||
{0x0416, 0x0308, 0x04dc},
|
||||
{0x0417, 0x0308, 0x04de},
|
||||
{0x0418, 0x0300, 0x040d},
|
||||
{0x0418, 0x0304, 0x04e2},
|
||||
{0x0418, 0x0306, 0x0419},
|
||||
{0x0418, 0x0308, 0x04e4},
|
||||
{0x041a, 0x0301, 0x040c},
|
||||
{0x041e, 0x0308, 0x04e6},
|
||||
{0x0423, 0x0304, 0x04ee},
|
||||
{0x0423, 0x0306, 0x040e},
|
||||
{0x0423, 0x0308, 0x04f0},
|
||||
{0x0423, 0x030b, 0x04f2},
|
||||
{0x0427, 0x0308, 0x04f4},
|
||||
{0x042b, 0x0308, 0x04f8},
|
||||
{0x042d, 0x0308, 0x04ec},
|
||||
{0x0430, 0x0306, 0x04d1},
|
||||
{0x0430, 0x0308, 0x04d3},
|
||||
{0x0433, 0x0301, 0x0453},
|
||||
{0x0435, 0x0300, 0x0450},
|
||||
{0x0435, 0x0306, 0x04d7},
|
||||
{0x0435, 0x0308, 0x0451},
|
||||
{0x0436, 0x0306, 0x04c2},
|
||||
{0x0436, 0x0308, 0x04dd},
|
||||
{0x0437, 0x0308, 0x04df},
|
||||
{0x0438, 0x0300, 0x045d},
|
||||
{0x0438, 0x0304, 0x04e3},
|
||||
{0x0438, 0x0306, 0x0439},
|
||||
{0x0438, 0x0308, 0x04e5},
|
||||
{0x043a, 0x0301, 0x045c},
|
||||
{0x043e, 0x0308, 0x04e7},
|
||||
{0x0443, 0x0304, 0x04ef},
|
||||
{0x0443, 0x0306, 0x045e},
|
||||
{0x0443, 0x0308, 0x04f1},
|
||||
{0x0443, 0x030b, 0x04f3},
|
||||
{0x0447, 0x0308, 0x04f5},
|
||||
{0x044b, 0x0308, 0x04f9},
|
||||
{0x044d, 0x0308, 0x04ed},
|
||||
{0x0456, 0x0308, 0x0457},
|
||||
{0x0474, 0x030f, 0x0476},
|
||||
{0x0475, 0x030f, 0x0477},
|
||||
{0x04d8, 0x0308, 0x04da},
|
||||
{0x04d9, 0x0308, 0x04db},
|
||||
{0x04e8, 0x0308, 0x04ea},
|
||||
{0x04e9, 0x0308, 0x04eb},
|
||||
{0x0627, 0x0653, 0x0622},
|
||||
{0x0627, 0x0654, 0x0623},
|
||||
{0x0627, 0x0655, 0x0625},
|
||||
{0x0648, 0x0654, 0x0624},
|
||||
{0x064a, 0x0654, 0x0626},
|
||||
{0x06c1, 0x0654, 0x06c2},
|
||||
{0x06d2, 0x0654, 0x06d3},
|
||||
{0x06d5, 0x0654, 0x06c0},
|
||||
{0x0928, 0x093c, 0x0929},
|
||||
{0x0930, 0x093c, 0x0931},
|
||||
{0x0933, 0x093c, 0x0934},
|
||||
{0x09c7, 0x09be, 0x09cb},
|
||||
{0x09c7, 0x09d7, 0x09cc},
|
||||
{0x0b47, 0x0b3e, 0x0b4b},
|
||||
{0x0b47, 0x0b56, 0x0b48},
|
||||
{0x0b47, 0x0b57, 0x0b4c},
|
||||
{0x0b92, 0x0bd7, 0x0b94},
|
||||
{0x0bc6, 0x0bbe, 0x0bca},
|
||||
{0x0bc6, 0x0bd7, 0x0bcc},
|
||||
{0x0bc7, 0x0bbe, 0x0bcb},
|
||||
{0x0c46, 0x0c56, 0x0c48},
|
||||
{0x0cbf, 0x0cd5, 0x0cc0},
|
||||
{0x0cc6, 0x0cc2, 0x0cca},
|
||||
{0x0cc6, 0x0cd5, 0x0cc7},
|
||||
{0x0cc6, 0x0cd6, 0x0cc8},
|
||||
{0x0cca, 0x0cd5, 0x0ccb},
|
||||
{0x0d46, 0x0d3e, 0x0d4a},
|
||||
{0x0d46, 0x0d57, 0x0d4c},
|
||||
{0x0d47, 0x0d3e, 0x0d4b},
|
||||
{0x0dd9, 0x0dca, 0x0dda},
|
||||
{0x0dd9, 0x0dcf, 0x0ddc},
|
||||
{0x0dd9, 0x0ddf, 0x0dde},
|
||||
{0x0ddc, 0x0dca, 0x0ddd},
|
||||
{0x1025, 0x102e, 0x1026},
|
||||
{0x1b05, 0x1b35, 0x1b06},
|
||||
{0x1b07, 0x1b35, 0x1b08},
|
||||
{0x1b09, 0x1b35, 0x1b0a},
|
||||
{0x1b0b, 0x1b35, 0x1b0c},
|
||||
{0x1b0d, 0x1b35, 0x1b0e},
|
||||
{0x1b11, 0x1b35, 0x1b12},
|
||||
{0x1b3a, 0x1b35, 0x1b3b},
|
||||
{0x1b3c, 0x1b35, 0x1b3d},
|
||||
{0x1b3e, 0x1b35, 0x1b40},
|
||||
{0x1b3f, 0x1b35, 0x1b41},
|
||||
{0x1b42, 0x1b35, 0x1b43},
|
||||
{0x1e36, 0x0304, 0x1e38},
|
||||
{0x1e37, 0x0304, 0x1e39},
|
||||
{0x1e5a, 0x0304, 0x1e5c},
|
||||
{0x1e5b, 0x0304, 0x1e5d},
|
||||
{0x1e62, 0x0307, 0x1e68},
|
||||
{0x1e63, 0x0307, 0x1e69},
|
||||
{0x1ea0, 0x0302, 0x1eac},
|
||||
{0x1ea0, 0x0306, 0x1eb6},
|
||||
{0x1ea1, 0x0302, 0x1ead},
|
||||
{0x1ea1, 0x0306, 0x1eb7},
|
||||
{0x1eb8, 0x0302, 0x1ec6},
|
||||
{0x1eb9, 0x0302, 0x1ec7},
|
||||
{0x1ecc, 0x0302, 0x1ed8},
|
||||
{0x1ecd, 0x0302, 0x1ed9},
|
||||
{0x1f00, 0x0300, 0x1f02},
|
||||
{0x1f00, 0x0301, 0x1f04},
|
||||
{0x1f00, 0x0342, 0x1f06},
|
||||
{0x1f00, 0x0345, 0x1f80},
|
||||
{0x1f01, 0x0300, 0x1f03},
|
||||
{0x1f01, 0x0301, 0x1f05},
|
||||
{0x1f01, 0x0342, 0x1f07},
|
||||
{0x1f01, 0x0345, 0x1f81},
|
||||
{0x1f02, 0x0345, 0x1f82},
|
||||
{0x1f03, 0x0345, 0x1f83},
|
||||
{0x1f04, 0x0345, 0x1f84},
|
||||
{0x1f05, 0x0345, 0x1f85},
|
||||
{0x1f06, 0x0345, 0x1f86},
|
||||
{0x1f07, 0x0345, 0x1f87},
|
||||
{0x1f08, 0x0300, 0x1f0a},
|
||||
{0x1f08, 0x0301, 0x1f0c},
|
||||
{0x1f08, 0x0342, 0x1f0e},
|
||||
{0x1f08, 0x0345, 0x1f88},
|
||||
{0x1f09, 0x0300, 0x1f0b},
|
||||
{0x1f09, 0x0301, 0x1f0d},
|
||||
{0x1f09, 0x0342, 0x1f0f},
|
||||
{0x1f09, 0x0345, 0x1f89},
|
||||
{0x1f0a, 0x0345, 0x1f8a},
|
||||
{0x1f0b, 0x0345, 0x1f8b},
|
||||
{0x1f0c, 0x0345, 0x1f8c},
|
||||
{0x1f0d, 0x0345, 0x1f8d},
|
||||
{0x1f0e, 0x0345, 0x1f8e},
|
||||
{0x1f0f, 0x0345, 0x1f8f},
|
||||
{0x1f10, 0x0300, 0x1f12},
|
||||
{0x1f10, 0x0301, 0x1f14},
|
||||
{0x1f11, 0x0300, 0x1f13},
|
||||
{0x1f11, 0x0301, 0x1f15},
|
||||
{0x1f18, 0x0300, 0x1f1a},
|
||||
{0x1f18, 0x0301, 0x1f1c},
|
||||
{0x1f19, 0x0300, 0x1f1b},
|
||||
{0x1f19, 0x0301, 0x1f1d},
|
||||
{0x1f20, 0x0300, 0x1f22},
|
||||
{0x1f20, 0x0301, 0x1f24},
|
||||
{0x1f20, 0x0342, 0x1f26},
|
||||
{0x1f20, 0x0345, 0x1f90},
|
||||
{0x1f21, 0x0300, 0x1f23},
|
||||
{0x1f21, 0x0301, 0x1f25},
|
||||
{0x1f21, 0x0342, 0x1f27},
|
||||
{0x1f21, 0x0345, 0x1f91},
|
||||
{0x1f22, 0x0345, 0x1f92},
|
||||
{0x1f23, 0x0345, 0x1f93},
|
||||
{0x1f24, 0x0345, 0x1f94},
|
||||
{0x1f25, 0x0345, 0x1f95},
|
||||
{0x1f26, 0x0345, 0x1f96},
|
||||
{0x1f27, 0x0345, 0x1f97},
|
||||
{0x1f28, 0x0300, 0x1f2a},
|
||||
{0x1f28, 0x0301, 0x1f2c},
|
||||
{0x1f28, 0x0342, 0x1f2e},
|
||||
{0x1f28, 0x0345, 0x1f98},
|
||||
{0x1f29, 0x0300, 0x1f2b},
|
||||
{0x1f29, 0x0301, 0x1f2d},
|
||||
{0x1f29, 0x0342, 0x1f2f},
|
||||
{0x1f29, 0x0345, 0x1f99},
|
||||
{0x1f2a, 0x0345, 0x1f9a},
|
||||
{0x1f2b, 0x0345, 0x1f9b},
|
||||
{0x1f2c, 0x0345, 0x1f9c},
|
||||
{0x1f2d, 0x0345, 0x1f9d},
|
||||
{0x1f2e, 0x0345, 0x1f9e},
|
||||
{0x1f2f, 0x0345, 0x1f9f},
|
||||
{0x1f30, 0x0300, 0x1f32},
|
||||
{0x1f30, 0x0301, 0x1f34},
|
||||
{0x1f30, 0x0342, 0x1f36},
|
||||
{0x1f31, 0x0300, 0x1f33},
|
||||
{0x1f31, 0x0301, 0x1f35},
|
||||
{0x1f31, 0x0342, 0x1f37},
|
||||
{0x1f38, 0x0300, 0x1f3a},
|
||||
{0x1f38, 0x0301, 0x1f3c},
|
||||
{0x1f38, 0x0342, 0x1f3e},
|
||||
{0x1f39, 0x0300, 0x1f3b},
|
||||
{0x1f39, 0x0301, 0x1f3d},
|
||||
{0x1f39, 0x0342, 0x1f3f},
|
||||
{0x1f40, 0x0300, 0x1f42},
|
||||
{0x1f40, 0x0301, 0x1f44},
|
||||
{0x1f41, 0x0300, 0x1f43},
|
||||
{0x1f41, 0x0301, 0x1f45},
|
||||
{0x1f48, 0x0300, 0x1f4a},
|
||||
{0x1f48, 0x0301, 0x1f4c},
|
||||
{0x1f49, 0x0300, 0x1f4b},
|
||||
{0x1f49, 0x0301, 0x1f4d},
|
||||
{0x1f50, 0x0300, 0x1f52},
|
||||
{0x1f50, 0x0301, 0x1f54},
|
||||
{0x1f50, 0x0342, 0x1f56},
|
||||
{0x1f51, 0x0300, 0x1f53},
|
||||
{0x1f51, 0x0301, 0x1f55},
|
||||
{0x1f51, 0x0342, 0x1f57},
|
||||
{0x1f59, 0x0300, 0x1f5b},
|
||||
{0x1f59, 0x0301, 0x1f5d},
|
||||
{0x1f59, 0x0342, 0x1f5f},
|
||||
{0x1f60, 0x0300, 0x1f62},
|
||||
{0x1f60, 0x0301, 0x1f64},
|
||||
{0x1f60, 0x0342, 0x1f66},
|
||||
{0x1f60, 0x0345, 0x1fa0},
|
||||
{0x1f61, 0x0300, 0x1f63},
|
||||
{0x1f61, 0x0301, 0x1f65},
|
||||
{0x1f61, 0x0342, 0x1f67},
|
||||
{0x1f61, 0x0345, 0x1fa1},
|
||||
{0x1f62, 0x0345, 0x1fa2},
|
||||
{0x1f63, 0x0345, 0x1fa3},
|
||||
{0x1f64, 0x0345, 0x1fa4},
|
||||
{0x1f65, 0x0345, 0x1fa5},
|
||||
{0x1f66, 0x0345, 0x1fa6},
|
||||
{0x1f67, 0x0345, 0x1fa7},
|
||||
{0x1f68, 0x0300, 0x1f6a},
|
||||
{0x1f68, 0x0301, 0x1f6c},
|
||||
{0x1f68, 0x0342, 0x1f6e},
|
||||
{0x1f68, 0x0345, 0x1fa8},
|
||||
{0x1f69, 0x0300, 0x1f6b},
|
||||
{0x1f69, 0x0301, 0x1f6d},
|
||||
{0x1f69, 0x0342, 0x1f6f},
|
||||
{0x1f69, 0x0345, 0x1fa9},
|
||||
{0x1f6a, 0x0345, 0x1faa},
|
||||
{0x1f6b, 0x0345, 0x1fab},
|
||||
{0x1f6c, 0x0345, 0x1fac},
|
||||
{0x1f6d, 0x0345, 0x1fad},
|
||||
{0x1f6e, 0x0345, 0x1fae},
|
||||
{0x1f6f, 0x0345, 0x1faf},
|
||||
{0x1f70, 0x0345, 0x1fb2},
|
||||
{0x1f74, 0x0345, 0x1fc2},
|
||||
{0x1f7c, 0x0345, 0x1ff2},
|
||||
{0x1fb6, 0x0345, 0x1fb7},
|
||||
{0x1fbf, 0x0300, 0x1fcd},
|
||||
{0x1fbf, 0x0301, 0x1fce},
|
||||
{0x1fbf, 0x0342, 0x1fcf},
|
||||
{0x1fc6, 0x0345, 0x1fc7},
|
||||
{0x1ff6, 0x0345, 0x1ff7},
|
||||
{0x1ffe, 0x0300, 0x1fdd},
|
||||
{0x1ffe, 0x0301, 0x1fde},
|
||||
{0x1ffe, 0x0342, 0x1fdf},
|
||||
{0x2190, 0x0338, 0x219a},
|
||||
{0x2192, 0x0338, 0x219b},
|
||||
{0x2194, 0x0338, 0x21ae},
|
||||
{0x21d0, 0x0338, 0x21cd},
|
||||
{0x21d2, 0x0338, 0x21cf},
|
||||
{0x21d4, 0x0338, 0x21ce},
|
||||
{0x2203, 0x0338, 0x2204},
|
||||
{0x2208, 0x0338, 0x2209},
|
||||
{0x220b, 0x0338, 0x220c},
|
||||
{0x2223, 0x0338, 0x2224},
|
||||
{0x2225, 0x0338, 0x2226},
|
||||
{0x223c, 0x0338, 0x2241},
|
||||
{0x2243, 0x0338, 0x2244},
|
||||
{0x2245, 0x0338, 0x2247},
|
||||
{0x2248, 0x0338, 0x2249},
|
||||
{0x224d, 0x0338, 0x226d},
|
||||
{0x2261, 0x0338, 0x2262},
|
||||
{0x2264, 0x0338, 0x2270},
|
||||
{0x2265, 0x0338, 0x2271},
|
||||
{0x2272, 0x0338, 0x2274},
|
||||
{0x2273, 0x0338, 0x2275},
|
||||
{0x2276, 0x0338, 0x2278},
|
||||
{0x2277, 0x0338, 0x2279},
|
||||
{0x227a, 0x0338, 0x2280},
|
||||
{0x227b, 0x0338, 0x2281},
|
||||
{0x227c, 0x0338, 0x22e0},
|
||||
{0x227d, 0x0338, 0x22e1},
|
||||
{0x2282, 0x0338, 0x2284},
|
||||
{0x2283, 0x0338, 0x2285},
|
||||
{0x2286, 0x0338, 0x2288},
|
||||
{0x2287, 0x0338, 0x2289},
|
||||
{0x2291, 0x0338, 0x22e2},
|
||||
{0x2292, 0x0338, 0x22e3},
|
||||
{0x22a2, 0x0338, 0x22ac},
|
||||
{0x22a8, 0x0338, 0x22ad},
|
||||
{0x22a9, 0x0338, 0x22ae},
|
||||
{0x22ab, 0x0338, 0x22af},
|
||||
{0x22b2, 0x0338, 0x22ea},
|
||||
{0x22b3, 0x0338, 0x22eb},
|
||||
{0x22b4, 0x0338, 0x22ec},
|
||||
{0x22b5, 0x0338, 0x22ed},
|
||||
{0x3046, 0x3099, 0x3094},
|
||||
{0x304b, 0x3099, 0x304c},
|
||||
{0x304d, 0x3099, 0x304e},
|
||||
{0x304f, 0x3099, 0x3050},
|
||||
{0x3051, 0x3099, 0x3052},
|
||||
{0x3053, 0x3099, 0x3054},
|
||||
{0x3055, 0x3099, 0x3056},
|
||||
{0x3057, 0x3099, 0x3058},
|
||||
{0x3059, 0x3099, 0x305a},
|
||||
{0x305b, 0x3099, 0x305c},
|
||||
{0x305d, 0x3099, 0x305e},
|
||||
{0x305f, 0x3099, 0x3060},
|
||||
{0x3061, 0x3099, 0x3062},
|
||||
{0x3064, 0x3099, 0x3065},
|
||||
{0x3066, 0x3099, 0x3067},
|
||||
{0x3068, 0x3099, 0x3069},
|
||||
{0x306f, 0x3099, 0x3070},
|
||||
{0x306f, 0x309a, 0x3071},
|
||||
{0x3072, 0x3099, 0x3073},
|
||||
{0x3072, 0x309a, 0x3074},
|
||||
{0x3075, 0x3099, 0x3076},
|
||||
{0x3075, 0x309a, 0x3077},
|
||||
{0x3078, 0x3099, 0x3079},
|
||||
{0x3078, 0x309a, 0x307a},
|
||||
{0x307b, 0x3099, 0x307c},
|
||||
{0x307b, 0x309a, 0x307d},
|
||||
{0x309d, 0x3099, 0x309e},
|
||||
{0x30a6, 0x3099, 0x30f4},
|
||||
{0x30ab, 0x3099, 0x30ac},
|
||||
{0x30ad, 0x3099, 0x30ae},
|
||||
{0x30af, 0x3099, 0x30b0},
|
||||
{0x30b1, 0x3099, 0x30b2},
|
||||
{0x30b3, 0x3099, 0x30b4},
|
||||
{0x30b5, 0x3099, 0x30b6},
|
||||
{0x30b7, 0x3099, 0x30b8},
|
||||
{0x30b9, 0x3099, 0x30ba},
|
||||
{0x30bb, 0x3099, 0x30bc},
|
||||
{0x30bd, 0x3099, 0x30be},
|
||||
{0x30bf, 0x3099, 0x30c0},
|
||||
{0x30c1, 0x3099, 0x30c2},
|
||||
{0x30c4, 0x3099, 0x30c5},
|
||||
{0x30c6, 0x3099, 0x30c7},
|
||||
{0x30c8, 0x3099, 0x30c9},
|
||||
{0x30cf, 0x3099, 0x30d0},
|
||||
{0x30cf, 0x309a, 0x30d1},
|
||||
{0x30d2, 0x3099, 0x30d3},
|
||||
{0x30d2, 0x309a, 0x30d4},
|
||||
{0x30d5, 0x3099, 0x30d6},
|
||||
{0x30d5, 0x309a, 0x30d7},
|
||||
{0x30d8, 0x3099, 0x30d9},
|
||||
{0x30d8, 0x309a, 0x30da},
|
||||
{0x30db, 0x3099, 0x30dc},
|
||||
{0x30db, 0x309a, 0x30dd},
|
||||
{0x30ef, 0x3099, 0x30f7},
|
||||
{0x30f0, 0x3099, 0x30f8},
|
||||
{0x30f1, 0x3099, 0x30f9},
|
||||
{0x30f2, 0x3099, 0x30fa},
|
||||
{0x30fd, 0x3099, 0x30fe},
|
||||
{0x11099, 0x110ba, 0x1109a},
|
||||
{0x1109b, 0x110ba, 0x1109c},
|
||||
{0x110a5, 0x110ba, 0x110ab},
|
||||
{0x11131, 0x11127, 0x1112e},
|
||||
{0x11132, 0x11127, 0x1112f},
|
||||
{0x11347, 0x1133e, 0x1134b},
|
||||
{0x11347, 0x11357, 0x1134c},
|
||||
{0x114b9, 0x114b0, 0x114bc},
|
||||
{0x114b9, 0x114ba, 0x114bb},
|
||||
{0x114b9, 0x114bd, 0x114be},
|
||||
{0x115b8, 0x115af, 0x115ba},
|
||||
{0x115b9, 0x115af, 0x115bb},
|
||||
{0x11935, 0x11930, 0x11938},
|
2071
unicode/canonical_decomp.h
Normal file
2071
unicode/canonical_decomp.h
Normal file
File diff suppressed because it is too large
Load Diff
398
unicode/combining_classes.h
Normal file
398
unicode/combining_classes.h
Normal file
@ -0,0 +1,398 @@
|
||||
/*
|
||||
* Autogenerated by read_ucd.py from The Unicode Standard 15.0.0
|
||||
*
|
||||
* List the canonical combining class of each Unicode character, if it is
|
||||
* not zero. This controls how combining marks can be reordered by the
|
||||
* Unicode normalisation algorithms.
|
||||
*
|
||||
* Used by utils/unicode-norm.c.
|
||||
*/
|
||||
|
||||
{0x0300, 0x0314, 230},
|
||||
{0x0315, 0x0315, 232},
|
||||
{0x0316, 0x0319, 220},
|
||||
{0x031a, 0x031a, 232},
|
||||
{0x031b, 0x031b, 216},
|
||||
{0x031c, 0x0320, 220},
|
||||
{0x0321, 0x0322, 202},
|
||||
{0x0323, 0x0326, 220},
|
||||
{0x0327, 0x0328, 202},
|
||||
{0x0329, 0x0333, 220},
|
||||
{0x0334, 0x0338, 1},
|
||||
{0x0339, 0x033c, 220},
|
||||
{0x033d, 0x0344, 230},
|
||||
{0x0345, 0x0345, 240},
|
||||
{0x0346, 0x0346, 230},
|
||||
{0x0347, 0x0349, 220},
|
||||
{0x034a, 0x034c, 230},
|
||||
{0x034d, 0x034e, 220},
|
||||
{0x0350, 0x0352, 230},
|
||||
{0x0353, 0x0356, 220},
|
||||
{0x0357, 0x0357, 230},
|
||||
{0x0358, 0x0358, 232},
|
||||
{0x0359, 0x035a, 220},
|
||||
{0x035b, 0x035b, 230},
|
||||
{0x035c, 0x035c, 233},
|
||||
{0x035d, 0x035e, 234},
|
||||
{0x035f, 0x035f, 233},
|
||||
{0x0360, 0x0361, 234},
|
||||
{0x0362, 0x0362, 233},
|
||||
{0x0363, 0x036f, 230},
|
||||
{0x0483, 0x0487, 230},
|
||||
{0x0591, 0x0591, 220},
|
||||
{0x0592, 0x0595, 230},
|
||||
{0x0596, 0x0596, 220},
|
||||
{0x0597, 0x0599, 230},
|
||||
{0x059a, 0x059a, 222},
|
||||
{0x059b, 0x059b, 220},
|
||||
{0x059c, 0x05a1, 230},
|
||||
{0x05a2, 0x05a7, 220},
|
||||
{0x05a8, 0x05a9, 230},
|
||||
{0x05aa, 0x05aa, 220},
|
||||
{0x05ab, 0x05ac, 230},
|
||||
{0x05ad, 0x05ad, 222},
|
||||
{0x05ae, 0x05ae, 228},
|
||||
{0x05af, 0x05af, 230},
|
||||
{0x05b0, 0x05b0, 10},
|
||||
{0x05b1, 0x05b1, 11},
|
||||
{0x05b2, 0x05b2, 12},
|
||||
{0x05b3, 0x05b3, 13},
|
||||
{0x05b4, 0x05b4, 14},
|
||||
{0x05b5, 0x05b5, 15},
|
||||
{0x05b6, 0x05b6, 16},
|
||||
{0x05b7, 0x05b7, 17},
|
||||
{0x05b8, 0x05b8, 18},
|
||||
{0x05b9, 0x05ba, 19},
|
||||
{0x05bb, 0x05bb, 20},
|
||||
{0x05bc, 0x05bc, 21},
|
||||
{0x05bd, 0x05bd, 22},
|
||||
{0x05bf, 0x05bf, 23},
|
||||
{0x05c1, 0x05c1, 24},
|
||||
{0x05c2, 0x05c2, 25},
|
||||
{0x05c4, 0x05c4, 230},
|
||||
{0x05c5, 0x05c5, 220},
|
||||
{0x05c7, 0x05c7, 18},
|
||||
{0x0610, 0x0617, 230},
|
||||
{0x0618, 0x0618, 30},
|
||||
{0x0619, 0x0619, 31},
|
||||
{0x061a, 0x061a, 32},
|
||||
{0x064b, 0x064b, 27},
|
||||
{0x064c, 0x064c, 28},
|
||||
{0x064d, 0x064d, 29},
|
||||
{0x064e, 0x064e, 30},
|
||||
{0x064f, 0x064f, 31},
|
||||
{0x0650, 0x0650, 32},
|
||||
{0x0651, 0x0651, 33},
|
||||
{0x0652, 0x0652, 34},
|
||||
{0x0653, 0x0654, 230},
|
||||
{0x0655, 0x0656, 220},
|
||||
{0x0657, 0x065b, 230},
|
||||
{0x065c, 0x065c, 220},
|
||||
{0x065d, 0x065e, 230},
|
||||
{0x065f, 0x065f, 220},
|
||||
{0x0670, 0x0670, 35},
|
||||
{0x06d6, 0x06dc, 230},
|
||||
{0x06df, 0x06e2, 230},
|
||||
{0x06e3, 0x06e3, 220},
|
||||
{0x06e4, 0x06e4, 230},
|
||||
{0x06e7, 0x06e8, 230},
|
||||
{0x06ea, 0x06ea, 220},
|
||||
{0x06eb, 0x06ec, 230},
|
||||
{0x06ed, 0x06ed, 220},
|
||||
{0x0711, 0x0711, 36},
|
||||
{0x0730, 0x0730, 230},
|
||||
{0x0731, 0x0731, 220},
|
||||
{0x0732, 0x0733, 230},
|
||||
{0x0734, 0x0734, 220},
|
||||
{0x0735, 0x0736, 230},
|
||||
{0x0737, 0x0739, 220},
|
||||
{0x073a, 0x073a, 230},
|
||||
{0x073b, 0x073c, 220},
|
||||
{0x073d, 0x073d, 230},
|
||||
{0x073e, 0x073e, 220},
|
||||
{0x073f, 0x0741, 230},
|
||||
{0x0742, 0x0742, 220},
|
||||
{0x0743, 0x0743, 230},
|
||||
{0x0744, 0x0744, 220},
|
||||
{0x0745, 0x0745, 230},
|
||||
{0x0746, 0x0746, 220},
|
||||
{0x0747, 0x0747, 230},
|
||||
{0x0748, 0x0748, 220},
|
||||
{0x0749, 0x074a, 230},
|
||||
{0x07eb, 0x07f1, 230},
|
||||
{0x07f2, 0x07f2, 220},
|
||||
{0x07f3, 0x07f3, 230},
|
||||
{0x07fd, 0x07fd, 220},
|
||||
{0x0816, 0x0819, 230},
|
||||
{0x081b, 0x0823, 230},
|
||||
{0x0825, 0x0827, 230},
|
||||
{0x0829, 0x082d, 230},
|
||||
{0x0859, 0x085b, 220},
|
||||
{0x0898, 0x0898, 230},
|
||||
{0x0899, 0x089b, 220},
|
||||
{0x089c, 0x089f, 230},
|
||||
{0x08ca, 0x08ce, 230},
|
||||
{0x08cf, 0x08d3, 220},
|
||||
{0x08d4, 0x08e1, 230},
|
||||
{0x08e3, 0x08e3, 220},
|
||||
{0x08e4, 0x08e5, 230},
|
||||
{0x08e6, 0x08e6, 220},
|
||||
{0x08e7, 0x08e8, 230},
|
||||
{0x08e9, 0x08e9, 220},
|
||||
{0x08ea, 0x08ec, 230},
|
||||
{0x08ed, 0x08ef, 220},
|
||||
{0x08f0, 0x08f0, 27},
|
||||
{0x08f1, 0x08f1, 28},
|
||||
{0x08f2, 0x08f2, 29},
|
||||
{0x08f3, 0x08f5, 230},
|
||||
{0x08f6, 0x08f6, 220},
|
||||
{0x08f7, 0x08f8, 230},
|
||||
{0x08f9, 0x08fa, 220},
|
||||
{0x08fb, 0x08ff, 230},
|
||||
{0x093c, 0x093c, 7},
|
||||
{0x094d, 0x094d, 9},
|
||||
{0x0951, 0x0951, 230},
|
||||
{0x0952, 0x0952, 220},
|
||||
{0x0953, 0x0954, 230},
|
||||
{0x09bc, 0x09bc, 7},
|
||||
{0x09cd, 0x09cd, 9},
|
||||
{0x09fe, 0x09fe, 230},
|
||||
{0x0a3c, 0x0a3c, 7},
|
||||
{0x0a4d, 0x0a4d, 9},
|
||||
{0x0abc, 0x0abc, 7},
|
||||
{0x0acd, 0x0acd, 9},
|
||||
{0x0b3c, 0x0b3c, 7},
|
||||
{0x0b4d, 0x0b4d, 9},
|
||||
{0x0bcd, 0x0bcd, 9},
|
||||
{0x0c3c, 0x0c3c, 7},
|
||||
{0x0c4d, 0x0c4d, 9},
|
||||
{0x0c55, 0x0c55, 84},
|
||||
{0x0c56, 0x0c56, 91},
|
||||
{0x0cbc, 0x0cbc, 7},
|
||||
{0x0ccd, 0x0ccd, 9},
|
||||
{0x0d3b, 0x0d3c, 9},
|
||||
{0x0d4d, 0x0d4d, 9},
|
||||
{0x0dca, 0x0dca, 9},
|
||||
{0x0e38, 0x0e39, 103},
|
||||
{0x0e3a, 0x0e3a, 9},
|
||||
{0x0e48, 0x0e4b, 107},
|
||||
{0x0eb8, 0x0eb9, 118},
|
||||
{0x0eba, 0x0eba, 9},
|
||||
{0x0ec8, 0x0ecb, 122},
|
||||
{0x0f18, 0x0f19, 220},
|
||||
{0x0f35, 0x0f35, 220},
|
||||
{0x0f37, 0x0f37, 220},
|
||||
{0x0f39, 0x0f39, 216},
|
||||
{0x0f71, 0x0f71, 129},
|
||||
{0x0f72, 0x0f72, 130},
|
||||
{0x0f74, 0x0f74, 132},
|
||||
{0x0f7a, 0x0f7d, 130},
|
||||
{0x0f80, 0x0f80, 130},
|
||||
{0x0f82, 0x0f83, 230},
|
||||
{0x0f84, 0x0f84, 9},
|
||||
{0x0f86, 0x0f87, 230},
|
||||
{0x0fc6, 0x0fc6, 220},
|
||||
{0x1037, 0x1037, 7},
|
||||
{0x1039, 0x103a, 9},
|
||||
{0x108d, 0x108d, 220},
|
||||
{0x135d, 0x135f, 230},
|
||||
{0x1714, 0x1715, 9},
|
||||
{0x1734, 0x1734, 9},
|
||||
{0x17d2, 0x17d2, 9},
|
||||
{0x17dd, 0x17dd, 230},
|
||||
{0x18a9, 0x18a9, 228},
|
||||
{0x1939, 0x1939, 222},
|
||||
{0x193a, 0x193a, 230},
|
||||
{0x193b, 0x193b, 220},
|
||||
{0x1a17, 0x1a17, 230},
|
||||
{0x1a18, 0x1a18, 220},
|
||||
{0x1a60, 0x1a60, 9},
|
||||
{0x1a75, 0x1a7c, 230},
|
||||
{0x1a7f, 0x1a7f, 220},
|
||||
{0x1ab0, 0x1ab4, 230},
|
||||
{0x1ab5, 0x1aba, 220},
|
||||
{0x1abb, 0x1abc, 230},
|
||||
{0x1abd, 0x1abd, 220},
|
||||
{0x1abf, 0x1ac0, 220},
|
||||
{0x1ac1, 0x1ac2, 230},
|
||||
{0x1ac3, 0x1ac4, 220},
|
||||
{0x1ac5, 0x1ac9, 230},
|
||||
{0x1aca, 0x1aca, 220},
|
||||
{0x1acb, 0x1ace, 230},
|
||||
{0x1b34, 0x1b34, 7},
|
||||
{0x1b44, 0x1b44, 9},
|
||||
{0x1b6b, 0x1b6b, 230},
|
||||
{0x1b6c, 0x1b6c, 220},
|
||||
{0x1b6d, 0x1b73, 230},
|
||||
{0x1baa, 0x1bab, 9},
|
||||
{0x1be6, 0x1be6, 7},
|
||||
{0x1bf2, 0x1bf3, 9},
|
||||
{0x1c37, 0x1c37, 7},
|
||||
{0x1cd0, 0x1cd2, 230},
|
||||
{0x1cd4, 0x1cd4, 1},
|
||||
{0x1cd5, 0x1cd9, 220},
|
||||
{0x1cda, 0x1cdb, 230},
|
||||
{0x1cdc, 0x1cdf, 220},
|
||||
{0x1ce0, 0x1ce0, 230},
|
||||
{0x1ce2, 0x1ce8, 1},
|
||||
{0x1ced, 0x1ced, 220},
|
||||
{0x1cf4, 0x1cf4, 230},
|
||||
{0x1cf8, 0x1cf9, 230},
|
||||
{0x1dc0, 0x1dc1, 230},
|
||||
{0x1dc2, 0x1dc2, 220},
|
||||
{0x1dc3, 0x1dc9, 230},
|
||||
{0x1dca, 0x1dca, 220},
|
||||
{0x1dcb, 0x1dcc, 230},
|
||||
{0x1dcd, 0x1dcd, 234},
|
||||
{0x1dce, 0x1dce, 214},
|
||||
{0x1dcf, 0x1dcf, 220},
|
||||
{0x1dd0, 0x1dd0, 202},
|
||||
{0x1dd1, 0x1df5, 230},
|
||||
{0x1df6, 0x1df6, 232},
|
||||
{0x1df7, 0x1df8, 228},
|
||||
{0x1df9, 0x1df9, 220},
|
||||
{0x1dfa, 0x1dfa, 218},
|
||||
{0x1dfb, 0x1dfb, 230},
|
||||
{0x1dfc, 0x1dfc, 233},
|
||||
{0x1dfd, 0x1dfd, 220},
|
||||
{0x1dfe, 0x1dfe, 230},
|
||||
{0x1dff, 0x1dff, 220},
|
||||
{0x20d0, 0x20d1, 230},
|
||||
{0x20d2, 0x20d3, 1},
|
||||
{0x20d4, 0x20d7, 230},
|
||||
{0x20d8, 0x20da, 1},
|
||||
{0x20db, 0x20dc, 230},
|
||||
{0x20e1, 0x20e1, 230},
|
||||
{0x20e5, 0x20e6, 1},
|
||||
{0x20e7, 0x20e7, 230},
|
||||
{0x20e8, 0x20e8, 220},
|
||||
{0x20e9, 0x20e9, 230},
|
||||
{0x20ea, 0x20eb, 1},
|
||||
{0x20ec, 0x20ef, 220},
|
||||
{0x20f0, 0x20f0, 230},
|
||||
{0x2cef, 0x2cf1, 230},
|
||||
{0x2d7f, 0x2d7f, 9},
|
||||
{0x2de0, 0x2dff, 230},
|
||||
{0x302a, 0x302a, 218},
|
||||
{0x302b, 0x302b, 228},
|
||||
{0x302c, 0x302c, 232},
|
||||
{0x302d, 0x302d, 222},
|
||||
{0x302e, 0x302f, 224},
|
||||
{0x3099, 0x309a, 8},
|
||||
{0xa66f, 0xa66f, 230},
|
||||
{0xa674, 0xa67d, 230},
|
||||
{0xa69e, 0xa69f, 230},
|
||||
{0xa6f0, 0xa6f1, 230},
|
||||
{0xa806, 0xa806, 9},
|
||||
{0xa82c, 0xa82c, 9},
|
||||
{0xa8c4, 0xa8c4, 9},
|
||||
{0xa8e0, 0xa8f1, 230},
|
||||
{0xa92b, 0xa92d, 220},
|
||||
{0xa953, 0xa953, 9},
|
||||
{0xa9b3, 0xa9b3, 7},
|
||||
{0xa9c0, 0xa9c0, 9},
|
||||
{0xaab0, 0xaab0, 230},
|
||||
{0xaab2, 0xaab3, 230},
|
||||
{0xaab4, 0xaab4, 220},
|
||||
{0xaab7, 0xaab8, 230},
|
||||
{0xaabe, 0xaabf, 230},
|
||||
{0xaac1, 0xaac1, 230},
|
||||
{0xaaf6, 0xaaf6, 9},
|
||||
{0xabed, 0xabed, 9},
|
||||
{0xfb1e, 0xfb1e, 26},
|
||||
{0xfe20, 0xfe26, 230},
|
||||
{0xfe27, 0xfe2d, 220},
|
||||
{0xfe2e, 0xfe2f, 230},
|
||||
{0x101fd, 0x101fd, 220},
|
||||
{0x102e0, 0x102e0, 220},
|
||||
{0x10376, 0x1037a, 230},
|
||||
{0x10a0d, 0x10a0d, 220},
|
||||
{0x10a0f, 0x10a0f, 230},
|
||||
{0x10a38, 0x10a38, 230},
|
||||
{0x10a39, 0x10a39, 1},
|
||||
{0x10a3a, 0x10a3a, 220},
|
||||
{0x10a3f, 0x10a3f, 9},
|
||||
{0x10ae5, 0x10ae5, 230},
|
||||
{0x10ae6, 0x10ae6, 220},
|
||||
{0x10d24, 0x10d27, 230},
|
||||
{0x10eab, 0x10eac, 230},
|
||||
{0x10efd, 0x10eff, 220},
|
||||
{0x10f46, 0x10f47, 220},
|
||||
{0x10f48, 0x10f4a, 230},
|
||||
{0x10f4b, 0x10f4b, 220},
|
||||
{0x10f4c, 0x10f4c, 230},
|
||||
{0x10f4d, 0x10f50, 220},
|
||||
{0x10f82, 0x10f82, 230},
|
||||
{0x10f83, 0x10f83, 220},
|
||||
{0x10f84, 0x10f84, 230},
|
||||
{0x10f85, 0x10f85, 220},
|
||||
{0x11046, 0x11046, 9},
|
||||
{0x11070, 0x11070, 9},
|
||||
{0x1107f, 0x1107f, 9},
|
||||
{0x110b9, 0x110b9, 9},
|
||||
{0x110ba, 0x110ba, 7},
|
||||
{0x11100, 0x11102, 230},
|
||||
{0x11133, 0x11134, 9},
|
||||
{0x11173, 0x11173, 7},
|
||||
{0x111c0, 0x111c0, 9},
|
||||
{0x111ca, 0x111ca, 7},
|
||||
{0x11235, 0x11235, 9},
|
||||
{0x11236, 0x11236, 7},
|
||||
{0x112e9, 0x112e9, 7},
|
||||
{0x112ea, 0x112ea, 9},
|
||||
{0x1133b, 0x1133c, 7},
|
||||
{0x1134d, 0x1134d, 9},
|
||||
{0x11366, 0x1136c, 230},
|
||||
{0x11370, 0x11374, 230},
|
||||
{0x11442, 0x11442, 9},
|
||||
{0x11446, 0x11446, 7},
|
||||
{0x1145e, 0x1145e, 230},
|
||||
{0x114c2, 0x114c2, 9},
|
||||
{0x114c3, 0x114c3, 7},
|
||||
{0x115bf, 0x115bf, 9},
|
||||
{0x115c0, 0x115c0, 7},
|
||||
{0x1163f, 0x1163f, 9},
|
||||
{0x116b6, 0x116b6, 9},
|
||||
{0x116b7, 0x116b7, 7},
|
||||
{0x1172b, 0x1172b, 9},
|
||||
{0x11839, 0x11839, 9},
|
||||
{0x1183a, 0x1183a, 7},
|
||||
{0x1193d, 0x1193e, 9},
|
||||
{0x11943, 0x11943, 7},
|
||||
{0x119e0, 0x119e0, 9},
|
||||
{0x11a34, 0x11a34, 9},
|
||||
{0x11a47, 0x11a47, 9},
|
||||
{0x11a99, 0x11a99, 9},
|
||||
{0x11c3f, 0x11c3f, 9},
|
||||
{0x11d42, 0x11d42, 7},
|
||||
{0x11d44, 0x11d45, 9},
|
||||
{0x11d97, 0x11d97, 9},
|
||||
{0x11f41, 0x11f42, 9},
|
||||
{0x16af0, 0x16af4, 1},
|
||||
{0x16b30, 0x16b36, 230},
|
||||
{0x16ff0, 0x16ff1, 6},
|
||||
{0x1bc9e, 0x1bc9e, 1},
|
||||
{0x1d165, 0x1d166, 216},
|
||||
{0x1d167, 0x1d169, 1},
|
||||
{0x1d16d, 0x1d16d, 226},
|
||||
{0x1d16e, 0x1d172, 216},
|
||||
{0x1d17b, 0x1d182, 220},
|
||||
{0x1d185, 0x1d189, 230},
|
||||
{0x1d18a, 0x1d18b, 220},
|
||||
{0x1d1aa, 0x1d1ad, 230},
|
||||
{0x1d242, 0x1d244, 230},
|
||||
{0x1e000, 0x1e006, 230},
|
||||
{0x1e008, 0x1e018, 230},
|
||||
{0x1e01b, 0x1e021, 230},
|
||||
{0x1e023, 0x1e024, 230},
|
||||
{0x1e026, 0x1e02a, 230},
|
||||
{0x1e08f, 0x1e08f, 230},
|
||||
{0x1e130, 0x1e136, 230},
|
||||
{0x1e2ae, 0x1e2ae, 230},
|
||||
{0x1e2ec, 0x1e2ef, 230},
|
||||
{0x1e4ec, 0x1e4ed, 232},
|
||||
{0x1e4ee, 0x1e4ee, 220},
|
||||
{0x1e4ef, 0x1e4ef, 230},
|
||||
{0x1e8d0, 0x1e8d6, 220},
|
||||
{0x1e944, 0x1e949, 230},
|
||||
{0x1e94a, 0x1e94a, 7},
|
@ -20,7 +20,9 @@ import zipfile
|
||||
# Record describing one code point. The rest of this script builds these
# with keyword arguments and reads the fields back by name.
UCDRecord = collections.namedtuple(
    'UCDRecord',
    'c '
    'General_Category '
    'Canonical_Combining_Class '
    'Bidi_Class '
    'Decomposition_Type '
    'Decomposition_Mapping')
|
||||
|
||||
@ -107,6 +109,12 @@ class Main:
|
||||
self.write_wide_chars_list(fh)
|
||||
with open("ambiguous_wide_chars.h", "w") as fh:
|
||||
self.write_ambiguous_wide_chars_list(fh)
|
||||
with open("combining_classes.h", "w") as fh:
|
||||
self.write_combining_class_table(fh)
|
||||
with open("canonical_decomp.h", "w") as fh:
|
||||
self.write_canonical_decomp_table(fh)
|
||||
with open("canonical_comp.h", "w") as fh:
|
||||
self.write_canonical_comp_table(fh)
|
||||
|
||||
def find_unicode_version(self):
|
||||
"""Find out the version of Unicode.
|
||||
@ -166,14 +174,21 @@ class Main:
|
||||
|
||||
# Decode some of the raw fields into more cooked
|
||||
# forms.
|
||||
cclass = int(cclass)
|
||||
|
||||
# For the moment, we only care about decomposition
|
||||
# mappings that consist of a single hex number (i.e.
|
||||
# are singletons and not compatibility mappings)
|
||||
try:
|
||||
dm = [int(decomp, 16)]
|
||||
except ValueError:
|
||||
dm = []
|
||||
# Separate the decomposition field into decomposition
|
||||
# type and mapping.
|
||||
if decomp == "":
|
||||
dtype = decomp = None
|
||||
elif "<" not in decomp:
|
||||
dtype = 'canonical'
|
||||
else:
|
||||
assert decomp.startswith("<")
|
||||
dtype, decomp = decomp[1:].split(">", 1)
|
||||
decomp = decomp.lstrip(" ")
|
||||
# And decode the mapping part from hex strings to integers.
|
||||
if decomp is not None:
|
||||
decomp = [int(w, 16) for w in decomp.split(" ")]
|
||||
|
||||
# And yield a UCDRecord for each code point in our
|
||||
# range.
|
||||
@ -181,8 +196,10 @@ class Main:
|
||||
yield UCDRecord(
|
||||
c=codepoint,
|
||||
General_Category=category,
|
||||
Canonical_Combining_Class=cclass,
|
||||
Bidi_Class=bidiclass,
|
||||
Decomposition_Mapping=dm,
|
||||
Decomposition_Type=dtype,
|
||||
Decomposition_Mapping=decomp,
|
||||
)
|
||||
|
||||
@property
|
||||
@ -231,6 +248,16 @@ class Main:
|
||||
for c in cs:
|
||||
yield c, fields[1]
|
||||
|
||||
@property
def CompositionExclusions(self):
    """Iterate over the entries of CompositionExclusions.txt.

    Every item yielded is one code point, as an integer parsed from the
    hex text of a line delivered by lines().
    """
    with self.open_ucd_file("CompositionExclusions.txt") as fh:
        yield from (int(entry, 16) for entry in lines(fh))
|
||||
|
||||
def write_file_header_comment(self, fh, description):
|
||||
print("/*", file=fh)
|
||||
print(" * Autogenerated by read_ucd.py from",
|
||||
@ -311,7 +338,8 @@ Used by terminal/bidi.c.
|
||||
|
||||
equivalents = {}
|
||||
for rec in self.UnicodeData:
|
||||
if len(rec.Decomposition_Mapping) == 1:
|
||||
if (rec.Decomposition_Type == 'canonical' and
|
||||
len(rec.Decomposition_Mapping) == 1):
|
||||
c = rec.c
|
||||
c2 = rec.Decomposition_Mapping[0]
|
||||
equivalents[c] = c2
|
||||
@ -389,5 +417,78 @@ Used by utils/wcwidth.c.
|
||||
""")
|
||||
self.write_width_table(fh, {'A'})
|
||||
|
||||
def write_combining_class_table(self, fh):
    """Write the table of nonzero canonical combining classes to fh.

    Output is one C initialiser per line, of the form
    {first code point, last code point, class}.
    """
    self.write_file_header_comment(fh, """

List the canonical combining class of each Unicode character, if it is
not zero. This controls how combining marks can be reordered by the
Unicode normalisation algorithms.

Used by utils/unicode-norm.c.

""")
    # Class 0 is the default, so only the exceptions are recorded.
    nonzero = {rec.c: rec.Canonical_Combining_Class
               for rec in self.UnicodeData
               if rec.Canonical_Combining_Class != 0}

    for (first, last), cc in map_to_ranges(nonzero):
        print(f"{{0x{first:04x}, 0x{last:04x}, {cc:d}}},", file=fh)
|
||||
|
||||
def write_canonical_decomp_table(self, fh):
    """Write the table of canonical decompositions to fh.

    Output is one C initialiser per line, of the form
    {character, first replacement, second replacement}, sorted by
    character. A missing second replacement is written as a plain 0.
    """
    self.write_file_header_comment(fh, """

List the canonical decomposition of every Unicode character that has
one. This consists of up to two characters, but those may need
decomposition in turn.

Used by utils/unicode-norm.c.

""")
    # Appending a zero and then slicing pads a one-character mapping
    # out to a pair.
    table = {
        rec.c: (rec.Decomposition_Mapping + [0])[:2]
        for rec in self.UnicodeData
        if rec.Decomposition_Type == 'canonical'
    }

    for c in sorted(table):
        first, second = table[c]
        second_text = f"0x{second:04x}" if second else "0"
        print(f"{{0x{c:04x}, 0x{first:04x}, {second_text}}},", file=fh)
|
||||
|
||||
def write_canonical_comp_table(self, fh):
    """Write the table of canonical recompositions to fh.

    Output is one C initialiser per line, of the form
    {first character, second character, composed character}, sorted by
    the (first, second) pair.
    """
    self.write_file_header_comment(fh, """

List the pairs of Unicode characters that canonically recompose to a
single character in NFC.

Used by utils/unicode-norm.c.

""")
    excluded = set(self.CompositionExclusions)
    nonstarters = {rec.c for rec in self.UnicodeData
                   if rec.Canonical_Combining_Class != 0}

    def recomposable(rec):
        # The order of the tests matters: the mapping is only measured
        # once we know the record has a canonical decomposition.
        mapping = rec.Decomposition_Mapping
        return (rec.Decomposition_Type == 'canonical'  # not compatibility
                and len(mapping) == 2                  # not a singleton
                and rec.c not in excluded              # not explicitly excluded
                and rec.c not in nonstarters           # not a non-starter
                and mapping[0] not in nonstarters)     # nor starting with one

    pairs = {tuple(rec.Decomposition_Mapping): rec.c
             for rec in self.UnicodeData if recomposable(rec)}

    for (d0, d1), c in sorted(pairs.items()):
        print(f"{{0x{d0:04x}, 0x{d1:04x}, 0x{c:04x}}},", file=fh)
|
||||
|
||||
if __name__ == '__main__':
|
||||
Main().run()
|
||||
|
@ -65,6 +65,7 @@ add_sources_from_current_dir(utils
|
||||
stripctrl.c
|
||||
tempseat.c
|
||||
tree234.c
|
||||
unicode-norm.c
|
||||
validate_manual_hostkey.c
|
||||
version.c
|
||||
wcwidth.c
|
||||
|
446
utils/unicode-norm.c
Normal file
446
utils/unicode-norm.c
Normal file
@ -0,0 +1,446 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "misc.h"
|
||||
|
||||
/* A single Unicode code point */
typedef uint32_t uchar;

/* A canonical combining class. Signed, so that -1 can be used as a
 * 'none seen yet' marker that compares below every real class. */
typedef int cclass_t;

/*
 * A growable array of uchar, doing for code points what strbuf does
 * for bytes.
 */
typedef struct ucharbuf ucharbuf;
struct ucharbuf {
    uchar *buf;                        /* the characters, or NULL if none */
    size_t len;                        /* number of characters stored */
    size_t size;                       /* number of characters allocated */
};
|
||||
|
||||
static ucharbuf *ucharbuf_new(void)
|
||||
{
|
||||
ucharbuf *ub = snew(ucharbuf);
|
||||
ub->buf = NULL;
|
||||
ub->len = ub->size = 0;
|
||||
return ub;
|
||||
}
|
||||
|
||||
/*
 * Add one character to the end of a ucharbuf, after sgrowarray_nm has
 * made sure there is room for it.
 */
static void ucharbuf_append(ucharbuf *ub, uchar c)
{
    /* The contents may be a passphrase, hence the _nm variant */
    sgrowarray_nm(ub->buf, ub->size, ub->len);

    size_t pos = ub->len;
    ub->buf[pos] = c;
    ub->len = pos + 1;
}
|
||||
|
||||
/*
 * Free a ucharbuf together with its character storage.
 *
 * These buffers can hold passphrase data, so the whole allocation
 * (not just the part in use) is wiped before it is released. The wipe
 * uses smemclr rather than memset, because a memset of memory that is
 * about to be freed is a dead store which the compiler is entitled to
 * optimise out.
 */
static void ucharbuf_free(ucharbuf *ub)
{
    if (ub->buf) {
        smemclr(ub->buf, ub->size * sizeof(*ub->buf));
        sfree(ub->buf);
    }
    sfree(ub);
}
|
||||
|
||||
/*
 * Constants relating to the arithmetic decomposition mapping of
 * Hangul to jamo, from section 3.12 of Unicode 15.0.0. The following
 * constant names match those in the spec.
 *
 * These are enumeration constants, not 'static const' variables,
 * because NCount and SCount are defined in terms of the others.
 * Standard C requires the initialiser of a static-duration object to
 * be a constant expression, and a const-qualified variable does not
 * count as one, so only an enum makes the derived values portable.
 *
 * The code using them relies on unsigned wraparound in tests such as
 * 'c - SBase < SCount'. That still works: c has type uchar, so the
 * subtraction and the comparison are both done in unsigned arithmetic.
 */
enum {
    SBase = 0xAC00,             /* first precomposed Hangul syllable */
    LBase = 0x1100,             /* first leading consonant (L) jamo */
    VBase = 0x1161,             /* first vowel (V) jamo */
    TBase = 0x11A7,             /* one before the first trailing (T) jamo */
    LCount = 19,
    VCount = 21,
    TCount = 28,                /* includes the 'no T jamo' case, index 0 */
    NCount = VCount * TCount,   /* syllables per leading consonant */
    SCount = LCount * NCount    /* precomposed syllables in total */
};
|
||||
|
||||
/*
 * Look up the canonical combining class of a character.
 *
 * The table in unicode/combining_classes.h lists inclusive ranges of
 * characters that share a nonzero class, in ascending order, so it can
 * be binary-searched. A character not covered by any range has
 * class 0.
 */
static cclass_t combining_class(uchar c)
{
    struct range {
        uchar start, end;
        cclass_t cclass;
    };
    static const struct range ranges[] = {
#include "unicode/combining_classes.h"
    };

    /* Search the half-open interval [lo, hi) of table entries */
    const struct range *lo = ranges, *hi = ranges + lenof(ranges);

    while (lo < hi) {
        const struct range *mid = lo + (hi - lo) / 2;
        if (c < mid->start)
            hi = mid;
        else if (c > mid->end)
            lo = mid + 1;
        else
            return mid->cclass;
    }

    return 0;
}
|
||||
|
||||
/*
 * Apply one step of canonical decomposition to a single character.
 *
 * The replacement characters are written to 'out', which must have
 * room for 3 of them, and the return value is how many were written.
 * A return value of 0 means c has no decomposition, in which case
 * nothing is written.
 *
 * The characters written may be decomposable in their turn; it's up
 * to the caller to keep going.
 */
static unsigned decompose_char(uchar c, uchar *out)
{
    struct decomp {
        uchar composed, dec0, dec1;
    };
    static const struct decomp decomps[] = {
#include "unicode/canonical_decomp.h"
    };

    if (c - SBase < SCount) {
        /* Arithmetically decompose a Hangul character into jamo */
        uchar SIndex = c - SBase;
        uchar LIndex = SIndex / NCount;
        uchar VIndex = SIndex % NCount / TCount;
        uchar TIndex = SIndex % TCount;

        unsigned n = 0;
        out[n++] = LBase + LIndex;
        out[n++] = VBase + VIndex;
        if (TIndex)                    /* index 0 means no trailing jamo */
            out[n++] = TBase + TIndex;
        return n;
    }

    /* Otherwise, binary-search the table, which is sorted by 'composed' */
    const struct decomp *lo = decomps, *hi = decomps + lenof(decomps);

    while (lo < hi) {
        const struct decomp *mid = lo + (hi - lo) / 2;
        if (c < mid->composed) {
            hi = mid;
        } else if (c > mid->composed) {
            lo = mid + 1;
        } else {
            out[0] = mid->dec0;
            if (!mid->dec1)            /* a zero here marks a singleton */
                return 1;
            out[1] = mid->dec1;
            return 2;
        }
    }

    return 0;
}
|
||||
|
||||
/*
 * Try to canonically compose the pair of characters (S, C) into a
 * single character.
 *
 * Returns the composed character, or 0 if this pair does not compose.
 */
static uchar compose_chars(uchar S, uchar C)
{
    struct comp {
        uchar dec0, dec1, composed;
    };
    static const struct comp comps[] = {
#include "unicode/canonical_comp.h"
    };

    if (S - LBase < LCount && C - VBase < VCount) {
        /* Arithmetically compose an L and V jamo into a Hangul LV
         * character */
        return SBase + (S - LBase) * NCount + (C - VBase) * TCount;
    }

    /*
     * TBase itself is not a trailing consonant: it is one below the
     * first of them, so that T index 0 can stand for 'no trailing
     * consonant'. Hence C must be strictly greater than TBase.
     * Accepting C == TBase would 'compose' LV with U+11A7 by returning
     * LV unchanged, and the caller would then drop U+11A7 from the
     * string.
     */
    if (S - SBase < SCount && (S - SBase) % TCount == 0 &&
        C > TBase && C - TBase < TCount) {
        /* Arithmetically compose an LV Hangul character and a T jamo
         * into a Hangul LVT character */
        return S + C - TBase;
    }

    /* Otherwise, binary-search the table, sorted by (dec0, dec1) */
    const struct comp *lo = comps, *hi = comps + lenof(comps);

    while (lo < hi) {
        const struct comp *mid = lo + (hi - lo) / 2;
        if (S < mid->dec0)
            hi = mid;
        else if (S > mid->dec0)
            lo = mid + 1;
        else if (C < mid->dec1)
            hi = mid;
        else if (C > mid->dec1)
            lo = mid + 1;
        else
            return mid->composed;
    }

    return 0;
}
|
||||
|
||||
/*
|
||||
* Recursively decompose a sequence of Unicode characters. The output
|
||||
* is written to 'out', as a sequence of native-byte-order uchar.
|
||||
*/
|
||||
static void recursively_decompose(const uchar *str, size_t len, ucharbuf *out)
|
||||
{
|
||||
uchar decomposed[3];
|
||||
|
||||
while (len-- > 0) {
|
||||
uchar c = *str++;
|
||||
unsigned n = decompose_char(c, decomposed);
|
||||
if (n == 0) {
|
||||
/* This character is indecomposable */
|
||||
ucharbuf_append(out, c);
|
||||
} else {
|
||||
/* This character has been decomposed into up to 3
|
||||
* characters, so we must now recursively decompose those */
|
||||
recursively_decompose(decomposed, n, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reorder combining marks according to the Canonical Ordering
|
||||
* Algorithm (definition D109 in Unicode 15.0.0 section 3.11).
|
||||
*
|
||||
* The algorithm is phrased mechanistically, but the essence is: among
|
||||
* any contiguous sequence of combining marks (that is, characters
|
||||
* with cclass > 0), sort them by their cclass - but _stably_, i.e.
|
||||
* breaking ties in cclass by preserving the original order of the
|
||||
* characters in question.
|
||||
*/
|
||||
static void canonical_ordering(uchar *str, size_t len)
|
||||
{
|
||||
for (size_t i = 1; i < len; i++) {
|
||||
cclass_t cclass = combining_class(str[i]);
|
||||
if (cclass == 0)
|
||||
continue;
|
||||
|
||||
size_t j = i;
|
||||
while (j > 0 && combining_class(str[j-1]) > cclass) {
|
||||
uchar tmp = str[j-1];
|
||||
str[j-1] = str[j];
|
||||
str[j] = tmp;
|
||||
|
||||
j--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Canonically recompose characters according to the Canonical
|
||||
* Composition Algorithm (definition D117 in Unicode 15.0.0 section
|
||||
* 3.11).
|
||||
*/
|
||||
static size_t canonical_composition(uchar *str, size_t len)
|
||||
{
|
||||
const uchar *in = str;
|
||||
uchar *out = str;
|
||||
uchar *last_starter = NULL;
|
||||
cclass_t highest_cclass_between = -1;
|
||||
|
||||
while (len > 0) {
|
||||
len--;
|
||||
uchar c = *in++;
|
||||
cclass_t cclass = combining_class(c);
|
||||
|
||||
if (last_starter && highest_cclass_between < cclass) {
|
||||
uchar composed = compose_chars(*last_starter, c);
|
||||
if (composed) {
|
||||
*last_starter = composed;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (cclass == 0) {
|
||||
last_starter = out;
|
||||
highest_cclass_between = -1;
|
||||
} else if (cclass > highest_cclass_between) {
|
||||
highest_cclass_between = cclass;
|
||||
}
|
||||
|
||||
*out++ = c;
|
||||
}
|
||||
|
||||
return out - str;
|
||||
}
|
||||
|
||||
/*
|
||||
* Render a string into NFD.
|
||||
*/
|
||||
static ucharbuf *nfd(ucharbuf *input)
|
||||
{
|
||||
ucharbuf *output = ucharbuf_new();
|
||||
|
||||
/*
|
||||
* Definition D118 in Unicode 15.0.0 section 3.11, referring to
|
||||
* D68 in section 3.7: recursively decompose characters, then
|
||||
* reorder combining marks.
|
||||
*/
|
||||
recursively_decompose(input->buf, input->len, output);
|
||||
canonical_ordering(output->buf, output->len);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/*
|
||||
* Render a string into NFC.
|
||||
*/
|
||||
static ucharbuf *nfc(ucharbuf *input)
|
||||
{
|
||||
/*
|
||||
* Definition D120 in Unicode 15.0.0 section 3.11: render the
|
||||
* string into NFD, then apply the canonical composition algorithm.
|
||||
*/
|
||||
ucharbuf *output = nfd(input);
|
||||
output->len = canonical_composition(output->buf, output->len);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a UTF-8 string into NFC, returning it as UTF-8 again.
|
||||
*/
|
||||
strbuf *utf8_to_nfc(ptrlen input)
|
||||
{
|
||||
BinarySource src[1];
|
||||
BinarySource_BARE_INIT_PL(src, input);
|
||||
|
||||
ucharbuf *inbuf = ucharbuf_new();
|
||||
while (get_avail(src))
|
||||
ucharbuf_append(inbuf, decode_utf8(src));
|
||||
|
||||
ucharbuf *outbuf = nfc(inbuf);
|
||||
|
||||
strbuf *output = strbuf_new_nm();
|
||||
for (size_t i = 0; i < outbuf->len; i++)
|
||||
put_utf8_char(output, outbuf->buf[i]);
|
||||
|
||||
ucharbuf_free(inbuf);
|
||||
ucharbuf_free(outbuf);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
#ifdef TEST
|
||||
/*
 * Handler for allocation failure in the standalone test program:
 * report the problem on standard error and exit with status 2.
 */
void out_of_memory(void)
{
    fputs("out of memory!\n", stderr);
    exit(2);
}
|
||||
|
||||
static int pass, fail;
|
||||
|
||||
static void subtest(const char *filename, int lineno, const char *subdesc,
|
||||
char nftype, ucharbuf *input, ucharbuf *expected)
|
||||
{
|
||||
/*
|
||||
* Convert input into either NFC or NFD, and check it's equal to
|
||||
* expected
|
||||
*/
|
||||
ucharbuf *nf;
|
||||
switch (nftype) {
|
||||
case 'C':
|
||||
nf = nfc(input);
|
||||
break;
|
||||
case 'D':
|
||||
nf = nfd(input);
|
||||
break;
|
||||
default:
|
||||
unreachable("bad nftype");
|
||||
}
|
||||
|
||||
if (nf->len == expected->len && !memcmp(nf->buf, expected->buf, nf->len)) {
|
||||
pass++;
|
||||
} else {
|
||||
printf("%s:%d: failed %s: NF%c([", filename, lineno, subdesc, nftype);
|
||||
for (size_t pos = 0; pos < input->len; pos += sizeof(uchar))
|
||||
printf("%s%04X", pos ? " " : "", (unsigned)input->buf[pos]);
|
||||
printf("]) -> [");
|
||||
for (size_t pos = 0; pos < nf->len; pos += sizeof(uchar))
|
||||
printf("%s%04X", pos ? " " : "", (unsigned)nf->buf[pos]);
|
||||
printf("] != [");
|
||||
for (size_t pos = 0; pos < expected->len; pos += sizeof(uchar))
|
||||
printf("%s%04X", pos ? " " : "", (unsigned)expected->buf[pos]);
|
||||
printf("]\n");
|
||||
fail++;
|
||||
}
|
||||
|
||||
ucharbuf_free(nf);
|
||||
}
|
||||
|
||||
static void run_tests(const char *filename, FILE *fp)
|
||||
{
|
||||
for (int lineno = 1;; lineno++) {
|
||||
char *line = chomp(fgetline(fp));
|
||||
if (!line)
|
||||
break;
|
||||
|
||||
/* Strip section dividers which begin with @ */
|
||||
if (*line == '@') {
|
||||
sfree(line);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Strip comments, if any */
|
||||
ptrlen pl = ptrlen_from_asciz(line);
|
||||
{
|
||||
const char *p = memchr(pl.ptr, '#', pl.len);
|
||||
if (p)
|
||||
pl.len = p - (const char *)pl.ptr;
|
||||
}
|
||||
|
||||
/* Strip trailing space */
|
||||
while (pl.len > 0 &&
|
||||
(((char *)pl.ptr)[pl.len-1] == ' ' ||
|
||||
((char *)pl.ptr)[pl.len-1] == '\t'))
|
||||
pl.len--;
|
||||
|
||||
/* Skip empty lines */
|
||||
if (!pl.len) {
|
||||
sfree(line);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Break up at semicolons, expecting five fields, each of
|
||||
* which we decode into hex code points */
|
||||
ucharbuf *fields[5];
|
||||
for (size_t i = 0; i < lenof(fields); i++) {
|
||||
ptrlen field = ptrlen_get_word(&pl, ";");
|
||||
fields[i] = ucharbuf_new();
|
||||
|
||||
ptrlen chr;
|
||||
while ((chr = ptrlen_get_word(&field, " ")).len) {
|
||||
char *chrstr = mkstr(chr);
|
||||
uchar c = strtoul(chrstr, NULL, 16);
|
||||
sfree(chrstr);
|
||||
ucharbuf_append(fields[i], c);
|
||||
}
|
||||
}
|
||||
|
||||
subtest(filename, lineno, "NFC(c1) = c2", 'C', fields[0], fields[1]);
|
||||
subtest(filename, lineno, "NFC(c2) = c2", 'C', fields[1], fields[1]);
|
||||
subtest(filename, lineno, "NFC(c3) = c2", 'C', fields[2], fields[1]);
|
||||
subtest(filename, lineno, "NFC(c4) = c4", 'C', fields[3], fields[3]);
|
||||
subtest(filename, lineno, "NFC(c5) = c4", 'C', fields[4], fields[3]);
|
||||
subtest(filename, lineno, "NFD(c1) = c3", 'D', fields[0], fields[2]);
|
||||
subtest(filename, lineno, "NFD(c2) = c3", 'D', fields[1], fields[2]);
|
||||
subtest(filename, lineno, "NFD(c3) = c3", 'D', fields[2], fields[2]);
|
||||
subtest(filename, lineno, "NFD(c4) = c5", 'D', fields[3], fields[4]);
|
||||
subtest(filename, lineno, "NFD(c5) = c5", 'D', fields[4], fields[4]);
|
||||
|
||||
for (size_t i = 0; i < lenof(fields); i++)
|
||||
ucharbuf_free(fields[i]);
|
||||
|
||||
sfree(line);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "test_unicode_norm: give an input file "
|
||||
"of tests or '-'\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *filename = argv[1];
|
||||
|
||||
if (!strcmp(filename, "-")) {
|
||||
run_tests("<standard input>", stdin);
|
||||
} else {
|
||||
FILE *fp = fopen(filename, "r");
|
||||
if (!fp) {
|
||||
fprintf(stderr, "test_unicode_norm: unable to open '%s'\n",
|
||||
filename);
|
||||
return 1;
|
||||
}
|
||||
run_tests(filename, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
printf("pass %d fail %d total %d\n", pass, fail, pass + fail);
|
||||
|
||||
return fail != 0;
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user