1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-07-01 03:22:48 -05:00

Implement Unicode normalisation.

A new module in 'utils' computes NFC and NFD, via a new set of data
tables generated by read_ucd.py.

The new module comes with a new test program, which can read the
NormalizationTest.txt that appears in the Unicode Character Database.
All the tests pass, as of Unicode 15.
This commit is contained in:
Simon Tatham
2022-11-09 19:28:51 +00:00
parent 4cb429e3f4
commit b35d23f699
8 changed files with 3984 additions and 9 deletions

950
unicode/canonical_comp.h Normal file
View File

@ -0,0 +1,950 @@
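/*
* Autogenerated by read_ucd.py from The Unicode Standard 15.0.0
*
* List the pairs of Unicode characters that canonically recompose to a
* single character in NFC.
*
* Each entry has the form {first, second, composed}: the code point
* 'first' followed by the code point 'second' recomposes to the single
* code point 'composed'.
*
* Used by utils/unicode-norm.c.
*/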
{0x003c, 0x0338, 0x226e},
{0x003d, 0x0338, 0x2260},
{0x003e, 0x0338, 0x226f},
{0x0041, 0x0300, 0x00c0},
{0x0041, 0x0301, 0x00c1},
{0x0041, 0x0302, 0x00c2},
{0x0041, 0x0303, 0x00c3},
{0x0041, 0x0304, 0x0100},
{0x0041, 0x0306, 0x0102},
{0x0041, 0x0307, 0x0226},
{0x0041, 0x0308, 0x00c4},
{0x0041, 0x0309, 0x1ea2},
{0x0041, 0x030a, 0x00c5},
{0x0041, 0x030c, 0x01cd},
{0x0041, 0x030f, 0x0200},
{0x0041, 0x0311, 0x0202},
{0x0041, 0x0323, 0x1ea0},
{0x0041, 0x0325, 0x1e00},
{0x0041, 0x0328, 0x0104},
{0x0042, 0x0307, 0x1e02},
{0x0042, 0x0323, 0x1e04},
{0x0042, 0x0331, 0x1e06},
{0x0043, 0x0301, 0x0106},
{0x0043, 0x0302, 0x0108},
{0x0043, 0x0307, 0x010a},
{0x0043, 0x030c, 0x010c},
{0x0043, 0x0327, 0x00c7},
{0x0044, 0x0307, 0x1e0a},
{0x0044, 0x030c, 0x010e},
{0x0044, 0x0323, 0x1e0c},
{0x0044, 0x0327, 0x1e10},
{0x0044, 0x032d, 0x1e12},
{0x0044, 0x0331, 0x1e0e},
{0x0045, 0x0300, 0x00c8},
{0x0045, 0x0301, 0x00c9},
{0x0045, 0x0302, 0x00ca},
{0x0045, 0x0303, 0x1ebc},
{0x0045, 0x0304, 0x0112},
{0x0045, 0x0306, 0x0114},
{0x0045, 0x0307, 0x0116},
{0x0045, 0x0308, 0x00cb},
{0x0045, 0x0309, 0x1eba},
{0x0045, 0x030c, 0x011a},
{0x0045, 0x030f, 0x0204},
{0x0045, 0x0311, 0x0206},
{0x0045, 0x0323, 0x1eb8},
{0x0045, 0x0327, 0x0228},
{0x0045, 0x0328, 0x0118},
{0x0045, 0x032d, 0x1e18},
{0x0045, 0x0330, 0x1e1a},
{0x0046, 0x0307, 0x1e1e},
{0x0047, 0x0301, 0x01f4},
{0x0047, 0x0302, 0x011c},
{0x0047, 0x0304, 0x1e20},
{0x0047, 0x0306, 0x011e},
{0x0047, 0x0307, 0x0120},
{0x0047, 0x030c, 0x01e6},
{0x0047, 0x0327, 0x0122},
{0x0048, 0x0302, 0x0124},
{0x0048, 0x0307, 0x1e22},
{0x0048, 0x0308, 0x1e26},
{0x0048, 0x030c, 0x021e},
{0x0048, 0x0323, 0x1e24},
{0x0048, 0x0327, 0x1e28},
{0x0048, 0x032e, 0x1e2a},
{0x0049, 0x0300, 0x00cc},
{0x0049, 0x0301, 0x00cd},
{0x0049, 0x0302, 0x00ce},
{0x0049, 0x0303, 0x0128},
{0x0049, 0x0304, 0x012a},
{0x0049, 0x0306, 0x012c},
{0x0049, 0x0307, 0x0130},
{0x0049, 0x0308, 0x00cf},
{0x0049, 0x0309, 0x1ec8},
{0x0049, 0x030c, 0x01cf},
{0x0049, 0x030f, 0x0208},
{0x0049, 0x0311, 0x020a},
{0x0049, 0x0323, 0x1eca},
{0x0049, 0x0328, 0x012e},
{0x0049, 0x0330, 0x1e2c},
{0x004a, 0x0302, 0x0134},
{0x004b, 0x0301, 0x1e30},
{0x004b, 0x030c, 0x01e8},
{0x004b, 0x0323, 0x1e32},
{0x004b, 0x0327, 0x0136},
{0x004b, 0x0331, 0x1e34},
{0x004c, 0x0301, 0x0139},
{0x004c, 0x030c, 0x013d},
{0x004c, 0x0323, 0x1e36},
{0x004c, 0x0327, 0x013b},
{0x004c, 0x032d, 0x1e3c},
{0x004c, 0x0331, 0x1e3a},
{0x004d, 0x0301, 0x1e3e},
{0x004d, 0x0307, 0x1e40},
{0x004d, 0x0323, 0x1e42},
{0x004e, 0x0300, 0x01f8},
{0x004e, 0x0301, 0x0143},
{0x004e, 0x0303, 0x00d1},
{0x004e, 0x0307, 0x1e44},
{0x004e, 0x030c, 0x0147},
{0x004e, 0x0323, 0x1e46},
{0x004e, 0x0327, 0x0145},
{0x004e, 0x032d, 0x1e4a},
{0x004e, 0x0331, 0x1e48},
{0x004f, 0x0300, 0x00d2},
{0x004f, 0x0301, 0x00d3},
{0x004f, 0x0302, 0x00d4},
{0x004f, 0x0303, 0x00d5},
{0x004f, 0x0304, 0x014c},
{0x004f, 0x0306, 0x014e},
{0x004f, 0x0307, 0x022e},
{0x004f, 0x0308, 0x00d6},
{0x004f, 0x0309, 0x1ece},
{0x004f, 0x030b, 0x0150},
{0x004f, 0x030c, 0x01d1},
{0x004f, 0x030f, 0x020c},
{0x004f, 0x0311, 0x020e},
{0x004f, 0x031b, 0x01a0},
{0x004f, 0x0323, 0x1ecc},
{0x004f, 0x0328, 0x01ea},
{0x0050, 0x0301, 0x1e54},
{0x0050, 0x0307, 0x1e56},
{0x0052, 0x0301, 0x0154},
{0x0052, 0x0307, 0x1e58},
{0x0052, 0x030c, 0x0158},
{0x0052, 0x030f, 0x0210},
{0x0052, 0x0311, 0x0212},
{0x0052, 0x0323, 0x1e5a},
{0x0052, 0x0327, 0x0156},
{0x0052, 0x0331, 0x1e5e},
{0x0053, 0x0301, 0x015a},
{0x0053, 0x0302, 0x015c},
{0x0053, 0x0307, 0x1e60},
{0x0053, 0x030c, 0x0160},
{0x0053, 0x0323, 0x1e62},
{0x0053, 0x0326, 0x0218},
{0x0053, 0x0327, 0x015e},
{0x0054, 0x0307, 0x1e6a},
{0x0054, 0x030c, 0x0164},
{0x0054, 0x0323, 0x1e6c},
{0x0054, 0x0326, 0x021a},
{0x0054, 0x0327, 0x0162},
{0x0054, 0x032d, 0x1e70},
{0x0054, 0x0331, 0x1e6e},
{0x0055, 0x0300, 0x00d9},
{0x0055, 0x0301, 0x00da},
{0x0055, 0x0302, 0x00db},
{0x0055, 0x0303, 0x0168},
{0x0055, 0x0304, 0x016a},
{0x0055, 0x0306, 0x016c},
{0x0055, 0x0308, 0x00dc},
{0x0055, 0x0309, 0x1ee6},
{0x0055, 0x030a, 0x016e},
{0x0055, 0x030b, 0x0170},
{0x0055, 0x030c, 0x01d3},
{0x0055, 0x030f, 0x0214},
{0x0055, 0x0311, 0x0216},
{0x0055, 0x031b, 0x01af},
{0x0055, 0x0323, 0x1ee4},
{0x0055, 0x0324, 0x1e72},
{0x0055, 0x0328, 0x0172},
{0x0055, 0x032d, 0x1e76},
{0x0055, 0x0330, 0x1e74},
{0x0056, 0x0303, 0x1e7c},
{0x0056, 0x0323, 0x1e7e},
{0x0057, 0x0300, 0x1e80},
{0x0057, 0x0301, 0x1e82},
{0x0057, 0x0302, 0x0174},
{0x0057, 0x0307, 0x1e86},
{0x0057, 0x0308, 0x1e84},
{0x0057, 0x0323, 0x1e88},
{0x0058, 0x0307, 0x1e8a},
{0x0058, 0x0308, 0x1e8c},
{0x0059, 0x0300, 0x1ef2},
{0x0059, 0x0301, 0x00dd},
{0x0059, 0x0302, 0x0176},
{0x0059, 0x0303, 0x1ef8},
{0x0059, 0x0304, 0x0232},
{0x0059, 0x0307, 0x1e8e},
{0x0059, 0x0308, 0x0178},
{0x0059, 0x0309, 0x1ef6},
{0x0059, 0x0323, 0x1ef4},
{0x005a, 0x0301, 0x0179},
{0x005a, 0x0302, 0x1e90},
{0x005a, 0x0307, 0x017b},
{0x005a, 0x030c, 0x017d},
{0x005a, 0x0323, 0x1e92},
{0x005a, 0x0331, 0x1e94},
{0x0061, 0x0300, 0x00e0},
{0x0061, 0x0301, 0x00e1},
{0x0061, 0x0302, 0x00e2},
{0x0061, 0x0303, 0x00e3},
{0x0061, 0x0304, 0x0101},
{0x0061, 0x0306, 0x0103},
{0x0061, 0x0307, 0x0227},
{0x0061, 0x0308, 0x00e4},
{0x0061, 0x0309, 0x1ea3},
{0x0061, 0x030a, 0x00e5},
{0x0061, 0x030c, 0x01ce},
{0x0061, 0x030f, 0x0201},
{0x0061, 0x0311, 0x0203},
{0x0061, 0x0323, 0x1ea1},
{0x0061, 0x0325, 0x1e01},
{0x0061, 0x0328, 0x0105},
{0x0062, 0x0307, 0x1e03},
{0x0062, 0x0323, 0x1e05},
{0x0062, 0x0331, 0x1e07},
{0x0063, 0x0301, 0x0107},
{0x0063, 0x0302, 0x0109},
{0x0063, 0x0307, 0x010b},
{0x0063, 0x030c, 0x010d},
{0x0063, 0x0327, 0x00e7},
{0x0064, 0x0307, 0x1e0b},
{0x0064, 0x030c, 0x010f},
{0x0064, 0x0323, 0x1e0d},
{0x0064, 0x0327, 0x1e11},
{0x0064, 0x032d, 0x1e13},
{0x0064, 0x0331, 0x1e0f},
{0x0065, 0x0300, 0x00e8},
{0x0065, 0x0301, 0x00e9},
{0x0065, 0x0302, 0x00ea},
{0x0065, 0x0303, 0x1ebd},
{0x0065, 0x0304, 0x0113},
{0x0065, 0x0306, 0x0115},
{0x0065, 0x0307, 0x0117},
{0x0065, 0x0308, 0x00eb},
{0x0065, 0x0309, 0x1ebb},
{0x0065, 0x030c, 0x011b},
{0x0065, 0x030f, 0x0205},
{0x0065, 0x0311, 0x0207},
{0x0065, 0x0323, 0x1eb9},
{0x0065, 0x0327, 0x0229},
{0x0065, 0x0328, 0x0119},
{0x0065, 0x032d, 0x1e19},
{0x0065, 0x0330, 0x1e1b},
{0x0066, 0x0307, 0x1e1f},
{0x0067, 0x0301, 0x01f5},
{0x0067, 0x0302, 0x011d},
{0x0067, 0x0304, 0x1e21},
{0x0067, 0x0306, 0x011f},
{0x0067, 0x0307, 0x0121},
{0x0067, 0x030c, 0x01e7},
{0x0067, 0x0327, 0x0123},
{0x0068, 0x0302, 0x0125},
{0x0068, 0x0307, 0x1e23},
{0x0068, 0x0308, 0x1e27},
{0x0068, 0x030c, 0x021f},
{0x0068, 0x0323, 0x1e25},
{0x0068, 0x0327, 0x1e29},
{0x0068, 0x032e, 0x1e2b},
{0x0068, 0x0331, 0x1e96},
{0x0069, 0x0300, 0x00ec},
{0x0069, 0x0301, 0x00ed},
{0x0069, 0x0302, 0x00ee},
{0x0069, 0x0303, 0x0129},
{0x0069, 0x0304, 0x012b},
{0x0069, 0x0306, 0x012d},
{0x0069, 0x0308, 0x00ef},
{0x0069, 0x0309, 0x1ec9},
{0x0069, 0x030c, 0x01d0},
{0x0069, 0x030f, 0x0209},
{0x0069, 0x0311, 0x020b},
{0x0069, 0x0323, 0x1ecb},
{0x0069, 0x0328, 0x012f},
{0x0069, 0x0330, 0x1e2d},
{0x006a, 0x0302, 0x0135},
{0x006a, 0x030c, 0x01f0},
{0x006b, 0x0301, 0x1e31},
{0x006b, 0x030c, 0x01e9},
{0x006b, 0x0323, 0x1e33},
{0x006b, 0x0327, 0x0137},
{0x006b, 0x0331, 0x1e35},
{0x006c, 0x0301, 0x013a},
{0x006c, 0x030c, 0x013e},
{0x006c, 0x0323, 0x1e37},
{0x006c, 0x0327, 0x013c},
{0x006c, 0x032d, 0x1e3d},
{0x006c, 0x0331, 0x1e3b},
{0x006d, 0x0301, 0x1e3f},
{0x006d, 0x0307, 0x1e41},
{0x006d, 0x0323, 0x1e43},
{0x006e, 0x0300, 0x01f9},
{0x006e, 0x0301, 0x0144},
{0x006e, 0x0303, 0x00f1},
{0x006e, 0x0307, 0x1e45},
{0x006e, 0x030c, 0x0148},
{0x006e, 0x0323, 0x1e47},
{0x006e, 0x0327, 0x0146},
{0x006e, 0x032d, 0x1e4b},
{0x006e, 0x0331, 0x1e49},
{0x006f, 0x0300, 0x00f2},
{0x006f, 0x0301, 0x00f3},
{0x006f, 0x0302, 0x00f4},
{0x006f, 0x0303, 0x00f5},
{0x006f, 0x0304, 0x014d},
{0x006f, 0x0306, 0x014f},
{0x006f, 0x0307, 0x022f},
{0x006f, 0x0308, 0x00f6},
{0x006f, 0x0309, 0x1ecf},
{0x006f, 0x030b, 0x0151},
{0x006f, 0x030c, 0x01d2},
{0x006f, 0x030f, 0x020d},
{0x006f, 0x0311, 0x020f},
{0x006f, 0x031b, 0x01a1},
{0x006f, 0x0323, 0x1ecd},
{0x006f, 0x0328, 0x01eb},
{0x0070, 0x0301, 0x1e55},
{0x0070, 0x0307, 0x1e57},
{0x0072, 0x0301, 0x0155},
{0x0072, 0x0307, 0x1e59},
{0x0072, 0x030c, 0x0159},
{0x0072, 0x030f, 0x0211},
{0x0072, 0x0311, 0x0213},
{0x0072, 0x0323, 0x1e5b},
{0x0072, 0x0327, 0x0157},
{0x0072, 0x0331, 0x1e5f},
{0x0073, 0x0301, 0x015b},
{0x0073, 0x0302, 0x015d},
{0x0073, 0x0307, 0x1e61},
{0x0073, 0x030c, 0x0161},
{0x0073, 0x0323, 0x1e63},
{0x0073, 0x0326, 0x0219},
{0x0073, 0x0327, 0x015f},
{0x0074, 0x0307, 0x1e6b},
{0x0074, 0x0308, 0x1e97},
{0x0074, 0x030c, 0x0165},
{0x0074, 0x0323, 0x1e6d},
{0x0074, 0x0326, 0x021b},
{0x0074, 0x0327, 0x0163},
{0x0074, 0x032d, 0x1e71},
{0x0074, 0x0331, 0x1e6f},
{0x0075, 0x0300, 0x00f9},
{0x0075, 0x0301, 0x00fa},
{0x0075, 0x0302, 0x00fb},
{0x0075, 0x0303, 0x0169},
{0x0075, 0x0304, 0x016b},
{0x0075, 0x0306, 0x016d},
{0x0075, 0x0308, 0x00fc},
{0x0075, 0x0309, 0x1ee7},
{0x0075, 0x030a, 0x016f},
{0x0075, 0x030b, 0x0171},
{0x0075, 0x030c, 0x01d4},
{0x0075, 0x030f, 0x0215},
{0x0075, 0x0311, 0x0217},
{0x0075, 0x031b, 0x01b0},
{0x0075, 0x0323, 0x1ee5},
{0x0075, 0x0324, 0x1e73},
{0x0075, 0x0328, 0x0173},
{0x0075, 0x032d, 0x1e77},
{0x0075, 0x0330, 0x1e75},
{0x0076, 0x0303, 0x1e7d},
{0x0076, 0x0323, 0x1e7f},
{0x0077, 0x0300, 0x1e81},
{0x0077, 0x0301, 0x1e83},
{0x0077, 0x0302, 0x0175},
{0x0077, 0x0307, 0x1e87},
{0x0077, 0x0308, 0x1e85},
{0x0077, 0x030a, 0x1e98},
{0x0077, 0x0323, 0x1e89},
{0x0078, 0x0307, 0x1e8b},
{0x0078, 0x0308, 0x1e8d},
{0x0079, 0x0300, 0x1ef3},
{0x0079, 0x0301, 0x00fd},
{0x0079, 0x0302, 0x0177},
{0x0079, 0x0303, 0x1ef9},
{0x0079, 0x0304, 0x0233},
{0x0079, 0x0307, 0x1e8f},
{0x0079, 0x0308, 0x00ff},
{0x0079, 0x0309, 0x1ef7},
{0x0079, 0x030a, 0x1e99},
{0x0079, 0x0323, 0x1ef5},
{0x007a, 0x0301, 0x017a},
{0x007a, 0x0302, 0x1e91},
{0x007a, 0x0307, 0x017c},
{0x007a, 0x030c, 0x017e},
{0x007a, 0x0323, 0x1e93},
{0x007a, 0x0331, 0x1e95},
{0x00a8, 0x0300, 0x1fed},
{0x00a8, 0x0301, 0x0385},
{0x00a8, 0x0342, 0x1fc1},
{0x00c2, 0x0300, 0x1ea6},
{0x00c2, 0x0301, 0x1ea4},
{0x00c2, 0x0303, 0x1eaa},
{0x00c2, 0x0309, 0x1ea8},
{0x00c4, 0x0304, 0x01de},
{0x00c5, 0x0301, 0x01fa},
{0x00c6, 0x0301, 0x01fc},
{0x00c6, 0x0304, 0x01e2},
{0x00c7, 0x0301, 0x1e08},
{0x00ca, 0x0300, 0x1ec0},
{0x00ca, 0x0301, 0x1ebe},
{0x00ca, 0x0303, 0x1ec4},
{0x00ca, 0x0309, 0x1ec2},
{0x00cf, 0x0301, 0x1e2e},
{0x00d4, 0x0300, 0x1ed2},
{0x00d4, 0x0301, 0x1ed0},
{0x00d4, 0x0303, 0x1ed6},
{0x00d4, 0x0309, 0x1ed4},
{0x00d5, 0x0301, 0x1e4c},
{0x00d5, 0x0304, 0x022c},
{0x00d5, 0x0308, 0x1e4e},
{0x00d6, 0x0304, 0x022a},
{0x00d8, 0x0301, 0x01fe},
{0x00dc, 0x0300, 0x01db},
{0x00dc, 0x0301, 0x01d7},
{0x00dc, 0x0304, 0x01d5},
{0x00dc, 0x030c, 0x01d9},
{0x00e2, 0x0300, 0x1ea7},
{0x00e2, 0x0301, 0x1ea5},
{0x00e2, 0x0303, 0x1eab},
{0x00e2, 0x0309, 0x1ea9},
{0x00e4, 0x0304, 0x01df},
{0x00e5, 0x0301, 0x01fb},
{0x00e6, 0x0301, 0x01fd},
{0x00e6, 0x0304, 0x01e3},
{0x00e7, 0x0301, 0x1e09},
{0x00ea, 0x0300, 0x1ec1},
{0x00ea, 0x0301, 0x1ebf},
{0x00ea, 0x0303, 0x1ec5},
{0x00ea, 0x0309, 0x1ec3},
{0x00ef, 0x0301, 0x1e2f},
{0x00f4, 0x0300, 0x1ed3},
{0x00f4, 0x0301, 0x1ed1},
{0x00f4, 0x0303, 0x1ed7},
{0x00f4, 0x0309, 0x1ed5},
{0x00f5, 0x0301, 0x1e4d},
{0x00f5, 0x0304, 0x022d},
{0x00f5, 0x0308, 0x1e4f},
{0x00f6, 0x0304, 0x022b},
{0x00f8, 0x0301, 0x01ff},
{0x00fc, 0x0300, 0x01dc},
{0x00fc, 0x0301, 0x01d8},
{0x00fc, 0x0304, 0x01d6},
{0x00fc, 0x030c, 0x01da},
{0x0102, 0x0300, 0x1eb0},
{0x0102, 0x0301, 0x1eae},
{0x0102, 0x0303, 0x1eb4},
{0x0102, 0x0309, 0x1eb2},
{0x0103, 0x0300, 0x1eb1},
{0x0103, 0x0301, 0x1eaf},
{0x0103, 0x0303, 0x1eb5},
{0x0103, 0x0309, 0x1eb3},
{0x0112, 0x0300, 0x1e14},
{0x0112, 0x0301, 0x1e16},
{0x0113, 0x0300, 0x1e15},
{0x0113, 0x0301, 0x1e17},
{0x014c, 0x0300, 0x1e50},
{0x014c, 0x0301, 0x1e52},
{0x014d, 0x0300, 0x1e51},
{0x014d, 0x0301, 0x1e53},
{0x015a, 0x0307, 0x1e64},
{0x015b, 0x0307, 0x1e65},
{0x0160, 0x0307, 0x1e66},
{0x0161, 0x0307, 0x1e67},
{0x0168, 0x0301, 0x1e78},
{0x0169, 0x0301, 0x1e79},
{0x016a, 0x0308, 0x1e7a},
{0x016b, 0x0308, 0x1e7b},
{0x017f, 0x0307, 0x1e9b},
{0x01a0, 0x0300, 0x1edc},
{0x01a0, 0x0301, 0x1eda},
{0x01a0, 0x0303, 0x1ee0},
{0x01a0, 0x0309, 0x1ede},
{0x01a0, 0x0323, 0x1ee2},
{0x01a1, 0x0300, 0x1edd},
{0x01a1, 0x0301, 0x1edb},
{0x01a1, 0x0303, 0x1ee1},
{0x01a1, 0x0309, 0x1edf},
{0x01a1, 0x0323, 0x1ee3},
{0x01af, 0x0300, 0x1eea},
{0x01af, 0x0301, 0x1ee8},
{0x01af, 0x0303, 0x1eee},
{0x01af, 0x0309, 0x1eec},
{0x01af, 0x0323, 0x1ef0},
{0x01b0, 0x0300, 0x1eeb},
{0x01b0, 0x0301, 0x1ee9},
{0x01b0, 0x0303, 0x1eef},
{0x01b0, 0x0309, 0x1eed},
{0x01b0, 0x0323, 0x1ef1},
{0x01b7, 0x030c, 0x01ee},
{0x01ea, 0x0304, 0x01ec},
{0x01eb, 0x0304, 0x01ed},
{0x0226, 0x0304, 0x01e0},
{0x0227, 0x0304, 0x01e1},
{0x0228, 0x0306, 0x1e1c},
{0x0229, 0x0306, 0x1e1d},
{0x022e, 0x0304, 0x0230},
{0x022f, 0x0304, 0x0231},
{0x0292, 0x030c, 0x01ef},
{0x0391, 0x0300, 0x1fba},
{0x0391, 0x0301, 0x0386},
{0x0391, 0x0304, 0x1fb9},
{0x0391, 0x0306, 0x1fb8},
{0x0391, 0x0313, 0x1f08},
{0x0391, 0x0314, 0x1f09},
{0x0391, 0x0345, 0x1fbc},
{0x0395, 0x0300, 0x1fc8},
{0x0395, 0x0301, 0x0388},
{0x0395, 0x0313, 0x1f18},
{0x0395, 0x0314, 0x1f19},
{0x0397, 0x0300, 0x1fca},
{0x0397, 0x0301, 0x0389},
{0x0397, 0x0313, 0x1f28},
{0x0397, 0x0314, 0x1f29},
{0x0397, 0x0345, 0x1fcc},
{0x0399, 0x0300, 0x1fda},
{0x0399, 0x0301, 0x038a},
{0x0399, 0x0304, 0x1fd9},
{0x0399, 0x0306, 0x1fd8},
{0x0399, 0x0308, 0x03aa},
{0x0399, 0x0313, 0x1f38},
{0x0399, 0x0314, 0x1f39},
{0x039f, 0x0300, 0x1ff8},
{0x039f, 0x0301, 0x038c},
{0x039f, 0x0313, 0x1f48},
{0x039f, 0x0314, 0x1f49},
{0x03a1, 0x0314, 0x1fec},
{0x03a5, 0x0300, 0x1fea},
{0x03a5, 0x0301, 0x038e},
{0x03a5, 0x0304, 0x1fe9},
{0x03a5, 0x0306, 0x1fe8},
{0x03a5, 0x0308, 0x03ab},
{0x03a5, 0x0314, 0x1f59},
{0x03a9, 0x0300, 0x1ffa},
{0x03a9, 0x0301, 0x038f},
{0x03a9, 0x0313, 0x1f68},
{0x03a9, 0x0314, 0x1f69},
{0x03a9, 0x0345, 0x1ffc},
{0x03ac, 0x0345, 0x1fb4},
{0x03ae, 0x0345, 0x1fc4},
{0x03b1, 0x0300, 0x1f70},
{0x03b1, 0x0301, 0x03ac},
{0x03b1, 0x0304, 0x1fb1},
{0x03b1, 0x0306, 0x1fb0},
{0x03b1, 0x0313, 0x1f00},
{0x03b1, 0x0314, 0x1f01},
{0x03b1, 0x0342, 0x1fb6},
{0x03b1, 0x0345, 0x1fb3},
{0x03b5, 0x0300, 0x1f72},
{0x03b5, 0x0301, 0x03ad},
{0x03b5, 0x0313, 0x1f10},
{0x03b5, 0x0314, 0x1f11},
{0x03b7, 0x0300, 0x1f74},
{0x03b7, 0x0301, 0x03ae},
{0x03b7, 0x0313, 0x1f20},
{0x03b7, 0x0314, 0x1f21},
{0x03b7, 0x0342, 0x1fc6},
{0x03b7, 0x0345, 0x1fc3},
{0x03b9, 0x0300, 0x1f76},
{0x03b9, 0x0301, 0x03af},
{0x03b9, 0x0304, 0x1fd1},
{0x03b9, 0x0306, 0x1fd0},
{0x03b9, 0x0308, 0x03ca},
{0x03b9, 0x0313, 0x1f30},
{0x03b9, 0x0314, 0x1f31},
{0x03b9, 0x0342, 0x1fd6},
{0x03bf, 0x0300, 0x1f78},
{0x03bf, 0x0301, 0x03cc},
{0x03bf, 0x0313, 0x1f40},
{0x03bf, 0x0314, 0x1f41},
{0x03c1, 0x0313, 0x1fe4},
{0x03c1, 0x0314, 0x1fe5},
{0x03c5, 0x0300, 0x1f7a},
{0x03c5, 0x0301, 0x03cd},
{0x03c5, 0x0304, 0x1fe1},
{0x03c5, 0x0306, 0x1fe0},
{0x03c5, 0x0308, 0x03cb},
{0x03c5, 0x0313, 0x1f50},
{0x03c5, 0x0314, 0x1f51},
{0x03c5, 0x0342, 0x1fe6},
{0x03c9, 0x0300, 0x1f7c},
{0x03c9, 0x0301, 0x03ce},
{0x03c9, 0x0313, 0x1f60},
{0x03c9, 0x0314, 0x1f61},
{0x03c9, 0x0342, 0x1ff6},
{0x03c9, 0x0345, 0x1ff3},
{0x03ca, 0x0300, 0x1fd2},
{0x03ca, 0x0301, 0x0390},
{0x03ca, 0x0342, 0x1fd7},
{0x03cb, 0x0300, 0x1fe2},
{0x03cb, 0x0301, 0x03b0},
{0x03cb, 0x0342, 0x1fe7},
{0x03ce, 0x0345, 0x1ff4},
{0x03d2, 0x0301, 0x03d3},
{0x03d2, 0x0308, 0x03d4},
{0x0406, 0x0308, 0x0407},
{0x0410, 0x0306, 0x04d0},
{0x0410, 0x0308, 0x04d2},
{0x0413, 0x0301, 0x0403},
{0x0415, 0x0300, 0x0400},
{0x0415, 0x0306, 0x04d6},
{0x0415, 0x0308, 0x0401},
{0x0416, 0x0306, 0x04c1},
{0x0416, 0x0308, 0x04dc},
{0x0417, 0x0308, 0x04de},
{0x0418, 0x0300, 0x040d},
{0x0418, 0x0304, 0x04e2},
{0x0418, 0x0306, 0x0419},
{0x0418, 0x0308, 0x04e4},
{0x041a, 0x0301, 0x040c},
{0x041e, 0x0308, 0x04e6},
{0x0423, 0x0304, 0x04ee},
{0x0423, 0x0306, 0x040e},
{0x0423, 0x0308, 0x04f0},
{0x0423, 0x030b, 0x04f2},
{0x0427, 0x0308, 0x04f4},
{0x042b, 0x0308, 0x04f8},
{0x042d, 0x0308, 0x04ec},
{0x0430, 0x0306, 0x04d1},
{0x0430, 0x0308, 0x04d3},
{0x0433, 0x0301, 0x0453},
{0x0435, 0x0300, 0x0450},
{0x0435, 0x0306, 0x04d7},
{0x0435, 0x0308, 0x0451},
{0x0436, 0x0306, 0x04c2},
{0x0436, 0x0308, 0x04dd},
{0x0437, 0x0308, 0x04df},
{0x0438, 0x0300, 0x045d},
{0x0438, 0x0304, 0x04e3},
{0x0438, 0x0306, 0x0439},
{0x0438, 0x0308, 0x04e5},
{0x043a, 0x0301, 0x045c},
{0x043e, 0x0308, 0x04e7},
{0x0443, 0x0304, 0x04ef},
{0x0443, 0x0306, 0x045e},
{0x0443, 0x0308, 0x04f1},
{0x0443, 0x030b, 0x04f3},
{0x0447, 0x0308, 0x04f5},
{0x044b, 0x0308, 0x04f9},
{0x044d, 0x0308, 0x04ed},
{0x0456, 0x0308, 0x0457},
{0x0474, 0x030f, 0x0476},
{0x0475, 0x030f, 0x0477},
{0x04d8, 0x0308, 0x04da},
{0x04d9, 0x0308, 0x04db},
{0x04e8, 0x0308, 0x04ea},
{0x04e9, 0x0308, 0x04eb},
{0x0627, 0x0653, 0x0622},
{0x0627, 0x0654, 0x0623},
{0x0627, 0x0655, 0x0625},
{0x0648, 0x0654, 0x0624},
{0x064a, 0x0654, 0x0626},
{0x06c1, 0x0654, 0x06c2},
{0x06d2, 0x0654, 0x06d3},
{0x06d5, 0x0654, 0x06c0},
{0x0928, 0x093c, 0x0929},
{0x0930, 0x093c, 0x0931},
{0x0933, 0x093c, 0x0934},
{0x09c7, 0x09be, 0x09cb},
{0x09c7, 0x09d7, 0x09cc},
{0x0b47, 0x0b3e, 0x0b4b},
{0x0b47, 0x0b56, 0x0b48},
{0x0b47, 0x0b57, 0x0b4c},
{0x0b92, 0x0bd7, 0x0b94},
{0x0bc6, 0x0bbe, 0x0bca},
{0x0bc6, 0x0bd7, 0x0bcc},
{0x0bc7, 0x0bbe, 0x0bcb},
{0x0c46, 0x0c56, 0x0c48},
{0x0cbf, 0x0cd5, 0x0cc0},
{0x0cc6, 0x0cc2, 0x0cca},
{0x0cc6, 0x0cd5, 0x0cc7},
{0x0cc6, 0x0cd6, 0x0cc8},
{0x0cca, 0x0cd5, 0x0ccb},
{0x0d46, 0x0d3e, 0x0d4a},
{0x0d46, 0x0d57, 0x0d4c},
{0x0d47, 0x0d3e, 0x0d4b},
{0x0dd9, 0x0dca, 0x0dda},
{0x0dd9, 0x0dcf, 0x0ddc},
{0x0dd9, 0x0ddf, 0x0dde},
{0x0ddc, 0x0dca, 0x0ddd},
{0x1025, 0x102e, 0x1026},
{0x1b05, 0x1b35, 0x1b06},
{0x1b07, 0x1b35, 0x1b08},
{0x1b09, 0x1b35, 0x1b0a},
{0x1b0b, 0x1b35, 0x1b0c},
{0x1b0d, 0x1b35, 0x1b0e},
{0x1b11, 0x1b35, 0x1b12},
{0x1b3a, 0x1b35, 0x1b3b},
{0x1b3c, 0x1b35, 0x1b3d},
{0x1b3e, 0x1b35, 0x1b40},
{0x1b3f, 0x1b35, 0x1b41},
{0x1b42, 0x1b35, 0x1b43},
{0x1e36, 0x0304, 0x1e38},
{0x1e37, 0x0304, 0x1e39},
{0x1e5a, 0x0304, 0x1e5c},
{0x1e5b, 0x0304, 0x1e5d},
{0x1e62, 0x0307, 0x1e68},
{0x1e63, 0x0307, 0x1e69},
{0x1ea0, 0x0302, 0x1eac},
{0x1ea0, 0x0306, 0x1eb6},
{0x1ea1, 0x0302, 0x1ead},
{0x1ea1, 0x0306, 0x1eb7},
{0x1eb8, 0x0302, 0x1ec6},
{0x1eb9, 0x0302, 0x1ec7},
{0x1ecc, 0x0302, 0x1ed8},
{0x1ecd, 0x0302, 0x1ed9},
{0x1f00, 0x0300, 0x1f02},
{0x1f00, 0x0301, 0x1f04},
{0x1f00, 0x0342, 0x1f06},
{0x1f00, 0x0345, 0x1f80},
{0x1f01, 0x0300, 0x1f03},
{0x1f01, 0x0301, 0x1f05},
{0x1f01, 0x0342, 0x1f07},
{0x1f01, 0x0345, 0x1f81},
{0x1f02, 0x0345, 0x1f82},
{0x1f03, 0x0345, 0x1f83},
{0x1f04, 0x0345, 0x1f84},
{0x1f05, 0x0345, 0x1f85},
{0x1f06, 0x0345, 0x1f86},
{0x1f07, 0x0345, 0x1f87},
{0x1f08, 0x0300, 0x1f0a},
{0x1f08, 0x0301, 0x1f0c},
{0x1f08, 0x0342, 0x1f0e},
{0x1f08, 0x0345, 0x1f88},
{0x1f09, 0x0300, 0x1f0b},
{0x1f09, 0x0301, 0x1f0d},
{0x1f09, 0x0342, 0x1f0f},
{0x1f09, 0x0345, 0x1f89},
{0x1f0a, 0x0345, 0x1f8a},
{0x1f0b, 0x0345, 0x1f8b},
{0x1f0c, 0x0345, 0x1f8c},
{0x1f0d, 0x0345, 0x1f8d},
{0x1f0e, 0x0345, 0x1f8e},
{0x1f0f, 0x0345, 0x1f8f},
{0x1f10, 0x0300, 0x1f12},
{0x1f10, 0x0301, 0x1f14},
{0x1f11, 0x0300, 0x1f13},
{0x1f11, 0x0301, 0x1f15},
{0x1f18, 0x0300, 0x1f1a},
{0x1f18, 0x0301, 0x1f1c},
{0x1f19, 0x0300, 0x1f1b},
{0x1f19, 0x0301, 0x1f1d},
{0x1f20, 0x0300, 0x1f22},
{0x1f20, 0x0301, 0x1f24},
{0x1f20, 0x0342, 0x1f26},
{0x1f20, 0x0345, 0x1f90},
{0x1f21, 0x0300, 0x1f23},
{0x1f21, 0x0301, 0x1f25},
{0x1f21, 0x0342, 0x1f27},
{0x1f21, 0x0345, 0x1f91},
{0x1f22, 0x0345, 0x1f92},
{0x1f23, 0x0345, 0x1f93},
{0x1f24, 0x0345, 0x1f94},
{0x1f25, 0x0345, 0x1f95},
{0x1f26, 0x0345, 0x1f96},
{0x1f27, 0x0345, 0x1f97},
{0x1f28, 0x0300, 0x1f2a},
{0x1f28, 0x0301, 0x1f2c},
{0x1f28, 0x0342, 0x1f2e},
{0x1f28, 0x0345, 0x1f98},
{0x1f29, 0x0300, 0x1f2b},
{0x1f29, 0x0301, 0x1f2d},
{0x1f29, 0x0342, 0x1f2f},
{0x1f29, 0x0345, 0x1f99},
{0x1f2a, 0x0345, 0x1f9a},
{0x1f2b, 0x0345, 0x1f9b},
{0x1f2c, 0x0345, 0x1f9c},
{0x1f2d, 0x0345, 0x1f9d},
{0x1f2e, 0x0345, 0x1f9e},
{0x1f2f, 0x0345, 0x1f9f},
{0x1f30, 0x0300, 0x1f32},
{0x1f30, 0x0301, 0x1f34},
{0x1f30, 0x0342, 0x1f36},
{0x1f31, 0x0300, 0x1f33},
{0x1f31, 0x0301, 0x1f35},
{0x1f31, 0x0342, 0x1f37},
{0x1f38, 0x0300, 0x1f3a},
{0x1f38, 0x0301, 0x1f3c},
{0x1f38, 0x0342, 0x1f3e},
{0x1f39, 0x0300, 0x1f3b},
{0x1f39, 0x0301, 0x1f3d},
{0x1f39, 0x0342, 0x1f3f},
{0x1f40, 0x0300, 0x1f42},
{0x1f40, 0x0301, 0x1f44},
{0x1f41, 0x0300, 0x1f43},
{0x1f41, 0x0301, 0x1f45},
{0x1f48, 0x0300, 0x1f4a},
{0x1f48, 0x0301, 0x1f4c},
{0x1f49, 0x0300, 0x1f4b},
{0x1f49, 0x0301, 0x1f4d},
{0x1f50, 0x0300, 0x1f52},
{0x1f50, 0x0301, 0x1f54},
{0x1f50, 0x0342, 0x1f56},
{0x1f51, 0x0300, 0x1f53},
{0x1f51, 0x0301, 0x1f55},
{0x1f51, 0x0342, 0x1f57},
{0x1f59, 0x0300, 0x1f5b},
{0x1f59, 0x0301, 0x1f5d},
{0x1f59, 0x0342, 0x1f5f},
{0x1f60, 0x0300, 0x1f62},
{0x1f60, 0x0301, 0x1f64},
{0x1f60, 0x0342, 0x1f66},
{0x1f60, 0x0345, 0x1fa0},
{0x1f61, 0x0300, 0x1f63},
{0x1f61, 0x0301, 0x1f65},
{0x1f61, 0x0342, 0x1f67},
{0x1f61, 0x0345, 0x1fa1},
{0x1f62, 0x0345, 0x1fa2},
{0x1f63, 0x0345, 0x1fa3},
{0x1f64, 0x0345, 0x1fa4},
{0x1f65, 0x0345, 0x1fa5},
{0x1f66, 0x0345, 0x1fa6},
{0x1f67, 0x0345, 0x1fa7},
{0x1f68, 0x0300, 0x1f6a},
{0x1f68, 0x0301, 0x1f6c},
{0x1f68, 0x0342, 0x1f6e},
{0x1f68, 0x0345, 0x1fa8},
{0x1f69, 0x0300, 0x1f6b},
{0x1f69, 0x0301, 0x1f6d},
{0x1f69, 0x0342, 0x1f6f},
{0x1f69, 0x0345, 0x1fa9},
{0x1f6a, 0x0345, 0x1faa},
{0x1f6b, 0x0345, 0x1fab},
{0x1f6c, 0x0345, 0x1fac},
{0x1f6d, 0x0345, 0x1fad},
{0x1f6e, 0x0345, 0x1fae},
{0x1f6f, 0x0345, 0x1faf},
{0x1f70, 0x0345, 0x1fb2},
{0x1f74, 0x0345, 0x1fc2},
{0x1f7c, 0x0345, 0x1ff2},
{0x1fb6, 0x0345, 0x1fb7},
{0x1fbf, 0x0300, 0x1fcd},
{0x1fbf, 0x0301, 0x1fce},
{0x1fbf, 0x0342, 0x1fcf},
{0x1fc6, 0x0345, 0x1fc7},
{0x1ff6, 0x0345, 0x1ff7},
{0x1ffe, 0x0300, 0x1fdd},
{0x1ffe, 0x0301, 0x1fde},
{0x1ffe, 0x0342, 0x1fdf},
{0x2190, 0x0338, 0x219a},
{0x2192, 0x0338, 0x219b},
{0x2194, 0x0338, 0x21ae},
{0x21d0, 0x0338, 0x21cd},
{0x21d2, 0x0338, 0x21cf},
{0x21d4, 0x0338, 0x21ce},
{0x2203, 0x0338, 0x2204},
{0x2208, 0x0338, 0x2209},
{0x220b, 0x0338, 0x220c},
{0x2223, 0x0338, 0x2224},
{0x2225, 0x0338, 0x2226},
{0x223c, 0x0338, 0x2241},
{0x2243, 0x0338, 0x2244},
{0x2245, 0x0338, 0x2247},
{0x2248, 0x0338, 0x2249},
{0x224d, 0x0338, 0x226d},
{0x2261, 0x0338, 0x2262},
{0x2264, 0x0338, 0x2270},
{0x2265, 0x0338, 0x2271},
{0x2272, 0x0338, 0x2274},
{0x2273, 0x0338, 0x2275},
{0x2276, 0x0338, 0x2278},
{0x2277, 0x0338, 0x2279},
{0x227a, 0x0338, 0x2280},
{0x227b, 0x0338, 0x2281},
{0x227c, 0x0338, 0x22e0},
{0x227d, 0x0338, 0x22e1},
{0x2282, 0x0338, 0x2284},
{0x2283, 0x0338, 0x2285},
{0x2286, 0x0338, 0x2288},
{0x2287, 0x0338, 0x2289},
{0x2291, 0x0338, 0x22e2},
{0x2292, 0x0338, 0x22e3},
{0x22a2, 0x0338, 0x22ac},
{0x22a8, 0x0338, 0x22ad},
{0x22a9, 0x0338, 0x22ae},
{0x22ab, 0x0338, 0x22af},
{0x22b2, 0x0338, 0x22ea},
{0x22b3, 0x0338, 0x22eb},
{0x22b4, 0x0338, 0x22ec},
{0x22b5, 0x0338, 0x22ed},
{0x3046, 0x3099, 0x3094},
{0x304b, 0x3099, 0x304c},
{0x304d, 0x3099, 0x304e},
{0x304f, 0x3099, 0x3050},
{0x3051, 0x3099, 0x3052},
{0x3053, 0x3099, 0x3054},
{0x3055, 0x3099, 0x3056},
{0x3057, 0x3099, 0x3058},
{0x3059, 0x3099, 0x305a},
{0x305b, 0x3099, 0x305c},
{0x305d, 0x3099, 0x305e},
{0x305f, 0x3099, 0x3060},
{0x3061, 0x3099, 0x3062},
{0x3064, 0x3099, 0x3065},
{0x3066, 0x3099, 0x3067},
{0x3068, 0x3099, 0x3069},
{0x306f, 0x3099, 0x3070},
{0x306f, 0x309a, 0x3071},
{0x3072, 0x3099, 0x3073},
{0x3072, 0x309a, 0x3074},
{0x3075, 0x3099, 0x3076},
{0x3075, 0x309a, 0x3077},
{0x3078, 0x3099, 0x3079},
{0x3078, 0x309a, 0x307a},
{0x307b, 0x3099, 0x307c},
{0x307b, 0x309a, 0x307d},
{0x309d, 0x3099, 0x309e},
{0x30a6, 0x3099, 0x30f4},
{0x30ab, 0x3099, 0x30ac},
{0x30ad, 0x3099, 0x30ae},
{0x30af, 0x3099, 0x30b0},
{0x30b1, 0x3099, 0x30b2},
{0x30b3, 0x3099, 0x30b4},
{0x30b5, 0x3099, 0x30b6},
{0x30b7, 0x3099, 0x30b8},
{0x30b9, 0x3099, 0x30ba},
{0x30bb, 0x3099, 0x30bc},
{0x30bd, 0x3099, 0x30be},
{0x30bf, 0x3099, 0x30c0},
{0x30c1, 0x3099, 0x30c2},
{0x30c4, 0x3099, 0x30c5},
{0x30c6, 0x3099, 0x30c7},
{0x30c8, 0x3099, 0x30c9},
{0x30cf, 0x3099, 0x30d0},
{0x30cf, 0x309a, 0x30d1},
{0x30d2, 0x3099, 0x30d3},
{0x30d2, 0x309a, 0x30d4},
{0x30d5, 0x3099, 0x30d6},
{0x30d5, 0x309a, 0x30d7},
{0x30d8, 0x3099, 0x30d9},
{0x30d8, 0x309a, 0x30da},
{0x30db, 0x3099, 0x30dc},
{0x30db, 0x309a, 0x30dd},
{0x30ef, 0x3099, 0x30f7},
{0x30f0, 0x3099, 0x30f8},
{0x30f1, 0x3099, 0x30f9},
{0x30f2, 0x3099, 0x30fa},
{0x30fd, 0x3099, 0x30fe},
{0x11099, 0x110ba, 0x1109a},
{0x1109b, 0x110ba, 0x1109c},
{0x110a5, 0x110ba, 0x110ab},
{0x11131, 0x11127, 0x1112e},
{0x11132, 0x11127, 0x1112f},
{0x11347, 0x1133e, 0x1134b},
{0x11347, 0x11357, 0x1134c},
{0x114b9, 0x114b0, 0x114bc},
{0x114b9, 0x114ba, 0x114bb},
{0x114b9, 0x114bd, 0x114be},
{0x115b8, 0x115af, 0x115ba},
{0x115b9, 0x115af, 0x115bb},
{0x11935, 0x11930, 0x11938},

2071
unicode/canonical_decomp.h Normal file

File diff suppressed because it is too large Load Diff

398
unicode/combining_classes.h Normal file
View File

@ -0,0 +1,398 @@
/*
* Autogenerated by read_ucd.py from The Unicode Standard 15.0.0
*
* List the canonical combining class of each Unicode character, if it is
* not zero. This controls how combining marks can be reordered by the
* Unicode normalisation algorithms.
*
* Each entry has the form {first, last, class}: every code point in the
* inclusive range first..last has the combining class 'class'.
*
* Used by utils/unicode-norm.c.
*/
{0x0300, 0x0314, 230},
{0x0315, 0x0315, 232},
{0x0316, 0x0319, 220},
{0x031a, 0x031a, 232},
{0x031b, 0x031b, 216},
{0x031c, 0x0320, 220},
{0x0321, 0x0322, 202},
{0x0323, 0x0326, 220},
{0x0327, 0x0328, 202},
{0x0329, 0x0333, 220},
{0x0334, 0x0338, 1},
{0x0339, 0x033c, 220},
{0x033d, 0x0344, 230},
{0x0345, 0x0345, 240},
{0x0346, 0x0346, 230},
{0x0347, 0x0349, 220},
{0x034a, 0x034c, 230},
{0x034d, 0x034e, 220},
{0x0350, 0x0352, 230},
{0x0353, 0x0356, 220},
{0x0357, 0x0357, 230},
{0x0358, 0x0358, 232},
{0x0359, 0x035a, 220},
{0x035b, 0x035b, 230},
{0x035c, 0x035c, 233},
{0x035d, 0x035e, 234},
{0x035f, 0x035f, 233},
{0x0360, 0x0361, 234},
{0x0362, 0x0362, 233},
{0x0363, 0x036f, 230},
{0x0483, 0x0487, 230},
{0x0591, 0x0591, 220},
{0x0592, 0x0595, 230},
{0x0596, 0x0596, 220},
{0x0597, 0x0599, 230},
{0x059a, 0x059a, 222},
{0x059b, 0x059b, 220},
{0x059c, 0x05a1, 230},
{0x05a2, 0x05a7, 220},
{0x05a8, 0x05a9, 230},
{0x05aa, 0x05aa, 220},
{0x05ab, 0x05ac, 230},
{0x05ad, 0x05ad, 222},
{0x05ae, 0x05ae, 228},
{0x05af, 0x05af, 230},
{0x05b0, 0x05b0, 10},
{0x05b1, 0x05b1, 11},
{0x05b2, 0x05b2, 12},
{0x05b3, 0x05b3, 13},
{0x05b4, 0x05b4, 14},
{0x05b5, 0x05b5, 15},
{0x05b6, 0x05b6, 16},
{0x05b7, 0x05b7, 17},
{0x05b8, 0x05b8, 18},
{0x05b9, 0x05ba, 19},
{0x05bb, 0x05bb, 20},
{0x05bc, 0x05bc, 21},
{0x05bd, 0x05bd, 22},
{0x05bf, 0x05bf, 23},
{0x05c1, 0x05c1, 24},
{0x05c2, 0x05c2, 25},
{0x05c4, 0x05c4, 230},
{0x05c5, 0x05c5, 220},
{0x05c7, 0x05c7, 18},
{0x0610, 0x0617, 230},
{0x0618, 0x0618, 30},
{0x0619, 0x0619, 31},
{0x061a, 0x061a, 32},
{0x064b, 0x064b, 27},
{0x064c, 0x064c, 28},
{0x064d, 0x064d, 29},
{0x064e, 0x064e, 30},
{0x064f, 0x064f, 31},
{0x0650, 0x0650, 32},
{0x0651, 0x0651, 33},
{0x0652, 0x0652, 34},
{0x0653, 0x0654, 230},
{0x0655, 0x0656, 220},
{0x0657, 0x065b, 230},
{0x065c, 0x065c, 220},
{0x065d, 0x065e, 230},
{0x065f, 0x065f, 220},
{0x0670, 0x0670, 35},
{0x06d6, 0x06dc, 230},
{0x06df, 0x06e2, 230},
{0x06e3, 0x06e3, 220},
{0x06e4, 0x06e4, 230},
{0x06e7, 0x06e8, 230},
{0x06ea, 0x06ea, 220},
{0x06eb, 0x06ec, 230},
{0x06ed, 0x06ed, 220},
{0x0711, 0x0711, 36},
{0x0730, 0x0730, 230},
{0x0731, 0x0731, 220},
{0x0732, 0x0733, 230},
{0x0734, 0x0734, 220},
{0x0735, 0x0736, 230},
{0x0737, 0x0739, 220},
{0x073a, 0x073a, 230},
{0x073b, 0x073c, 220},
{0x073d, 0x073d, 230},
{0x073e, 0x073e, 220},
{0x073f, 0x0741, 230},
{0x0742, 0x0742, 220},
{0x0743, 0x0743, 230},
{0x0744, 0x0744, 220},
{0x0745, 0x0745, 230},
{0x0746, 0x0746, 220},
{0x0747, 0x0747, 230},
{0x0748, 0x0748, 220},
{0x0749, 0x074a, 230},
{0x07eb, 0x07f1, 230},
{0x07f2, 0x07f2, 220},
{0x07f3, 0x07f3, 230},
{0x07fd, 0x07fd, 220},
{0x0816, 0x0819, 230},
{0x081b, 0x0823, 230},
{0x0825, 0x0827, 230},
{0x0829, 0x082d, 230},
{0x0859, 0x085b, 220},
{0x0898, 0x0898, 230},
{0x0899, 0x089b, 220},
{0x089c, 0x089f, 230},
{0x08ca, 0x08ce, 230},
{0x08cf, 0x08d3, 220},
{0x08d4, 0x08e1, 230},
{0x08e3, 0x08e3, 220},
{0x08e4, 0x08e5, 230},
{0x08e6, 0x08e6, 220},
{0x08e7, 0x08e8, 230},
{0x08e9, 0x08e9, 220},
{0x08ea, 0x08ec, 230},
{0x08ed, 0x08ef, 220},
{0x08f0, 0x08f0, 27},
{0x08f1, 0x08f1, 28},
{0x08f2, 0x08f2, 29},
{0x08f3, 0x08f5, 230},
{0x08f6, 0x08f6, 220},
{0x08f7, 0x08f8, 230},
{0x08f9, 0x08fa, 220},
{0x08fb, 0x08ff, 230},
{0x093c, 0x093c, 7},
{0x094d, 0x094d, 9},
{0x0951, 0x0951, 230},
{0x0952, 0x0952, 220},
{0x0953, 0x0954, 230},
{0x09bc, 0x09bc, 7},
{0x09cd, 0x09cd, 9},
{0x09fe, 0x09fe, 230},
{0x0a3c, 0x0a3c, 7},
{0x0a4d, 0x0a4d, 9},
{0x0abc, 0x0abc, 7},
{0x0acd, 0x0acd, 9},
{0x0b3c, 0x0b3c, 7},
{0x0b4d, 0x0b4d, 9},
{0x0bcd, 0x0bcd, 9},
{0x0c3c, 0x0c3c, 7},
{0x0c4d, 0x0c4d, 9},
{0x0c55, 0x0c55, 84},
{0x0c56, 0x0c56, 91},
{0x0cbc, 0x0cbc, 7},
{0x0ccd, 0x0ccd, 9},
{0x0d3b, 0x0d3c, 9},
{0x0d4d, 0x0d4d, 9},
{0x0dca, 0x0dca, 9},
{0x0e38, 0x0e39, 103},
{0x0e3a, 0x0e3a, 9},
{0x0e48, 0x0e4b, 107},
{0x0eb8, 0x0eb9, 118},
{0x0eba, 0x0eba, 9},
{0x0ec8, 0x0ecb, 122},
{0x0f18, 0x0f19, 220},
{0x0f35, 0x0f35, 220},
{0x0f37, 0x0f37, 220},
{0x0f39, 0x0f39, 216},
{0x0f71, 0x0f71, 129},
{0x0f72, 0x0f72, 130},
{0x0f74, 0x0f74, 132},
{0x0f7a, 0x0f7d, 130},
{0x0f80, 0x0f80, 130},
{0x0f82, 0x0f83, 230},
{0x0f84, 0x0f84, 9},
{0x0f86, 0x0f87, 230},
{0x0fc6, 0x0fc6, 220},
{0x1037, 0x1037, 7},
{0x1039, 0x103a, 9},
{0x108d, 0x108d, 220},
{0x135d, 0x135f, 230},
{0x1714, 0x1715, 9},
{0x1734, 0x1734, 9},
{0x17d2, 0x17d2, 9},
{0x17dd, 0x17dd, 230},
{0x18a9, 0x18a9, 228},
{0x1939, 0x1939, 222},
{0x193a, 0x193a, 230},
{0x193b, 0x193b, 220},
{0x1a17, 0x1a17, 230},
{0x1a18, 0x1a18, 220},
{0x1a60, 0x1a60, 9},
{0x1a75, 0x1a7c, 230},
{0x1a7f, 0x1a7f, 220},
{0x1ab0, 0x1ab4, 230},
{0x1ab5, 0x1aba, 220},
{0x1abb, 0x1abc, 230},
{0x1abd, 0x1abd, 220},
{0x1abf, 0x1ac0, 220},
{0x1ac1, 0x1ac2, 230},
{0x1ac3, 0x1ac4, 220},
{0x1ac5, 0x1ac9, 230},
{0x1aca, 0x1aca, 220},
{0x1acb, 0x1ace, 230},
{0x1b34, 0x1b34, 7},
{0x1b44, 0x1b44, 9},
{0x1b6b, 0x1b6b, 230},
{0x1b6c, 0x1b6c, 220},
{0x1b6d, 0x1b73, 230},
{0x1baa, 0x1bab, 9},
{0x1be6, 0x1be6, 7},
{0x1bf2, 0x1bf3, 9},
{0x1c37, 0x1c37, 7},
{0x1cd0, 0x1cd2, 230},
{0x1cd4, 0x1cd4, 1},
{0x1cd5, 0x1cd9, 220},
{0x1cda, 0x1cdb, 230},
{0x1cdc, 0x1cdf, 220},
{0x1ce0, 0x1ce0, 230},
{0x1ce2, 0x1ce8, 1},
{0x1ced, 0x1ced, 220},
{0x1cf4, 0x1cf4, 230},
{0x1cf8, 0x1cf9, 230},
{0x1dc0, 0x1dc1, 230},
{0x1dc2, 0x1dc2, 220},
{0x1dc3, 0x1dc9, 230},
{0x1dca, 0x1dca, 220},
{0x1dcb, 0x1dcc, 230},
{0x1dcd, 0x1dcd, 234},
{0x1dce, 0x1dce, 214},
{0x1dcf, 0x1dcf, 220},
{0x1dd0, 0x1dd0, 202},
{0x1dd1, 0x1df5, 230},
{0x1df6, 0x1df6, 232},
{0x1df7, 0x1df8, 228},
{0x1df9, 0x1df9, 220},
{0x1dfa, 0x1dfa, 218},
{0x1dfb, 0x1dfb, 230},
{0x1dfc, 0x1dfc, 233},
{0x1dfd, 0x1dfd, 220},
{0x1dfe, 0x1dfe, 230},
{0x1dff, 0x1dff, 220},
{0x20d0, 0x20d1, 230},
{0x20d2, 0x20d3, 1},
{0x20d4, 0x20d7, 230},
{0x20d8, 0x20da, 1},
{0x20db, 0x20dc, 230},
{0x20e1, 0x20e1, 230},
{0x20e5, 0x20e6, 1},
{0x20e7, 0x20e7, 230},
{0x20e8, 0x20e8, 220},
{0x20e9, 0x20e9, 230},
{0x20ea, 0x20eb, 1},
{0x20ec, 0x20ef, 220},
{0x20f0, 0x20f0, 230},
{0x2cef, 0x2cf1, 230},
{0x2d7f, 0x2d7f, 9},
{0x2de0, 0x2dff, 230},
{0x302a, 0x302a, 218},
{0x302b, 0x302b, 228},
{0x302c, 0x302c, 232},
{0x302d, 0x302d, 222},
{0x302e, 0x302f, 224},
{0x3099, 0x309a, 8},
{0xa66f, 0xa66f, 230},
{0xa674, 0xa67d, 230},
{0xa69e, 0xa69f, 230},
{0xa6f0, 0xa6f1, 230},
{0xa806, 0xa806, 9},
{0xa82c, 0xa82c, 9},
{0xa8c4, 0xa8c4, 9},
{0xa8e0, 0xa8f1, 230},
{0xa92b, 0xa92d, 220},
{0xa953, 0xa953, 9},
{0xa9b3, 0xa9b3, 7},
{0xa9c0, 0xa9c0, 9},
{0xaab0, 0xaab0, 230},
{0xaab2, 0xaab3, 230},
{0xaab4, 0xaab4, 220},
{0xaab7, 0xaab8, 230},
{0xaabe, 0xaabf, 230},
{0xaac1, 0xaac1, 230},
{0xaaf6, 0xaaf6, 9},
{0xabed, 0xabed, 9},
{0xfb1e, 0xfb1e, 26},
{0xfe20, 0xfe26, 230},
{0xfe27, 0xfe2d, 220},
{0xfe2e, 0xfe2f, 230},
{0x101fd, 0x101fd, 220},
{0x102e0, 0x102e0, 220},
{0x10376, 0x1037a, 230},
{0x10a0d, 0x10a0d, 220},
{0x10a0f, 0x10a0f, 230},
{0x10a38, 0x10a38, 230},
{0x10a39, 0x10a39, 1},
{0x10a3a, 0x10a3a, 220},
{0x10a3f, 0x10a3f, 9},
{0x10ae5, 0x10ae5, 230},
{0x10ae6, 0x10ae6, 220},
{0x10d24, 0x10d27, 230},
{0x10eab, 0x10eac, 230},
{0x10efd, 0x10eff, 220},
{0x10f46, 0x10f47, 220},
{0x10f48, 0x10f4a, 230},
{0x10f4b, 0x10f4b, 220},
{0x10f4c, 0x10f4c, 230},
{0x10f4d, 0x10f50, 220},
{0x10f82, 0x10f82, 230},
{0x10f83, 0x10f83, 220},
{0x10f84, 0x10f84, 230},
{0x10f85, 0x10f85, 220},
{0x11046, 0x11046, 9},
{0x11070, 0x11070, 9},
{0x1107f, 0x1107f, 9},
{0x110b9, 0x110b9, 9},
{0x110ba, 0x110ba, 7},
{0x11100, 0x11102, 230},
{0x11133, 0x11134, 9},
{0x11173, 0x11173, 7},
{0x111c0, 0x111c0, 9},
{0x111ca, 0x111ca, 7},
{0x11235, 0x11235, 9},
{0x11236, 0x11236, 7},
{0x112e9, 0x112e9, 7},
{0x112ea, 0x112ea, 9},
{0x1133b, 0x1133c, 7},
{0x1134d, 0x1134d, 9},
{0x11366, 0x1136c, 230},
{0x11370, 0x11374, 230},
{0x11442, 0x11442, 9},
{0x11446, 0x11446, 7},
{0x1145e, 0x1145e, 230},
{0x114c2, 0x114c2, 9},
{0x114c3, 0x114c3, 7},
{0x115bf, 0x115bf, 9},
{0x115c0, 0x115c0, 7},
{0x1163f, 0x1163f, 9},
{0x116b6, 0x116b6, 9},
{0x116b7, 0x116b7, 7},
{0x1172b, 0x1172b, 9},
{0x11839, 0x11839, 9},
{0x1183a, 0x1183a, 7},
{0x1193d, 0x1193e, 9},
{0x11943, 0x11943, 7},
{0x119e0, 0x119e0, 9},
{0x11a34, 0x11a34, 9},
{0x11a47, 0x11a47, 9},
{0x11a99, 0x11a99, 9},
{0x11c3f, 0x11c3f, 9},
{0x11d42, 0x11d42, 7},
{0x11d44, 0x11d45, 9},
{0x11d97, 0x11d97, 9},
{0x11f41, 0x11f42, 9},
{0x16af0, 0x16af4, 1},
{0x16b30, 0x16b36, 230},
{0x16ff0, 0x16ff1, 6},
{0x1bc9e, 0x1bc9e, 1},
{0x1d165, 0x1d166, 216},
{0x1d167, 0x1d169, 1},
{0x1d16d, 0x1d16d, 226},
{0x1d16e, 0x1d172, 216},
{0x1d17b, 0x1d182, 220},
{0x1d185, 0x1d189, 230},
{0x1d18a, 0x1d18b, 220},
{0x1d1aa, 0x1d1ad, 230},
{0x1d242, 0x1d244, 230},
{0x1e000, 0x1e006, 230},
{0x1e008, 0x1e018, 230},
{0x1e01b, 0x1e021, 230},
{0x1e023, 0x1e024, 230},
{0x1e026, 0x1e02a, 230},
{0x1e08f, 0x1e08f, 230},
{0x1e130, 0x1e136, 230},
{0x1e2ae, 0x1e2ae, 230},
{0x1e2ec, 0x1e2ef, 230},
{0x1e4ec, 0x1e4ed, 232},
{0x1e4ee, 0x1e4ee, 220},
{0x1e4ef, 0x1e4ef, 230},
{0x1e8d0, 0x1e8d6, 220},
{0x1e944, 0x1e949, 230},
{0x1e94a, 0x1e94a, 7},

View File

@ -20,7 +20,9 @@ import zipfile
# One record per code point, as yielded while reading UnicodeData.txt.
# The field order matters to anything that unpacks the tuple positionally:
# code point, general category, canonical combining class, bidi class,
# then the decomposition type and the decomposition mapping.
UCDRecord = collections.namedtuple(
    'UCDRecord',
    'c '
    'General_Category '
    'Canonical_Combining_Class '
    'Bidi_Class '
    'Decomposition_Type '
    'Decomposition_Mapping',
)
@ -107,6 +109,12 @@ class Main:
self.write_wide_chars_list(fh)
with open("ambiguous_wide_chars.h", "w") as fh:
self.write_ambiguous_wide_chars_list(fh)
with open("combining_classes.h", "w") as fh:
self.write_combining_class_table(fh)
with open("canonical_decomp.h", "w") as fh:
self.write_canonical_decomp_table(fh)
with open("canonical_comp.h", "w") as fh:
self.write_canonical_comp_table(fh)
def find_unicode_version(self):
"""Find out the version of Unicode.
@ -166,14 +174,21 @@ class Main:
# Decode some of the raw fields into more cooked
# forms.
cclass = int(cclass)
# For the moment, we only care about decomposition
# mappings that consist of a single hex number (i.e.
# are singletons and not compatibility mappings)
try:
dm = [int(decomp, 16)]
except ValueError:
dm = []
# Separate the decomposition field into decomposition
# type and mapping.
if decomp == "":
dtype = decomp = None
elif "<" not in decomp:
dtype = 'canonical'
else:
assert decomp.startswith("<")
dtype, decomp = decomp[1:].split(">", 1)
decomp = decomp.lstrip(" ")
# And decode the mapping part from hex strings to integers.
if decomp is not None:
decomp = [int(w, 16) for w in decomp.split(" ")]
# And yield a UCDRecord for each code point in our
# range.
@ -181,8 +196,10 @@ class Main:
yield UCDRecord(
c=codepoint,
General_Category=category,
Canonical_Combining_Class=cclass,
Bidi_Class=bidiclass,
Decomposition_Mapping=dm,
Decomposition_Type=dtype,
Decomposition_Mapping=decomp,
)
@property
@ -231,6 +248,16 @@ class Main:
for c in cs:
yield c, fields[1]
@property
def CompositionExclusions(self):
    """Iterate over the entries of CompositionExclusions.txt.

    Every item yielded is a single code point, parsed from hex into
    an integer.
    """
    with self.open_ucd_file("CompositionExclusions.txt") as ucd_file:
        # The file stays open for as long as the generator is being
        # consumed, because the 'with' block encloses the delegation.
        yield from (int(entry, 16) for entry in lines(ucd_file))
def write_file_header_comment(self, fh, description):
print("/*", file=fh)
print(" * Autogenerated by read_ucd.py from",
@ -311,7 +338,8 @@ Used by terminal/bidi.c.
equivalents = {}
for rec in self.UnicodeData:
if len(rec.Decomposition_Mapping) == 1:
if (rec.Decomposition_Type == 'canonical' and
len(rec.Decomposition_Mapping) == 1):
c = rec.c
c2 = rec.Decomposition_Mapping[0]
equivalents[c] = c2
@ -389,5 +417,78 @@ Used by utils/wcwidth.c.
""")
self.write_width_table(fh, {'A'})
def write_combining_class_table(self, fh):
    """Write the table of nonzero canonical combining classes to fh.

    After the file header comment, one line is written for each
    ((start, end), class) item produced by map_to_ranges, formatted
    as a brace-enclosed triple: two hex code points and a decimal
    combining class.
    """
    self.write_file_header_comment(fh, """
List the canonical combining class of each Unicode character, if it is
not zero. This controls how combining marks can be reordered by the
Unicode normalisation algorithms.
Used by utils/unicode-norm.c.
""")

    # Characters with combining class zero are left out of the table
    # entirely.
    nonzero_classes = {
        rec.c: rec.Canonical_Combining_Class
        for rec in self.UnicodeData
        if rec.Canonical_Combining_Class != 0
    }

    for span, ccc in map_to_ranges(nonzero_classes):
        first, last = span
        print(f"{{0x{first:04x}, 0x{last:04x}, {ccc:d}}},", file=fh)
def write_canonical_decomp_table(self, fh):
    """Write the table of canonical decompositions to fh.

    After the file header comment, one line is written per character
    whose Decomposition_Type is 'canonical', in increasing order of
    code point. Each line is a brace-enclosed triple: the character,
    the first character of its mapping, and the second character of
    its mapping (or a literal 0 if the mapping is a single character).

    Raises ValueError if a canonical mapping has more than two code
    points, because the three-field row cannot represent it and
    truncating would silently produce a wrong table.
    """
    self.write_file_header_comment(fh, """
List the canonical decomposition of every Unicode character that has
one. This consists of up to two characters, but those may need
decomposition in turn.
Used by utils/unicode-norm.c.
""")

    decomps = {}

    for rec in self.UnicodeData:
        if rec.Decomposition_Type != 'canonical':
            continue

        mapping = rec.Decomposition_Mapping
        if len(mapping) > 2:
            raise ValueError(
                f"canonical decomposition of U+{rec.c:04X} has "
                f"{len(mapping)} code points; at most 2 are supported")

        # Fill in a zero code point as the second character, if
        # it's only one character long
        decomps[rec.c] = (mapping + [0])[:2]

    for c, (d1, d2) in sorted(decomps.items()):
        # A missing second character is written as a bare 0 rather
        # than as a zero-padded hex literal.
        d2s = f"0x{d2:04x}" if d2 else "0"
        print(f"{{0x{c:04x}, 0x{d1:04x}, {d2s}}},", file=fh)
def write_canonical_comp_table(self, fh):
    """Write the table of canonical composition pairs to fh.

    After the file header comment, one line is written per accepted
    pair, sorted by the pair itself. Each line is a brace-enclosed
    triple: the two characters of the decomposition, followed by the
    single character they recompose to.
    """
    self.write_file_header_comment(fh, """
List the pairs of Unicode characters that canonically recompose to a
single character in NFC.
Used by utils/unicode-norm.c.
""")

    excluded = set(self.CompositionExclusions)
    nonzero_ccc = {rec.c for rec in self.UnicodeData
                   if rec.Canonical_Combining_Class != 0}

    composites = {}
    for rec in self.UnicodeData:
        mapping = rec.Decomposition_Mapping
        # The tests are ordered so that the mapping is only inspected
        # once the decomposition is known to be canonical.
        wanted = (
            # compatibility decompositions are not recomposed
            rec.Decomposition_Type == 'canonical'
            # nor are singletons
            and len(mapping) == 2
            # nor anything explicitly excluded
            and rec.c not in excluded
            # nor non-starter decompositions: the character itself, or
            # the first character of its mapping, has a nonzero
            # combining class
            and rec.c not in nonzero_ccc
            and mapping[0] not in nonzero_ccc
        )
        if wanted:
            composites[tuple(mapping)] = rec.c

    for pair in sorted(composites):
        first, second = pair
        composed = composites[pair]
        print(f"{{0x{first:04x}, 0x{second:04x}, 0x{composed:04x}}},",
              file=fh)
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    Main().run()