/************************************************************************ * * ------------ * Description: * ------------ * This is an implementation of Unicode's Bidirectional Algorithm * (known as UAX #9). * * http://www.unicode.org/reports/tr9/ * * Author: Ahmad Khalifa * * (www.arabeyes.org - under MIT license) * ************************************************************************/ /* * TODO: * ===== * - Explicit marks need to be handled (they are not 100% now) * - Ligatures */ #include /* definition of wchar_t*/ /* NOTE(review): the header name after the #include above is missing in this copy - presumably <stddef.h>; confirm against a pristine copy before building. */ #include "misc.h" #define LMASK 0x3F /* Embedding Level mask */ #define OMASK 0xC0 /* Override mask */ #define OISL 0x80 /* Override is L */ #define OISR 0x40 /* Override is R */ /* For standalone compilation in a testing mode. * Still depends on the PuTTY headers for snewn and sfree, but can avoid * _linking_ with any other PuTTY code. */ #ifdef TEST_GETTYPE #define safemalloc malloc #define safefree free #endif /* Shaping Helpers: STYPE(xh) is the shapetypes[] joining type for xh, or SU when xh lies outside SHAPE_FIRST..SHAPE_LAST. SISOLATED(xh) is the shapetypes[] form_b value for xh and performs no range check of its own. SFINAL, SINITIAL and SMEDIAL add 1, 2 and 3 to their argument; do_shape() applies them to the result of SISOLATED(). */ #define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? 
\ shapetypes[(xh)-SHAPE_FIRST].type : SU) /*))*/ #define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b) #define SFINAL(xh) ((xh)+1) #define SINITIAL(xh) ((xh)+2) #define SMEDIAL(ch) ((ch)+3) /* leastGreaterOdd(x) / leastGreaterEven(x): for non-negative x, the smallest odd / even value strictly greater than x. */ #define leastGreaterOdd(x) ( ((x)+1) | 1 ) #define leastGreaterEven(x) ( ((x)+2) &~ 1 ) /* One cell of a line. wc is the code point that getType() and the shaping macros are applied to, and the member do_shape() writes in its output buffer. origwc and index are not read or written individually by the code visible in this file (presumably the original code point and original position - TODO confirm). */ typedef struct bidi_char { unsigned int origwc, wc; unsigned short index; } bidi_char; /* function declarations */ void flipThisRun(bidi_char *from, unsigned char* level, int max, int count); int findIndexOfRun(unsigned char* level , int start, int count, int tlevel); unsigned char getType(int ch); unsigned char setOverrideBits(unsigned char level, unsigned char override); int getPreviousLevel(unsigned char* level, int from); int do_shape(bidi_char *line, bidi_char *to, int count); int do_bidi(bidi_char *line, int count); void doMirror(unsigned int *ch); /* character types (the values stored in the types[] working array of do_bidi(); main() under TEST_GETTYPE indexes its name table by these values, so that table must list them in this same order) */ enum { L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON }; /* Shaping Types */ enum { SL, /* Left-Joining, doesn't exist in U+0600 - U+06FF */ SR, /* Right-Joining, ie has Isolated, Final */ SD, /* Dual-Joining, ie has Isolated, Final, Initial, Medial */ SU, /* Non-Joining */ SC /* Join-Causing, like U+0640 (TATWEEL) */ }; /* One shapetypes[] entry: type holds one of the Shaping Types above, form_b is the value that SISOLATED() yields for the entry. */ typedef struct { char type; wchar_t form_b; } shape_node; /* Kept near the actual table, for verification. 
*/ #define SHAPE_FIRST 0x621 #define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1) const shape_node shapetypes[] = { /* index, Typ, Iso, Ligature Index*/ /* Entry k describes code point SHAPE_FIRST + k (the macros index the table with (xh)-SHAPE_FIRST). NOTE(review): every initializer supplies only {type, form_b}; shape_node has no ligature-index member, so the last column named in the heading above does not exist. */ /* 621 */ {SU, 0xFE80}, /* 622 */ {SR, 0xFE81}, /* 623 */ {SR, 0xFE83}, /* 624 */ {SR, 0xFE85}, /* 625 */ {SR, 0xFE87}, /* 626 */ {SD, 0xFE89}, /* 627 */ {SR, 0xFE8D}, /* 628 */ {SD, 0xFE8F}, /* 629 */ {SR, 0xFE93}, /* 62A */ {SD, 0xFE95}, /* 62B */ {SD, 0xFE99}, /* 62C */ {SD, 0xFE9D}, /* 62D */ {SD, 0xFEA1}, /* 62E */ {SD, 0xFEA5}, /* 62F */ {SR, 0xFEA9}, /* 630 */ {SR, 0xFEAB}, /* 631 */ {SR, 0xFEAD}, /* 632 */ {SR, 0xFEAF}, /* 633 */ {SD, 0xFEB1}, /* 634 */ {SD, 0xFEB5}, /* 635 */ {SD, 0xFEB9}, /* 636 */ {SD, 0xFEBD}, /* 637 */ {SD, 0xFEC1}, /* 638 */ {SD, 0xFEC5}, /* 639 */ {SD, 0xFEC9}, /* 63A */ {SD, 0xFECD}, /* 63B */ {SU, 0x0}, /* 63C */ {SU, 0x0}, /* 63D */ {SU, 0x0}, /* 63E */ {SU, 0x0}, /* 63F */ {SU, 0x0}, /* 640 */ {SC, 0x0}, /* 641 */ {SD, 0xFED1}, /* 642 */ {SD, 0xFED5}, /* 643 */ {SD, 0xFED9}, /* 644 */ {SD, 0xFEDD}, /* 645 */ {SD, 0xFEE1}, /* 646 */ {SD, 0xFEE5}, /* 647 */ {SD, 0xFEE9}, /* 648 */ {SR, 0xFEED}, /* 649 */ {SR, 0xFEEF}, /* SD */ /* 64A */ {SD, 0xFEF1}, /* 64B */ {SU, 0x0}, /* 64C */ {SU, 0x0}, /* 64D */ {SU, 0x0}, /* 64E */ {SU, 0x0}, /* 64F */ {SU, 0x0}, /* 650 */ {SU, 0x0}, /* 651 */ {SU, 0x0}, /* 652 */ {SU, 0x0}, /* 653 */ {SU, 0x0}, /* 654 */ {SU, 0x0}, /* 655 */ {SU, 0x0}, /* 656 */ {SU, 0x0}, /* 657 */ {SU, 0x0}, /* 658 */ {SU, 0x0}, /* 659 */ {SU, 0x0}, /* 65A */ {SU, 0x0}, /* 65B */ {SU, 0x0}, /* 65C */ {SU, 0x0}, /* 65D */ {SU, 0x0}, /* 65E */ {SU, 0x0}, /* 65F */ {SU, 0x0}, /* 660 */ {SU, 0x0}, /* 661 */ {SU, 0x0}, /* 662 */ {SU, 0x0}, /* 663 */ {SU, 0x0}, /* 664 */ {SU, 0x0}, /* 665 */ {SU, 0x0}, /* 666 */ {SU, 0x0}, /* 667 */ {SU, 0x0}, /* 668 */ {SU, 0x0}, /* 669 */ {SU, 0x0}, /* 66A */ {SU, 0x0}, /* 66B */ {SU, 0x0}, /* 66C */ {SU, 0x0}, /* 66D */ {SU, 0x0}, /* 66E */ {SU, 0x0}, /* 66F */ {SU, 0x0}, /* 670 */ {SU, 0x0}, /* 671 */ {SR, 0xFB50}, /* 672 */ {SU, 0x0}, 
/* 673 */ {SU, 0x0}, /* 674 */ {SU, 0x0}, /* 675 */ {SU, 0x0}, /* 676 */ {SU, 0x0}, /* 677 */ {SU, 0x0}, /* 678 */ {SU, 0x0}, /* 679 */ {SD, 0xFB66}, /* 67A */ {SD, 0xFB5E}, /* 67B */ {SD, 0xFB52}, /* 67C */ {SU, 0x0}, /* 67D */ {SU, 0x0}, /* 67E */ {SD, 0xFB56}, /* 67F */ {SD, 0xFB62}, /* 680 */ {SD, 0xFB5A}, /* 681 */ {SU, 0x0}, /* 682 */ {SU, 0x0}, /* 683 */ {SD, 0xFB76}, /* 684 */ {SD, 0xFB72}, /* 685 */ {SU, 0x0}, /* 686 */ {SD, 0xFB7A}, /* 687 */ {SD, 0xFB7E}, /* 688 */ {SR, 0xFB88}, /* 689 */ {SU, 0x0}, /* 68A */ {SU, 0x0}, /* 68B */ {SU, 0x0}, /* 68C */ {SR, 0xFB84}, /* 68D */ {SR, 0xFB82}, /* 68E */ {SR, 0xFB86}, /* 68F */ {SU, 0x0}, /* 690 */ {SU, 0x0}, /* 691 */ {SR, 0xFB8C}, /* 692 */ {SU, 0x0}, /* 693 */ {SU, 0x0}, /* 694 */ {SU, 0x0}, /* 695 */ {SU, 0x0}, /* 696 */ {SU, 0x0}, /* 697 */ {SU, 0x0}, /* 698 */ {SR, 0xFB8A}, /* 699 */ {SU, 0x0}, /* 69A */ {SU, 0x0}, /* 69B */ {SU, 0x0}, /* 69C */ {SU, 0x0}, /* 69D */ {SU, 0x0}, /* 69E */ {SU, 0x0}, /* 69F */ {SU, 0x0}, /* 6A0 */ {SU, 0x0}, /* 6A1 */ {SU, 0x0}, /* 6A2 */ {SU, 0x0}, /* 6A3 */ {SU, 0x0}, /* 6A4 */ {SD, 0xFB6A}, /* 6A5 */ {SU, 0x0}, /* 6A6 */ {SD, 0xFB6E}, /* 6A7 */ {SU, 0x0}, /* 6A8 */ {SU, 0x0}, /* 6A9 */ {SD, 0xFB8E}, /* 6AA */ {SU, 0x0}, /* 6AB */ {SU, 0x0}, /* 6AC */ {SU, 0x0}, /* 6AD */ {SD, 0xFBD3}, /* 6AE */ {SU, 0x0}, /* 6AF */ {SD, 0xFB92}, /* 6B0 */ {SU, 0x0}, /* 6B1 */ {SD, 0xFB9A}, /* 6B2 */ {SU, 0x0}, /* 6B3 */ {SD, 0xFB96}, /* 6B4 */ {SU, 0x0}, /* 6B5 */ {SU, 0x0}, /* 6B6 */ {SU, 0x0}, /* 6B7 */ {SU, 0x0}, /* 6B8 */ {SU, 0x0}, /* 6B9 */ {SU, 0x0}, /* 6BA */ {SR, 0xFB9E}, /* 6BB */ {SD, 0xFBA0}, /* 6BC */ {SU, 0x0}, /* 6BD */ {SU, 0x0}, /* 6BE */ {SD, 0xFBAA}, /* 6BF */ {SU, 0x0}, /* 6C0 */ {SR, 0xFBA4}, /* 6C1 */ {SD, 0xFBA6}, /* 6C2 */ {SU, 0x0}, /* 6C3 */ {SU, 0x0}, /* 6C4 */ {SU, 0x0}, /* 6C5 */ {SR, 0xFBE0}, /* 6C6 */ {SR, 0xFBD9}, /* 6C7 */ {SR, 0xFBD7}, /* 6C8 */ {SR, 0xFBDB}, /* 6C9 */ {SR, 0xFBE2}, /* 6CA */ {SU, 0x0}, /* 6CB */ {SR, 0xFBDE}, /* 6CC */ {SD, 0xFBFC}, /* 
6CD */ {SU, 0x0}, /* 6CE */ {SU, 0x0}, /* 6CF */ {SU, 0x0}, /* 6D0 */ {SU, 0x0}, /* 6D1 */ {SU, 0x0}, /* 6D2 */ {SR, 0xFBAE}, }; /* * Flips the text buffer, according to max level, and * all higher levels * * Input: * from: text buffer, on which to apply flipping * level: resolved levels buffer * max: the maximum level found in this line (should be unsigned char) * count: line size in bidi_char */ void flipThisRun(bidi_char *from, unsigned char *level, int max, int count) { int i, j, k, tlevel; bidi_char temp; j = i = 0; while (i /* NOTE(review): source text is missing at this point in this copy - the rest of the outer loop condition and the code that positions j and k are gone; restore from a pristine copy before building. */ j; k--, j++) { /* swap cells k and j; k steps down while j steps up */ temp = from[k]; from[k] = from[j]; from[j] = temp; } } } /* * Finds the index of a run with level equals tlevel */ int findIndexOfRun(unsigned char* level , int start, int count, int tlevel) { int i; for (i=start; i /* NOTE(review): source text is missing at this point in this copy - the rest of findIndexOfRun(), the lookup[] interval table and the head of getType() are gone; what follows is the tail of getType(), which the prototype near the top of the file declares as unsigned char getType(int ch). */ 1) { /* probe the middle entry k of the index range i..j and move one end of the range to k, or return the type of the interval that contains ch */ k = (i + j) / 2; if (ch < lookup[k].first) j = k; else if (ch > lookup[k].last) i = k; else return lookup[k].type; } /* * If we reach here, the character was not in any of the * intervals listed in the lookup table. This means we return * ON (`Other Neutrals'). This is the appropriate code for any * character genuinely not listed in the Unicode table, and * also the table above has deliberately left out any * characters _explicitly_ listed as ON (to save space!). */ return ON; } /* * Function exported to front ends to allow them to identify * bidi-active characters (in case, for example, the platform's * text display function can't conveniently be prevented from doing * its own bidi and so special treatment is required for characters * that would cause the bidi algorithm to activate). * * This function is passed a single Unicode code point, and returns * nonzero if the presence of this code point can possibly cause * the bidi algorithm to do any reordering. Thus, any string * composed entirely of characters for which is_rtl() returns zero * should be safe to pass to a bidi-active platform display * function without fear. 
* * (is_rtl() must therefore also return true for any character * which would be affected by Arabic shaping, but this isn't * important because all such characters are right-to-left so it * would have flagged them anyway.) */ int is_rtl(int c) { /* * After careful reading of the Unicode bidi algorithm (URL as * given at the top of this file) I believe that the only * character classes which can possibly cause trouble are R, * AL, RLE and RLO. I think that any string containing no * character in any of those classes will be displayed * uniformly left-to-right by the Unicode bidi algorithm. */ const int mask = (1< /* NOTE(review): source text is missing at this point in this copy - the rest of is_rtl(), all of setOverrideBits() and the head of getPreviousLevel() (both declared near the top of the file) are gone. The code that follows uses `level' and `from', matching the parameters of getPreviousLevel(): it steps left past the run of equal levels ending at level[from-1] and returns the level found just before that run, or -1 when there is none. */ 0) { unsigned char current = level[--from]; while (from >= 0 && level[from] == current) from--; if (from >= 0) return level[from]; return -1; } else return -1; } /* The Main shaping function, and the only one to be used * by the outside world. * * line: buffer to apply shaping to. this must be passed by doBidi() first * to: output buffer for the shaped data * count: number of characters in line */ int do_shape(bidi_char *line, bidi_char *to, int count) { int i, tempShape; bool ligFlag = false; for (i=0; i /* NOTE(review): source text is missing at this point in this copy - the rest of the loop header and the code leading up to the ligature check are gone. */ 0) /* line[i-1] is one of 0x622/0x623/0x625/0x627: store a code point from 0xFEF5..0xFEFC in to[i], chosen by tempShape, and raise ligFlag */ switch (line[i-1].wc) { case 0x622: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEF6; else to[i].wc = 0xFEF5; break; case 0x623: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEF8; else to[i].wc = 0xFEF7; break; case 0x625: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEFA; else to[i].wc = 0xFEF9; break; case 0x627: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEFC; else to[i].wc = 0xFEFB; break; } /* a ligature was stored in to[i]: overwrite the previous output cell with a space (0x20) */ if (ligFlag) { to[i-1].wc = 0x20; ligFlag = false; break; } } /* pick the presentation form from the joining types held in tempShape: medial or final in this branch, initial or isolated after it */ if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) { tempShape = (i > 0 ? 
STYPE(line[i-1].wc) : SU); if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) to[i].wc = SMEDIAL((SISOLATED(line[i].wc))); else to[i].wc = SFINAL((SISOLATED(line[i].wc))); break; } tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU); if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) to[i].wc = SINITIAL((SISOLATED(line[i].wc))); else to[i].wc = SISOLATED(line[i].wc); break; } } /* the visible code has no failure path: the result is always 1 */ return 1; } /* * The Main Bidi Function, and the only function that should * be used by the outside world. * * line: a buffer of size count containing text to apply * the Bidirectional algorithm to. */ int do_bidi(bidi_char *line, int count) { unsigned char* types; unsigned char* levels; unsigned char paragraphLevel; unsigned char currentEmbedding; unsigned char currentOverride; unsigned char tempType; int i, j; bool yes, bover; /* Check the presence of R or AL types as optimization */ yes = false; for (i=0; i /* NOTE(review): source text is missing at this point in this copy - the rest of this loop, the allocation of types[] and levels[], and everything up to the backward scan below are gone. */ = 0) { /* scanning backwards from j: an AL met first turns types[i] into AN; an R or L met first ends the scan */ if (types[j] == AL) { types[i] = AN; break; } else if (types[j] == R || types[j] == L) { break; } j--; } } } /* Rule (W3) * W3. Change all ALs to R. * * Optimization: on Rule Xn, we might set a flag on AL type * to prevent this loop in L R lines only... */ for (i=0; i /* NOTE(review): source text is missing at this point in this copy - the body of the W3 loop and the start of the next rule are gone. */ 0 && types[i-1] == EN) { types[i] = EN; continue; } else if (i < count-1 && types[i+1] == EN) { types[i] = EN; continue; } else if (i < count-1 && types[i+1] == ET) { j=i; while (j /* NOTE(review): source text is missing at this point in this copy; the `=' that follows is what is left of a longer expression, not an assignment written by the author. */ = 0) { /* scanning backwards from j: an L met first turns types[i] into L; an R or AL met first ends the scan */ if (types[j] == L) { types[i] = L; break; } else if (types[j] == R || types[j] == AL) { break; } j--; } } } /* Rule (N1) * N1. A sequence of neutrals takes the direction of the surrounding * strong text if the text on both sides has the same direction. European * and Arabic numbers are treated as though they were R. 
*/ /* leading neutral: takes its direction from types[1] */ if (count >= 2 && types[0] == ON) { if ((types[1] == R) || (types[1] == EN) || (types[1] == AN)) types[0] = R; else if (types[1] == L) types[0] = L; } for (i=1; i<(count-1); i++) { if (types[i] == ON) { if (types[i-1] == L) { j=i; while (j<(count-1) && types[j] == ON) { j++; } if (types[j] == L) { while (i /* NOTE(review): source text is missing at this point in this copy - the rest of the interior N1 loop and the start of the trailing-neutral check are gone. */ = 2 && types[count-1] == ON) { if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN) types[count-1] = R; else if (types[count-2] == L) types[count-1] = L; } /* Rule (N2) * N2. Any remaining neutrals take the embedding direction. */ /* NOTE(review): source text is missing inside the next for-header in this copy (the fused `i0' token) - the N2 loop body and the code up to the whitespace scan are gone. The `j=i' in the for-header a little further on looks damaged in the same way. */ for (i=0; i0 && (getType(line[j].wc) == WS)) { j--; } if (j < (count-1)) { for (j++; j=i ; j--) { levels[j] = paragraphLevel; } } } else if (tempType == B || tempType == S) { levels[i] = paragraphLevel; } } /* Rule (L4) NOT IMPLEMENTED * L4. A character that possesses the mirrored property as specified by * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the * resolved directionality of that character is R. */ /* Note: this is implemented before L2 for efficiency */ for (i=0; i /* NOTE(review): source text is missing at this point in this copy - the body of the L4 loop and the head of the loop that finds the highest level are gone. */ tempType) tempType = levels[i]; i++; } /* maximum level in tempType. */ while (tempType > 0) { /* loop from highest level to the least odd, */ /* which i assume is 1 */ flipThisRun(line, levels, tempType, count); tempType--; } /* Rule (L3) NOT IMPLEMENTED * L3. Combining marks applied to a right-to-left base character will at * this point precede their base character. If the rendering engine * expects them to follow the base characters in the final display * process, then the ordering of the marks and the base character must * be reversed. */ sfree(types); sfree(levels); /* R is the character-type enumerator, so this path returns a nonzero value */ return R; } /* * Bad, Horrible function * takes a pointer to a character that is checked for * having a mirror glyph. 
/*
 * doMirror: swap a code point for its mirrored partner.
 *
 * ch: in/out. If *ch is one of the code points listed below it is
 *     overwritten with the other member of its pair; any other value
 *     (including anything above U+FFFF) is left untouched.
 *
 * Each pair is written once and MIRROR_PAIR expands it into both
 * directions, so the table cannot become asymmetric. In particular
 * U+2A2D and U+2A2E mirror each other, and U+2A2B/U+2A2C form their
 * own pair.
 */
void doMirror(unsigned int *ch)
{
#define MIRROR_PAIR(a, b) \
    case (a): *ch = (b); break; \
    case (b): *ch = (a); break

    switch (*ch) {
        /* U+00xx */
        MIRROR_PAIR(0x0028, 0x0029);
        MIRROR_PAIR(0x003C, 0x003E);
        MIRROR_PAIR(0x005B, 0x005D);
        MIRROR_PAIR(0x007B, 0x007D);
        MIRROR_PAIR(0x00AB, 0x00BB);

        /* U+20xx */
        MIRROR_PAIR(0x2039, 0x203A);
        MIRROR_PAIR(0x2045, 0x2046);
        MIRROR_PAIR(0x207D, 0x207E);
        MIRROR_PAIR(0x208D, 0x208E);

        /* U+22xx (some partners live in U+29xx / U+2Axx) */
        MIRROR_PAIR(0x2208, 0x220B);
        MIRROR_PAIR(0x2209, 0x220C);
        MIRROR_PAIR(0x220A, 0x220D);
        MIRROR_PAIR(0x2215, 0x29F5);
        MIRROR_PAIR(0x223C, 0x223D);
        MIRROR_PAIR(0x2243, 0x22CD);
        MIRROR_PAIR(0x2252, 0x2253);
        MIRROR_PAIR(0x2254, 0x2255);
        MIRROR_PAIR(0x2264, 0x2265);
        MIRROR_PAIR(0x2266, 0x2267);
        MIRROR_PAIR(0x2268, 0x2269);
        MIRROR_PAIR(0x226A, 0x226B);
        MIRROR_PAIR(0x226E, 0x226F);
        MIRROR_PAIR(0x2270, 0x2271);
        MIRROR_PAIR(0x2272, 0x2273);
        MIRROR_PAIR(0x2274, 0x2275);
        MIRROR_PAIR(0x2276, 0x2277);
        MIRROR_PAIR(0x2278, 0x2279);
        MIRROR_PAIR(0x227A, 0x227B);
        MIRROR_PAIR(0x227C, 0x227D);
        MIRROR_PAIR(0x227E, 0x227F);
        MIRROR_PAIR(0x2280, 0x2281);
        MIRROR_PAIR(0x2282, 0x2283);
        MIRROR_PAIR(0x2284, 0x2285);
        MIRROR_PAIR(0x2286, 0x2287);
        MIRROR_PAIR(0x2288, 0x2289);
        MIRROR_PAIR(0x228A, 0x228B);
        MIRROR_PAIR(0x228F, 0x2290);
        MIRROR_PAIR(0x2291, 0x2292);
        MIRROR_PAIR(0x2298, 0x29B8);
        MIRROR_PAIR(0x22A2, 0x22A3);
        MIRROR_PAIR(0x22A6, 0x2ADE);
        MIRROR_PAIR(0x22A8, 0x2AE4);
        MIRROR_PAIR(0x22A9, 0x2AE3);
        MIRROR_PAIR(0x22AB, 0x2AE5);
        MIRROR_PAIR(0x22B0, 0x22B1);
        MIRROR_PAIR(0x22B2, 0x22B3);
        MIRROR_PAIR(0x22B4, 0x22B5);
        MIRROR_PAIR(0x22B6, 0x22B7);
        MIRROR_PAIR(0x22C9, 0x22CA);
        MIRROR_PAIR(0x22CB, 0x22CC);
        MIRROR_PAIR(0x22D0, 0x22D1);
        MIRROR_PAIR(0x22D6, 0x22D7);
        MIRROR_PAIR(0x22D8, 0x22D9);
        MIRROR_PAIR(0x22DA, 0x22DB);
        MIRROR_PAIR(0x22DC, 0x22DD);
        MIRROR_PAIR(0x22DE, 0x22DF);
        MIRROR_PAIR(0x22E0, 0x22E1);
        MIRROR_PAIR(0x22E2, 0x22E3);
        MIRROR_PAIR(0x22E4, 0x22E5);
        MIRROR_PAIR(0x22E6, 0x22E7);
        MIRROR_PAIR(0x22E8, 0x22E9);
        MIRROR_PAIR(0x22EA, 0x22EB);
        MIRROR_PAIR(0x22EC, 0x22ED);
        MIRROR_PAIR(0x22F0, 0x22F1);
        MIRROR_PAIR(0x22F2, 0x22FA);
        MIRROR_PAIR(0x22F3, 0x22FB);
        MIRROR_PAIR(0x22F4, 0x22FC);
        MIRROR_PAIR(0x22F6, 0x22FD);
        MIRROR_PAIR(0x22F7, 0x22FE);

        /* U+23xx */
        MIRROR_PAIR(0x2308, 0x2309);
        MIRROR_PAIR(0x230A, 0x230B);
        MIRROR_PAIR(0x2329, 0x232A);

        /* U+27xx */
        MIRROR_PAIR(0x2768, 0x2769);
        MIRROR_PAIR(0x276A, 0x276B);
        MIRROR_PAIR(0x276C, 0x276D);
        MIRROR_PAIR(0x276E, 0x276F);
        MIRROR_PAIR(0x2770, 0x2771);
        MIRROR_PAIR(0x2772, 0x2773);
        MIRROR_PAIR(0x2774, 0x2775);
        MIRROR_PAIR(0x27D5, 0x27D6);
        MIRROR_PAIR(0x27DD, 0x27DE);
        MIRROR_PAIR(0x27E2, 0x27E3);
        MIRROR_PAIR(0x27E4, 0x27E5);
        MIRROR_PAIR(0x27E6, 0x27E7);
        MIRROR_PAIR(0x27E8, 0x27E9);
        MIRROR_PAIR(0x27EA, 0x27EB);

        /* U+29xx */
        MIRROR_PAIR(0x2983, 0x2984);
        MIRROR_PAIR(0x2985, 0x2986);
        MIRROR_PAIR(0x2987, 0x2988);
        MIRROR_PAIR(0x2989, 0x298A);
        MIRROR_PAIR(0x298B, 0x298C);
        MIRROR_PAIR(0x298D, 0x2990);
        MIRROR_PAIR(0x298E, 0x298F);
        MIRROR_PAIR(0x2991, 0x2992);
        MIRROR_PAIR(0x2993, 0x2994);
        MIRROR_PAIR(0x2995, 0x2996);
        MIRROR_PAIR(0x2997, 0x2998);
        MIRROR_PAIR(0x29C0, 0x29C1);
        MIRROR_PAIR(0x29C4, 0x29C5);
        MIRROR_PAIR(0x29CF, 0x29D0);
        MIRROR_PAIR(0x29D1, 0x29D2);
        MIRROR_PAIR(0x29D4, 0x29D5);
        MIRROR_PAIR(0x29D8, 0x29D9);
        MIRROR_PAIR(0x29DA, 0x29DB);
        MIRROR_PAIR(0x29F8, 0x29F9);
        MIRROR_PAIR(0x29FC, 0x29FD);

        /* U+2Axx */
        MIRROR_PAIR(0x2A2B, 0x2A2C);
        MIRROR_PAIR(0x2A2D, 0x2A2E);
        MIRROR_PAIR(0x2A34, 0x2A35);
        MIRROR_PAIR(0x2A3C, 0x2A3D);
        MIRROR_PAIR(0x2A64, 0x2A65);
        MIRROR_PAIR(0x2A79, 0x2A7A);
        MIRROR_PAIR(0x2A7D, 0x2A7E);
        MIRROR_PAIR(0x2A7F, 0x2A80);
        MIRROR_PAIR(0x2A81, 0x2A82);
        MIRROR_PAIR(0x2A83, 0x2A84);
        MIRROR_PAIR(0x2A8B, 0x2A8C);
        MIRROR_PAIR(0x2A91, 0x2A92);
        MIRROR_PAIR(0x2A93, 0x2A94);
        MIRROR_PAIR(0x2A95, 0x2A96);
        MIRROR_PAIR(0x2A97, 0x2A98);
        MIRROR_PAIR(0x2A99, 0x2A9A);
        MIRROR_PAIR(0x2A9B, 0x2A9C);
        MIRROR_PAIR(0x2AA1, 0x2AA2);
        MIRROR_PAIR(0x2AA6, 0x2AA7);
        MIRROR_PAIR(0x2AA8, 0x2AA9);
        MIRROR_PAIR(0x2AAA, 0x2AAB);
        MIRROR_PAIR(0x2AAC, 0x2AAD);
        MIRROR_PAIR(0x2AAF, 0x2AB0);
        MIRROR_PAIR(0x2AB3, 0x2AB4);
        MIRROR_PAIR(0x2ABB, 0x2ABC);
        MIRROR_PAIR(0x2ABD, 0x2ABE);
        MIRROR_PAIR(0x2ABF, 0x2AC0);
        MIRROR_PAIR(0x2AC1, 0x2AC2);
        MIRROR_PAIR(0x2AC3, 0x2AC4);
        MIRROR_PAIR(0x2AC5, 0x2AC6);
        MIRROR_PAIR(0x2ACD, 0x2ACE);
        MIRROR_PAIR(0x2ACF, 0x2AD0);
        MIRROR_PAIR(0x2AD1, 0x2AD2);
        MIRROR_PAIR(0x2AD3, 0x2AD4);
        MIRROR_PAIR(0x2AD5, 0x2AD6);
        MIRROR_PAIR(0x2AEC, 0x2AED);
        MIRROR_PAIR(0x2AF7, 0x2AF8);
        MIRROR_PAIR(0x2AF9, 0x2AFA);

        /* U+30xx */
        MIRROR_PAIR(0x3008, 0x3009);
        MIRROR_PAIR(0x300A, 0x300B);
        MIRROR_PAIR(0x300C, 0x300D);
        MIRROR_PAIR(0x300E, 0x300F);
        MIRROR_PAIR(0x3010, 0x3011);
        MIRROR_PAIR(0x3014, 0x3015);
        MIRROR_PAIR(0x3016, 0x3017);
        MIRROR_PAIR(0x3018, 0x3019);
        MIRROR_PAIR(0x301A, 0x301B);

        /* U+FFxx */
        MIRROR_PAIR(0xFF08, 0xFF09);
        MIRROR_PAIR(0xFF1C, 0xFF1E);
        MIRROR_PAIR(0xFF3B, 0xFF3D);
        MIRROR_PAIR(0xFF5B, 0xFF5D);
        MIRROR_PAIR(0xFF5F, 0xFF60);
        MIRROR_PAIR(0xFF62, 0xFF63);

        default:
            /* no mirrored partner known: leave *ch alone */
            break;
    }

#undef MIRROR_PAIR
}

#ifdef TEST_GETTYPE

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/*
 * Standalone test driver, built only when TEST_GETTYPE is defined.
 * Each command-line argument is parsed with strtoul() (base chosen by
 * its prefix) and printed as "U+xxxx: NAME", where NAME is the
 * character type reported by getType().
 */
int main(int argc, char **argv)
{
    /* Indexed by character type; the assert below checks the ordering. */
    static const struct { int type; const char *name; } typetoname[] = {
#define TYPETONAME(X) { X , #X }
        TYPETONAME(L),
        TYPETONAME(LRE),
        TYPETONAME(LRO),
        TYPETONAME(R),
        TYPETONAME(AL),
        TYPETONAME(RLE),
        TYPETONAME(RLO),
        TYPETONAME(PDF),
        TYPETONAME(EN),
        TYPETONAME(ES),
        TYPETONAME(ET),
        TYPETONAME(AN),
        TYPETONAME(CS),
        TYPETONAME(NSM),
        TYPETONAME(BN),
        TYPETONAME(B),
        TYPETONAME(S),
        TYPETONAME(WS),
        TYPETONAME(ON),
#undef TYPETONAME
    };
    int i;

    for (i = 1; i < argc; i++) {
        unsigned long chr = strtoul(argv[i], NULL, 0);
        int type = getType((int)chr);

        assert(typetoname[type].type == type);
        printf("U+%04x: %s\n", (unsigned)chr, typetoname[type].name);
    }

    return 0;
}

#endif