/* * Implementation of the Unicode bidirectional and Arabic shaping * algorithms for PuTTY. * * Original version written and kindly contributed to this code base * by Ahmad Khalifa of Arabeyes. The bidi part was almost completely * rewritten in 2021 by Simon Tatham to bring it up to date, but the * shaping part is still the one by the original authors. * * Implementation notes: * * Algorithm version * ----------------- * * This algorithm is up to date with Unicode Standard Annex #9 * revision 46: * * https://www.unicode.org/reports/tr9/tr9-46.html * * and passes the full conformance test suite in Unicode 15.0.0. * * Paragraph and line handling * --------------------------- * * The full Unicode bidi algorithm expects to receive text containing * multiple paragraphs, together with a decision about how those * paragraphs are broken up into lines. It calculates embedding levels * a whole paragraph at a time without considering the line breaks, * but then the final reordering of the text for display is done to * each _line_ independently based on the levels computed for the text * in that line. * * This algorithm omits all of that, because it's intended for use as * a display-time transformation of a text terminal, which doesn't * preserve enough semantic information to decide what's a paragraph * break and what is not. So a piece of input text provided to this * algorithm is always expected to consist of exactly one paragraph * *and* exactly one line. * * Embeddings, overrides and isolates * ---------------------------------- * * This implementation has full support for all the Unicode special * control characters that modify bidi behaviour, such as * * U+202A LEFT-TO-RIGHT EMBEDDING * U+202B RIGHT-TO-LEFT EMBEDDING * U+202D LEFT-TO-RIGHT OVERRIDE * U+202E RIGHT-TO-LEFT OVERRIDE * U+202C POP DIRECTIONAL FORMATTING * U+2068 FIRST STRONG ISOLATE * U+2066 LEFT-TO-RIGHT ISOLATE * U+2067 RIGHT-TO-LEFT ISOLATE * U+2069 POP DIRECTIONAL ISOLATE * * However, at present, the terminal emulator that is a client of this * code has no way to pass those in (because they're dropped during * escape sequence processing and don't get stored in the terminal * state). Nonetheless, the code is all here, so if the terminal * emulator becomes able to record those characters at some later * point, we'll be all set to take account of them during bidi. * * But the _main_ purpose of supporting the full bidi algorithm is * simply that that's the easiest way to be sure it's correct, because * if you support the whole thing, you can run the full conformance * test suite. (And I don't 100% believe that restricting to the * subset of _tests_ valid with a reduced character set will test the * full set of _functionality_ relevant to the reduced set.) * * Retained formatting characters * ------------------------------ * * The standard bidi algorithm, in step X9, deletes assorted * formatting characters from the text: all the embedding and override * section initiator characters, the Pop Directional Formatting * character that closes one of those sections again, and any * character labelled as Boundary Neutral. So the characters it * returns are not a _full_ reordering of the input; some input * characters vanish completely. * * This would be fine, if it were not for the fact that - as far as I * can see - _exactly one_ Unicode code point in the discarded * category has a wcwidth() of more than 0, namely U+00AD SOFT HYPHEN * which is a printing character for terminal purposes but has a bidi * class of BN. * * Therefore, we must implement a modified version of the algorithm, * as described in section 5.2 of TR9, which retains those formatting * characters so that a client can find out where they ended up in the * reordering. * * Section 5.2 describes a set of modifications to the algorithm that * are _intended_ to achieve this without changing the rest of the * behaviour: that is, if you take the output of the modified * algorithm and delete all the characters that the standard algorithm * would have removed, you should end up with the remaining characters * in the same order that the standard algorithm would have delivered. * However, section 5.2 admits the possibility of error, and says "in * case of any deviation the explicit algorithm is the normative * statement for conformance". And indeed, in one or two places I * found I had to make my own tweaks to the section 5.2 description in * order to get the whole test suite to pass, because I think the 5.2 * modifications if taken literally don't quite achieve that. My * justification is that sentence of 5.2: in case of doubt, the right * thing is to make the code behave the same as the official * algorithm. * * It's possible that there might still be some undiscovered * discrepancies between the behaviour of the standard and modified * algorithms. So, just in case, I've kept in this code the ability to * implement the _standard_ algorithm too! If you compile with * -DREMOVE_FORMATTING_CHARS, this code should go back to implementing * the literal UAX#9 bidi algorithm - so you can run your suspect * input through both versions, making it much easier to figure out * why they differ, and in which of the many stages of the algorithm * the difference was introduced. * * However, beware that when compiling in this mode, the do_bidi * interface to the terminal will stop working, and just abort() when * called! The only useful thing you can do with this mode is to run * the companion program bidi_test.c. */ #include /* definition of wchar_t */ #include "putty.h" #include "misc.h" #include "bidi.h" typedef struct { char type; wchar_t form_b; } shape_node; /* Kept near the actual table, for verification. */ #define SHAPE_FIRST 0x621 #define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1) static const shape_node shapetypes[] = { /* index, Typ, Iso, Ligature Index*/ /* 621 */ {SU, 0xFE80}, /* 622 */ {SR, 0xFE81}, /* 623 */ {SR, 0xFE83}, /* 624 */ {SR, 0xFE85}, /* 625 */ {SR, 0xFE87}, /* 626 */ {SD, 0xFE89}, /* 627 */ {SR, 0xFE8D}, /* 628 */ {SD, 0xFE8F}, /* 629 */ {SR, 0xFE93}, /* 62A */ {SD, 0xFE95}, /* 62B */ {SD, 0xFE99}, /* 62C */ {SD, 0xFE9D}, /* 62D */ {SD, 0xFEA1}, /* 62E */ {SD, 0xFEA5}, /* 62F */ {SR, 0xFEA9}, /* 630 */ {SR, 0xFEAB}, /* 631 */ {SR, 0xFEAD}, /* 632 */ {SR, 0xFEAF}, /* 633 */ {SD, 0xFEB1}, /* 634 */ {SD, 0xFEB5}, /* 635 */ {SD, 0xFEB9}, /* 636 */ {SD, 0xFEBD}, /* 637 */ {SD, 0xFEC1}, /* 638 */ {SD, 0xFEC5}, /* 639 */ {SD, 0xFEC9}, /* 63A */ {SD, 0xFECD}, /* 63B */ {SU, 0x0}, /* 63C */ {SU, 0x0}, /* 63D */ {SU, 0x0}, /* 63E */ {SU, 0x0}, /* 63F */ {SU, 0x0}, /* 640 */ {SC, 0x0}, /* 641 */ {SD, 0xFED1}, /* 642 */ {SD, 0xFED5}, /* 643 */ {SD, 0xFED9}, /* 644 */ {SD, 0xFEDD}, /* 645 */ {SD, 0xFEE1}, /* 646 */ {SD, 0xFEE5}, /* 647 */ {SD, 0xFEE9}, /* 648 */ {SR, 0xFEED}, /* 649 */ {SR, 0xFEEF}, /* SD */ /* 64A */ {SD, 0xFEF1}, /* 64B */ {SU, 0x0}, /* 64C */ {SU, 0x0}, /* 64D */ {SU, 0x0}, /* 64E */ {SU, 0x0}, /* 64F */ {SU, 0x0}, /* 650 */ {SU, 0x0}, /* 651 */ {SU, 0x0}, /* 652 */ {SU, 0x0}, /* 653 */ {SU, 0x0}, /* 654 */ {SU, 0x0}, /* 655 */ {SU, 0x0}, /* 656 */ {SU, 0x0}, /* 657 */ {SU, 0x0}, /* 658 */ {SU, 0x0}, /* 659 */ {SU, 0x0}, /* 65A */ {SU, 0x0}, /* 65B */ {SU, 0x0}, /* 65C */ {SU, 0x0}, /* 65D */ {SU, 0x0}, /* 65E */ {SU, 0x0}, /* 65F */ {SU, 0x0}, /* 660 */ {SU, 0x0}, /* 661 */ {SU, 0x0}, /* 662 */ {SU, 0x0}, /* 663 */ {SU, 0x0}, /* 664 */ {SU, 0x0}, /* 665 */ {SU, 0x0}, /* 666 */ {SU, 0x0}, /* 667 */ {SU, 0x0}, /* 668 */ {SU, 0x0}, /* 669 */ {SU, 0x0}, /* 66A */ {SU, 0x0}, /* 66B */ {SU, 0x0}, /* 66C */ {SU, 0x0}, /* 66D */ {SU, 0x0}, /* 66E */ {SU, 0x0}, /* 66F */ {SU, 0x0}, /* 670 */ {SU, 0x0}, /* 671 */ {SR, 0xFB50}, /* 672 */ {SU, 0x0}, /* 673 */ {SU, 0x0}, /* 674 */ {SU, 0x0}, /* 675 */ {SU, 0x0}, /* 676 */ {SU, 0x0}, /* 677 */ {SU, 0x0}, /* 678 */ {SU, 0x0}, /* 679 */ {SD, 0xFB66}, /* 67A */ {SD, 0xFB5E}, /* 67B */ {SD, 0xFB52}, /* 67C */ {SU, 0x0}, /* 67D */ {SU, 0x0}, /* 67E */ {SD, 0xFB56}, /* 67F */ {SD, 0xFB62}, /* 680 */ {SD, 0xFB5A}, /* 681 */ {SU, 0x0}, /* 682 */ {SU, 0x0}, /* 683 */ {SD, 0xFB76}, /* 684 */ {SD, 0xFB72}, /* 685 */ {SU, 0x0}, /* 686 */ {SD, 0xFB7A}, /* 687 */ {SD, 0xFB7E}, /* 688 */ {SR, 0xFB88}, /* 689 */ {SU, 0x0}, /* 68A */ {SU, 0x0}, /* 68B */ {SU, 0x0}, /* 68C */ {SR, 0xFB84}, /* 68D */ {SR, 0xFB82}, /* 68E */ {SR, 0xFB86}, /* 68F */ {SU, 0x0}, /* 690 */ {SU, 0x0}, /* 691 */ {SR, 0xFB8C}, /* 692 */ {SU, 0x0}, /* 693 */ {SU, 0x0}, /* 694 */ {SU, 0x0}, /* 695 */ {SU, 0x0}, /* 696 */ {SU, 0x0}, /* 697 */ {SU, 0x0}, /* 698 */ {SR, 0xFB8A}, /* 699 */ {SU, 0x0}, /* 69A */ {SU, 0x0}, /* 69B */ {SU, 0x0}, /* 69C */ {SU, 0x0}, /* 69D */ {SU, 0x0}, /* 69E */ {SU, 0x0}, /* 69F */ {SU, 0x0}, /* 6A0 */ {SU, 0x0}, /* 6A1 */ {SU, 0x0}, /* 6A2 */ {SU, 0x0}, /* 6A3 */ {SU, 0x0}, /* 6A4 */ {SD, 0xFB6A}, /* 6A5 */ {SU, 0x0}, /* 6A6 */ {SD, 0xFB6E}, /* 6A7 */ {SU, 0x0}, /* 6A8 */ {SU, 0x0}, /* 6A9 */ {SD, 0xFB8E}, /* 6AA */ {SU, 0x0}, /* 6AB */ {SU, 0x0}, /* 6AC */ {SU, 0x0}, /* 6AD */ {SD, 0xFBD3}, /* 6AE */ {SU, 0x0}, /* 6AF */ {SD, 0xFB92}, /* 6B0 */ {SU, 0x0}, /* 6B1 */ {SD, 0xFB9A}, /* 6B2 */ {SU, 0x0}, /* 6B3 */ {SD, 0xFB96}, /* 6B4 */ {SU, 0x0}, /* 6B5 */ {SU, 0x0}, /* 6B6 */ {SU, 0x0}, /* 6B7 */ {SU, 0x0}, /* 6B8 */ {SU, 0x0}, /* 6B9 */ {SU, 0x0}, /* 6BA */ {SR, 0xFB9E}, /* 6BB */ {SD, 0xFBA0}, /* 6BC */ {SU, 0x0}, /* 6BD */ {SU, 0x0}, /* 6BE */ {SD, 0xFBAA}, /* 6BF */ {SU, 0x0}, /* 6C0 */ {SR, 0xFBA4}, /* 6C1 */ {SD, 0xFBA6}, /* 6C2 */ {SU, 0x0}, /* 6C3 */ {SU, 0x0}, /* 6C4 */ {SU, 0x0}, /* 6C5 */ {SR, 0xFBE0}, /* 6C6 */ {SR, 0xFBD9}, /* 6C7 */ {SR, 0xFBD7}, /* 6C8 */ {SR, 0xFBDB}, /* 6C9 */ {SR, 0xFBE2}, /* 6CA */ {SU, 0x0}, /* 6CB */ {SR, 0xFBDE}, /* 6CC */ {SD, 0xFBFC}, /* 6CD */ {SU, 0x0}, /* 6CE */ {SU, 0x0}, /* 6CF */ {SU, 0x0}, /* 6D0 */ {SU, 0x0}, /* 6D1 */ {SU, 0x0}, /* 6D2 */ {SR, 0xFBAE}, }; /* * Returns the bidi character type of ch. */ unsigned char bidi_getType(int ch) { static const struct { int first, last, type; } lookup[] = { #include "unicode/bidi_type.h" }; int i, j, k; i = -1; j = lenof(lookup); while (j - i > 1) { k = (i + j) / 2; if (ch < lookup[k].first) j = k; else if (ch > lookup[k].last) i = k; else return lookup[k].type; } /* * If we reach here, the character was not in any of the * intervals listed in the lookup table. This means we return * ON (`Other Neutrals'). This is the appropriate code for any * character genuinely not listed in the Unicode table, and * also the table above has deliberately left out any * characters _explicitly_ listed as ON (to save space!). */ return ON; } /* * Return the mirrored version of a glyph. * * FIXME: there are also glyphs which the text rendering engine is * supposed to display left-right reflected, since no mirrored glyph * exists in Unicode itself to indicate the reflected form. Those are * listed in comments in BidiMirroring.txt. Many of them are * mathematical, e.g. the square root sign, or set difference * operator, or integral sign. No API currently exists here to * communicate the need for that reflected display back to the client. */ static unsigned mirror_glyph(unsigned int ch) { static const struct { unsigned src, dst; } mirror_pairs[] = { #include "unicode/bidi_mirror.h" }; int i, j, k; i = -1; j = lenof(mirror_pairs); while (j - i > 1) { k = (i + j) / 2; if (ch < mirror_pairs[k].src) j = k; else if (ch > mirror_pairs[k].src) i = k; else return mirror_pairs[k].dst; } return ch; } /* * Identify the bracket characters treated specially by bidi rule * BD19, and return their paired character(s). */ typedef enum { BT_NONE, BT_OPEN, BT_CLOSE } BracketType; typedef struct BracketTypeData { unsigned partner, equiv_partner; BracketType type; } BracketTypeData; static BracketTypeData bracket_type(unsigned int ch) { static const struct { unsigned src; BracketTypeData payload; } bracket_pairs[] = { #include "unicode/bidi_brackets.h" }; int i, j, k; i = -1; j = lenof(bracket_pairs); while (j - i > 1) { k = (i + j) / 2; if (ch < bracket_pairs[k].src) { j = k; } else if (ch > bracket_pairs[k].src) { i = k; } else { return bracket_pairs[k].payload; } } static const BracketTypeData null = { 0, 0, BT_NONE }; return null; } /* * Function exported to front ends to allow them to identify * bidi-active characters (in case, for example, the platform's * text display function can't conveniently be prevented from doing * its own bidi and so special treatment is required for characters * that would cause the bidi algorithm to activate). * * This function is passed a single Unicode code point, and returns * nonzero if the presence of this code point can possibly cause * the bidi algorithm to do any reordering. Thus, any string * composed entirely of characters for which is_rtl() returns zero * should be safe to pass to a bidi-active platform display * function without fear. * * (is_rtl() must therefore also return true for any character * which would be affected by Arabic shaping, but this isn't * important because all such characters are right-to-left so it * would have flagged them anyway.) */ bool is_rtl(int c) { return typeIsBidiActive(bidi_getType(c)); } /* The Main shaping function, and the only one to be used * by the outside world. * * line: buffer to apply shaping to. this must be passed by doBidi() first * to: output buffer for the shaped data * count: number of characters in line */ int do_shape(bidi_char *line, bidi_char *to, int count) { int i, tempShape; bool ligFlag = false; for (i=0; i 0) switch (line[i-1].wc) { case 0x622: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEF6; else to[i].wc = 0xFEF5; break; case 0x623: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEF8; else to[i].wc = 0xFEF7; break; case 0x625: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEFA; else to[i].wc = 0xFEF9; break; case 0x627: ligFlag = true; if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEFC; else to[i].wc = 0xFEFB; break; } if (ligFlag) { to[i-1].wc = 0x20; ligFlag = false; break; } } if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) { tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU); if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) to[i].wc = SMEDIAL((SISOLATED(line[i].wc))); else to[i].wc = SFINAL((SISOLATED(line[i].wc))); break; } tempShape = (i > 0 ? STYPE(line[i-1].wc) : SU); if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) to[i].wc = SINITIAL((SISOLATED(line[i].wc))); else to[i].wc = SISOLATED(line[i].wc); break; } } return 1; } typedef enum { DO_NEUTRAL, DO_LTR, DO_RTL } DirectionalOverride; typedef struct DSStackEntry { /* * An entry in the directional status stack (rule section X). */ unsigned char level; bool isolate; DirectionalOverride override; } DSStackEntry; typedef struct BracketStackEntry { /* * An entry in the bracket-pair-tracking stack (rule BD16). */ unsigned ch; size_t c; } BracketStackEntry; typedef struct IsolatingRunSequence { size_t start, end; BidiType sos, eos, embeddingDirection; } IsolatingRunSequence; #define MAX_DEPTH 125 /* specified in the standard */ struct BidiContext { /* * Storage space preserved between runs, all allocated to the same * length (internal_array_sizes). */ size_t internal_array_sizes; BidiType *types, *origTypes; unsigned char *levels; size_t *irsindices, *bracketpos; bool *irsdone; /* * Separately allocated with its own size field */ IsolatingRunSequence *irslist; size_t irslistsize; /* * Rewritten to point to the input to the currently active run of * the bidi algorithm */ bidi_char *text; size_t textlen; /* * State within a run of the algorithm */ BidiType paragraphOverride; DSStackEntry dsstack[MAX_DEPTH + 2]; size_t ds_sp; size_t overflowIsolateCount, overflowEmbeddingCount, validIsolateCount; unsigned char paragraphLevel; size_t *irs; size_t irslen; BidiType sos, eos, embeddingDirection; BracketStackEntry bstack[63]; /* constant size specified in rule BD16 */ }; BidiContext *bidi_new_context(void) { BidiContext *ctx = snew(BidiContext); memset(ctx, 0, sizeof(BidiContext)); return ctx; } void bidi_free_context(BidiContext *ctx) { sfree(ctx->types); sfree(ctx->origTypes); sfree(ctx->levels); sfree(ctx->irsindices); sfree(ctx->irsdone); sfree(ctx->bracketpos); sfree(ctx->irslist); sfree(ctx); } static void ensure_arrays(BidiContext *ctx, size_t textlen) { if (textlen <= ctx->internal_array_sizes) return; ctx->internal_array_sizes = textlen; ctx->types = sresize(ctx->types, ctx->internal_array_sizes, BidiType); ctx->origTypes = sresize(ctx->origTypes, ctx->internal_array_sizes, BidiType); ctx->levels = sresize(ctx->levels, ctx->internal_array_sizes, unsigned char); ctx->irsindices = sresize(ctx->irsindices, ctx->internal_array_sizes, size_t); ctx->irsdone = sresize(ctx->irsdone, ctx->internal_array_sizes, bool); ctx->bracketpos = sresize(ctx->bracketpos, ctx->internal_array_sizes, size_t); } static void setup_types(BidiContext *ctx) { for (size_t i = 0; i < ctx->textlen; i++) ctx->types[i] = ctx->origTypes[i] = bidi_getType(ctx->text[i].wc); } static bool text_needs_bidi(BidiContext *ctx) { /* * Initial optimisation: check for any bidi-active character at * all in an input line. If there aren't any, we can skip the * whole algorithm. * * Also include the paragraph override in this check! */ for (size_t i = 0; i < ctx->textlen; i++) if (typeIsBidiActive(ctx->types[i])) return true; return typeIsBidiActive(ctx->paragraphOverride); } static size_t find_matching_pdi(const BidiType *types, size_t i, size_t size) { /* Assuming that types[i] is an isolate initiator, find its * matching PDI by rule BD9. */ unsigned counter = 1; i++; for (; i < size; i++) { BidiType t = types[i]; if (typeIsIsolateInitiator(t)) { counter++; } else if (t == PDI) { counter--; if (counter == 0) return i; } } /* If no PDI was found, return the length of the array. */ return size; } static unsigned char rule_p2_p3(const BidiType *types, size_t size) { /* * Rule P2. Find the first strong type (L, R or AL), ignoring * anything inside an isolated segment. * * Rule P3. If that type is R or AL, choose a paragraph embeddding * level of 1, otherwise 0. */ for (size_t i = 0; i < size; i++) { BidiType t = types[i]; if (typeIsIsolateInitiator(t)) i = find_matching_pdi(types, i, size); else if (typeIsStrong(t)) return (t == L ? 0 : 1); } return 0; /* default if no strong type found */ } static void set_paragraph_level(BidiContext *ctx) { if (ctx->paragraphOverride == L) ctx->paragraphLevel = 0; else if (ctx->paragraphOverride == R) ctx->paragraphLevel = 1; else ctx->paragraphLevel = rule_p2_p3(ctx->types, ctx->textlen); } static inline unsigned char nextOddLevel(unsigned char x) { return (x+1)|1; } static inline unsigned char nextEvenLevel(unsigned char x) { return (x|1)+1; } static inline void push(BidiContext *ctx, unsigned char level, DirectionalOverride override, bool isolate) { ctx->ds_sp++; assert(ctx->ds_sp < lenof(ctx->dsstack)); ctx->dsstack[ctx->ds_sp].level = level; ctx->dsstack[ctx->ds_sp].override = override; ctx->dsstack[ctx->ds_sp].isolate = isolate; } static inline void pop(BidiContext *ctx) { assert(ctx->ds_sp > 0); ctx->ds_sp--; } static void process_explicit_embeddings(BidiContext *ctx) { /* * Rule X1 initialisation. */ ctx->ds_sp = (size_t)-1; push(ctx, ctx->paragraphLevel, DO_NEUTRAL, false); ctx->overflowIsolateCount = 0; ctx->overflowEmbeddingCount = 0; ctx->validIsolateCount = 0; #define stk (&ctx->dsstack[ctx->ds_sp]) for (size_t i = 0; i < ctx->textlen; i++) { BidiType t = ctx->types[i]; switch (t) { case RLE: case LRE: case RLO: case LRO: { /* Rules X2-X5 */ unsigned char newLevel; DirectionalOverride override; #ifndef REMOVE_FORMATTING_CHARS ctx->levels[i] = stk->level; #endif switch (t) { case RLE: /* rule X2 */ newLevel = nextOddLevel(stk->level); override = DO_NEUTRAL; break; case LRE: /* rule X3 */ newLevel = nextEvenLevel(stk->level); override = DO_NEUTRAL; break; case RLO: /* rule X4 */ newLevel = nextOddLevel(stk->level); override = DO_RTL; break; case LRO: /* rule X5 */ newLevel = nextEvenLevel(stk->level); override = DO_LTR; break; default: unreachable("how did this get past the outer switch?"); } if (newLevel <= MAX_DEPTH && ctx->overflowIsolateCount == 0 && ctx->overflowEmbeddingCount == 0) { /* Embedding code is valid. Push a stack entry. */ push(ctx, newLevel, override, false); } else { /* Embedding code is an overflow one. */ if (ctx->overflowIsolateCount == 0) ctx->overflowEmbeddingCount++; } break; } case RLI: case LRI: case FSI: { /* Rules X5a, X5b, X5c */ if (t == FSI) { /* Rule X5c: decide whether this should be treated * like RLI or LRI */ size_t pdi = find_matching_pdi(ctx->types, i, ctx->textlen); unsigned char level = rule_p2_p3(ctx->types + (i + 1), pdi - (i + 1)); t = (level == 1 ? RLI : LRI); } ctx->levels[i] = stk->level; if (stk->override != DO_NEUTRAL) ctx->types[i] = (stk->override == DO_LTR ? L : stk->override == DO_RTL ? R : t); unsigned char newLevel = (t == RLI ? nextOddLevel(stk->level) : nextEvenLevel(stk->level)); if (newLevel <= MAX_DEPTH && ctx->overflowIsolateCount == 0 && ctx->overflowEmbeddingCount == 0) { /* Isolate code is valid. Push a stack entry. */ push(ctx, newLevel, DO_NEUTRAL, true); ctx->validIsolateCount++; } else { /* Isolate code is an overflow one. */ ctx->overflowIsolateCount++; } break; } case PDI: { /* Rule X6a */ if (ctx->overflowIsolateCount > 0) { ctx->overflowIsolateCount--; } else if (ctx->validIsolateCount == 0) { /* Do nothing: spurious isolate-pop */ } else { /* Valid isolate-pop. We expect that the stack must * therefore contain at least one isolate==true entry, * so pop everything up to and including it. */ ctx->overflowEmbeddingCount = 0; while (!stk->isolate) pop(ctx); pop(ctx); ctx->validIsolateCount--; } ctx->levels[i] = stk->level; if (stk->override != DO_NEUTRAL) ctx->types[i] = (stk->override == DO_LTR ? L : R); break; } case PDF: { /* Rule X7 */ if (ctx->overflowIsolateCount > 0) { /* Do nothing if we've overflowed on isolates */ } else if (ctx->overflowEmbeddingCount > 0) { ctx->overflowEmbeddingCount--; } else if (ctx->ds_sp > 0 && !stk->isolate) { pop(ctx); } else { /* Do nothing: spurious embedding-pop */ } #ifndef REMOVE_FORMATTING_CHARS ctx->levels[i] = stk->level; #endif break; } case B: { /* Rule X8: if an explicit paragraph separator appears in * this text at all then it does not participate in any of * the above, and just gets assigned the paragraph level. * * PS, it had better be right at the end of the text, * because we have not implemented rule P1 in this code. */ assert(i == ctx->textlen - 1); ctx->levels[i] = ctx->paragraphLevel; break; } case BN: { /* * The section 5.2 adjustment to rule X6 says that we * apply it to BN just like any other class. But I think * this can't possibly give the same results as the * unmodified algorithm. * * Proof: adding RLO BN or LRO BN at the end of a * paragraph should not change the output of the standard * algorithm, because the override doesn't affect the BN * in rule X6, and then rule X9 removes both. But with the * modified rule X6, the BN is changed into R or L, and * then rule X9 doesn't remove it, and then you've added a * strong type that will set eos for the level run just * before the override. And whatever the standard * algorithm set eos to, _one_ of these override sequences * will disagree with it. * * So I think we just set the BN's level, and don't change * its type. */ ctx->levels[i] = stk->level; break; } default: { /* Rule X6. */ ctx->levels[i] = stk->level; if (stk->override != DO_NEUTRAL) ctx->types[i] = (stk->override == DO_LTR ? L : R); break; } } } #undef stk } static void remove_embedding_characters(BidiContext *ctx) { #ifndef REMOVE_FORMATTING_CHARS /* * Rule X9, as modified by section 5.2: turn embedding (but not * isolate) characters into BN. */ for (size_t i = 0; i < ctx->textlen; i++) { BidiType t = ctx->types[i]; if (typeIsRemovedDuringProcessing(t)) { ctx->types[i] = BN; /* * My own adjustment to the section 5.2 mods: a sequence * of contiguous BN generated by this setup should never * be at different levels from each other. * * An example where this goes wrong is if you open two * LREs in sequence, then close them again: * * ... LRE LRE PDF PDF ... * * The initial level assignment gives level 0 to the outer * LRE/PDF pair, and level 2 to the inner one. The * standard algorithm would remove all four, so this * doesn't matter, and you end up with no break in the * surrounding level run. But if you just rewrite the * types of all those characters to BN and leave the * levels in that state, then the modified algorithm will * leave the middle two BN at level 2, dividing what * should have been a long level run at level 0 into two * separate ones. */ if (i > 0 && ctx->types[i-1] == BN) ctx->levels[i] = ctx->levels[i-1]; } } #else /* * Rule X9, original version: completely remove embedding * start/end characters and also boundary neutrals. */ size_t outpos = 0; for (size_t i = 0; i < ctx->textlen; i++) { BidiType t = ctx->types[i]; if (!typeIsRemovedDuringProcessing(t)) { ctx->text[outpos] = ctx->text[i]; ctx->levels[outpos] = ctx->levels[i]; ctx->types[outpos] = ctx->types[i]; ctx->origTypes[outpos] = ctx->origTypes[i]; outpos++; } } ctx->textlen = outpos; #endif } typedef void (*irs_fn_t)(BidiContext *ctx); static void find_isolating_run_sequences(BidiContext *ctx, irs_fn_t process) { /* * Rule X10 / BD13. Now that we've assigned an embedding level to * each character in the text, we have to divide the text into * subsequences on which to do the next stage of processing. * * In earlier issues of the bidi algorithm, these subsequences * were contiguous in the original text, and each one was a 'level * run': a maximal contiguous subsequence of characters all at the * same embedding level. * * But now we have isolates, and the point of an (isolate * initiator ... PDI) sequence is that the whole sequence should * be treated like a single BN for the purposes of formatting * everything outside it. As a result, we now have to recombine * our level runs into longer sequences, on the principle that if * a level run ends with an isolate initiator, then we bring it * together with whatever later level run starts with the matching * PDI. * * These subsequences are no longer contiguous (the whole point is * that between the isolate initiator and the PDI is some other * text that we've skipped over). They're called 'isolating run * sequences'. */ memset(ctx->irsdone, 0, ctx->textlen); size_t i = 0; size_t n_irs = 0; size_t indexpos = 0; while (i < ctx->textlen) { if (ctx->irsdone[i]) { i++; continue; } /* * Found a character not already processed. Start a new * sequence here. */ sgrowarray(ctx->irslist, ctx->irslistsize, n_irs); IsolatingRunSequence *irs = &ctx->irslist[n_irs++]; irs->start = indexpos; size_t j = i; size_t irslevel = ctx->levels[i]; while (j < ctx->textlen) { /* * We expect that all level runs in this sequence will be * at the same level as each other, by construction of how * we set up the levels from the isolates in the first * place. */ assert(ctx->levels[j] == irslevel); do { ctx->irsdone[j] = true; ctx->irsindices[indexpos++] = j++; } while (j < ctx->textlen && ctx->levels[j] == irslevel); if (!typeIsIsolateInitiator(ctx->types[j-1])) break; /* this IRS is ended */ j = find_matching_pdi(ctx->types, j-1, ctx->textlen); } irs->end = indexpos; /* * Determine the start-of-sequence and end-of-sequence types * for this sequence. * * These depend on the embedding levels of surrounding text. * But processing each run can change those levels. That's why * we have to use a two-pass strategy here, first identifying * all the isolating run sequences using the input level data, * and not processing any of them until we know where they all * are. */ size_t p; unsigned char level_inside, level_outside, level_max; p = i; level_inside = ctx->levels[p]; level_outside = ctx->paragraphLevel; while (p > 0) { p--; if (ctx->types[p] != BN) { level_outside = ctx->levels[p]; break; } } level_max = max(level_inside, level_outside); irs->sos = (level_max % 2 ? R : L); p = ctx->irsindices[irs->end - 1]; level_inside = ctx->levels[p]; level_outside = ctx->paragraphLevel; if (typeIsIsolateInitiator(ctx->types[p])) { /* Special case: if an isolating run sequence ends in an * unmatched isolate initiator, then level_outside is * taken to be the paragraph embedding level and the * loop below is skipped. */ } else { while (p+1 < ctx->textlen) { p++; if (ctx->types[p] != BN) { level_outside = ctx->levels[p]; break; } } } level_max = max(level_inside, level_outside); irs->eos = (level_max % 2 ? R : L); irs->embeddingDirection = (irslevel % 2 ? R : L); /* * Now we've listed in ctx->irsindices[] the index of every * character that's part of this isolating run sequence, and * recorded an entry in irslist containing the interval of * indices relevant to this IRS, plus its assorted metadata. * We've also marked those locations in the input text as done * in ctx->irsdone, so that we'll skip over them when the * outer iteration reaches them later. */ } for (size_t k = 0; k < n_irs; k++) { IsolatingRunSequence *irs = &ctx->irslist[k]; ctx->irs = ctx->irsindices + irs->start; ctx->irslen = irs->end - irs->start; ctx->sos = irs->sos; ctx->eos = irs->eos; ctx->embeddingDirection = irs->embeddingDirection; process(ctx); } /* Reset irslen to 0 when we've finished. This means any other * functions that absentmindedly try to use irslen at all will end * up doing nothing at all, which should be easier to detect and * debug than if they run on subtly the wrong subset of the * text. */ ctx->irslen = 0; } static void remove_nsm(BidiContext *ctx) { /* Rule W1: NSM gains the type of the previous character, or sos * at the start of the run, with the exception that isolation * boundaries turn into ON. */ BidiType prevType = ctx->sos; for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (t == NSM) { ctx->types[i] = prevType; } else if (typeIsIsolateInitiatorOrPDI(t)) { prevType = ON; #ifndef REMOVE_FORMATTING_CHARS } else if (t == BN) { /* section 5.2 adjustment: these don't affect prevType */ #endif } else { prevType = t; } } } static void change_en_to_an(BidiContext *ctx) { /* Rule W2: EN becomes AN if the previous strong type is AL. (The * spec says that the 'previous strong type' is counted as sos at * the start of the run, although it hardly matters, since sos * can't be AL.) */ BidiType prevStrongType = ctx->sos; for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (t == EN && prevStrongType == AL) { ctx->types[i] = AN; } else if (typeIsStrong(t)) { prevStrongType = t; } } } static void change_al_to_r(BidiContext *ctx) { /* Rule W3: AL becomes R unconditionally. (The only difference * between the two types was their effect on nearby numbers, which * was dealt with in rule W2, so now we're done with the * distinction.) */ for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; if (ctx->types[i] == AL) ctx->types[i] = R; } } static void eliminate_separators_between_numbers(BidiContext *ctx) { /* Rule W4: a single numeric separator between two numbers of the * same type compatible with that separator takes the type of the * number. ES is a separator type compatible only with EN; CS is a * separator type compatible with either EN or AN. * * Section 5.2 adjustment: intervening BNs do not break this, so * instead of simply looking at types[irs[c-1]] and types[irs[c+1]], * we must track the last three indices we saw that were not BN. */ size_t i1 = 0, i2 = 0; BidiType t0 = ON, t1 = ON, t2 = ON; for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; #ifndef REMOVE_FORMATTING_CHARS if (t == BN) continue; #endif i1 = i2; i2 = i; t0 = t1; t1 = t2; t2 = t; if (t0 == t2 && ((t1 == ES && t0 == EN) || (t1 == CS && (t0 == EN || t0 == AN)))) { ctx->types[i1] = t0; } } } static void eliminate_et_next_to_en(BidiContext *ctx) { /* Rule W5: a sequence of ET adjacent to an EN take the type EN. * This is easiest to implement with one loop in each direction. * * Section 5.2 adjustment: include BN with ET. (We don't need to * #ifdef that out, because in the standard algorithm, we won't * have any BN left in any case.) */ bool modifying = false; for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (t == EN) { modifying = true; } else if (modifying && typeIsETOrBN(t)) { ctx->types[i] = EN; } else { modifying = false; } } for (size_t c = ctx->irslen; c-- > 0 ;) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (t == EN) { modifying = true; } else if (modifying && typeIsETOrBN(t)) { ctx->types[i] = EN; } else { modifying = false; } } } static void eliminate_separators_and_terminators(BidiContext *ctx) { /* Rule W6: all separators and terminators change to ON. * * (The spec is not quite clear on which bidi types are included * in this; one assumes ES, ET and CS, but what about S? I _think_ * the answer is that this is a rule in the W section, so it's * implicitly supposed to only apply to types designated as weakly * directional, so not S.) */ #ifndef REMOVE_FORMATTING_CHARS /* * Section 5.2 adjustment: this also applies to any BN adjacent on * either side to one of these types, which is easiest to * implement with a separate double-loop converting those to an * arbitrary one of the affected types, say CS. * * This double loop can be completely skipped in the standard * algorithm. */ bool modifying = false; for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (typeIsWeakSeparatorOrTerminator(t)) { modifying = true; } else if (modifying && t == BN) { ctx->types[i] = CS; } else { modifying = false; } } for (size_t c = ctx->irslen; c-- > 0 ;) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (typeIsWeakSeparatorOrTerminator(t)) { modifying = true; } else if (modifying && t == BN) { ctx->types[i] = CS; } else { modifying = false; } } #endif /* Now the main part of rule W6 */ for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (typeIsWeakSeparatorOrTerminator(t)) ctx->types[i] = ON; } } static void change_en_to_l(BidiContext *ctx) { /* Rule W7: EN becomes L if the previous strong type (or sos) is L. */ BidiType prevStrongType = ctx->sos; for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (t == EN && prevStrongType == L) { ctx->types[i] = L; } else if (typeIsStrong(t)) { prevStrongType = t; } } } typedef void (*bracket_pair_fn)(BidiContext *ctx, size_t copen, size_t cclose); static void find_bracket_pairs(BidiContext *ctx, bracket_pair_fn process) { const size_t NO_BRACKET = ~(size_t)0; /* * Rule BD16. */ size_t sp = 0; for (size_t c = 0; c < ctx->irslen; c++) ctx->bracketpos[c] = NO_BRACKET; for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; unsigned wc = ctx->text[i].wc; BracketTypeData bt = bracket_type(wc); if (bt.type == BT_OPEN) { if (sp >= lenof(ctx->bstack)) { /* * Stack overflow. The spec says we simply give up at * this point. */ goto found_all_pairs; } ctx->bstack[sp].ch = wc; ctx->bstack[sp].c = c; sp++; } else if (bt.type == BT_CLOSE) { size_t new_sp = sp; /* * Search up the stack for an entry containing a matching * open bracket. If we find it, pop that entry and * everything deeper, and record a matching pair. If we * reach the bottom of the stack without finding anything, * leave sp where it started. */ while (new_sp-- > 0) { if (ctx->bstack[new_sp].ch == bt.partner || ctx->bstack[new_sp].ch == bt.equiv_partner) { /* Found a stack element matching this one */ size_t cstart = ctx->bstack[new_sp].c; ctx->bracketpos[cstart] = c; sp = new_sp; break; } } } } found_all_pairs: for (size_t c = 0; c < ctx->irslen; c++) { if (ctx->bracketpos[c] != NO_BRACKET) { process(ctx, c, ctx->bracketpos[c]); } } } static BidiType get_bracket_type(BidiContext *ctx, size_t copen, size_t cclose) { /* * Rule N0: a pair of matched brackets containing at least one * strong type takes on the current embedding direction, unless * all of these are true at once: * * (a) there are no strong types inside the brackets matching the * current embedding direction * (b) there _is_ at least one strong type inside the brackets * that is _opposite_ to the current embedding direction * (c) the strong type preceding the open bracket is also * opposite to the current embedding direction * * in which case they take on the opposite direction. * * For these purposes, number types (EN and AN) count as R. */ bool foundOppositeTypeInside = false; for (size_t c = copen + 1; c < cclose; c++) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (typeIsStrongOrNumber(t)) { t = t == L ? L : R; /* numbers count as R */ if (t == ctx->embeddingDirection) { /* Found something inside the brackets matching the * current level, so (a) is violated. */ return ctx->embeddingDirection; } else { foundOppositeTypeInside = true; } } } if (!foundOppositeTypeInside) { /* No strong types at all inside the brackets, so return ON to * indicate that we're not messing with their type at all. */ return ON; } /* There was an opposite strong type in the brackets. Look * backwards to the preceding strong type, and go with that, * whichever it is. */ for (size_t c = copen; c-- > 0 ;) { size_t i = ctx->irs[c]; BidiType t = ctx->types[i]; if (typeIsStrongOrNumber(t)) { t = t == L ? L : R; /* numbers count as R */ return t; } } /* Fallback: if the preceding strong type was not found, go with * sos. */ return ctx->sos; } static void reset_bracket_type(BidiContext *ctx, size_t c, BidiType t) { /* Final bullet point of rule N0: when we change the type of a * bracket, the same change applies to any contiguous sequence of * characters after it whose _original_ bidi type was NSM. */ do { ctx->types[ctx->irs[c++]] = t; #ifndef REMOVE_FORMATTING_CHARS while (c < ctx->irslen && ctx->origTypes[ctx->irs[c]] == BN) { /* Section 5.2 adjustment: skip past BN in the process. */ c++; } #endif } while (c < ctx->irslen && ctx->origTypes[ctx->irs[c]] == NSM); } static void resolve_brackets(BidiContext *ctx, size_t copen, size_t cclose) { if (typeIsNeutral(ctx->types[ctx->irs[copen]]) && typeIsNeutral(ctx->types[ctx->irs[cclose]])) { BidiType t = get_bracket_type(ctx, copen, cclose); if (t != ON) { reset_bracket_type(ctx, copen, t); reset_bracket_type(ctx, cclose, t); } } } static void remove_ni(BidiContext *ctx) { /* * Rules N1 and N2 together: neutral or isolate characters take * the direction of the surrounding strong text if the nearest * strong characters on each side match, and otherwise, they take * the embedding direction. */ const size_t NO_INDEX = ~(size_t)0; BidiType prevStrongType = ctx->sos; size_t c_ni_start = NO_INDEX; for (size_t c = 0; c <= ctx->irslen; c++) { BidiType t; if (c < ctx->irslen) { size_t i = ctx->irs[c]; t = ctx->types[i]; } else { /* One extra loop iteration, using eos to resolve the * final sequence of NI if any */ t = ctx->eos; } if (typeIsStrongOrNumber(t)) { t = t == L ? L : R; /* numbers count as R */ if (c_ni_start != NO_INDEX) { /* There are some NI we have to fix up */ BidiType ni_type = (t == prevStrongType ? t : ctx->embeddingDirection); for (size_t c2 = c_ni_start; c2 < c; c2++) { size_t i2 = ctx->irs[c2]; BidiType t2 = ctx->types[i2]; if (typeIsNeutralOrIsolate(t2)) ctx->types[i2] = ni_type; } } prevStrongType = t; c_ni_start = NO_INDEX; } else if (typeIsNeutralOrIsolate(t) && c_ni_start == NO_INDEX) { c_ni_start = c; } } } static void resolve_implicit_levels(BidiContext *ctx) { /* Rules I1 and I2 */ for (size_t c = 0; c < ctx->irslen; c++) { size_t i = ctx->irs[c]; unsigned char level = ctx->levels[i]; BidiType t = ctx->types[i]; if (level % 2 == 0) { /* Rule I1 */ if (t == R) ctx->levels[i] += 1; else if (t == AN || t == EN) ctx->levels[i] += 2; } else { /* Rule I2 */ if (t == L || t == AN || t == EN) ctx->levels[i] += 1; } } } static void process_isolating_run_sequence(BidiContext *ctx) { /* Section W: resolve weak types */ remove_nsm(ctx); change_en_to_an(ctx); change_al_to_r(ctx); eliminate_separators_between_numbers(ctx); eliminate_et_next_to_en(ctx); eliminate_separators_and_terminators(ctx); change_en_to_l(ctx); /* Section N: resolve neutral types (and isolates) */ find_bracket_pairs(ctx, resolve_brackets); remove_ni(ctx); /* Section I: resolve implicit levels */ resolve_implicit_levels(ctx); } static void reset_whitespace_and_separators(BidiContext *ctx) { /* * Rule L1: segment and paragraph separators, plus whitespace * preceding them, all reset to the paragraph embedding level. * This also applies to whitespace at the very end. * * This is done using the original types, not the versions that * the rest of this algorithm has been merrily mutating. */ bool modifying = true; for (size_t i = ctx->textlen; i-- > 0 ;) { BidiType t = ctx->origTypes[i]; if (typeIsSegmentOrParaSeparator(t)) { ctx->levels[i] = ctx->paragraphLevel; modifying = true; } else if (modifying) { if (typeIsWhitespaceOrIsolate(t)) { ctx->levels[i] = ctx->paragraphLevel; } else if (!typeIsRemovedDuringProcessing(t)) { modifying = false; } } } #ifndef REMOVE_FORMATTING_CHARS /* * Section 5.2 adjustment: types removed by rule X9 take the level * of the character to their left. */ for (size_t i = 0; i < ctx->textlen; i++) { BidiType t = ctx->origTypes[i]; if (typeIsRemovedDuringProcessing(t)) { /* Section 5.2 adjustment */ ctx->levels[i] = (i > 0 ? ctx->levels[i-1] : ctx->paragraphLevel); } } #endif /* ! REMOVE_FORMATTING_CHARS */ } static void reverse(BidiContext *ctx, size_t start, size_t end) { for (size_t i = start, j = end; i < j; i++, j--) { bidi_char tmp = ctx->text[i]; ctx->text[i] = ctx->text[j]; ctx->text[j] = tmp; } } static void mirror_glyphs(BidiContext *ctx) { /* * Rule L3: any character with a mirror-image pair at an odd * embedding level is replaced by its mirror image. * * This is specified in the standard as happening _after_ rule L2 * (the actual reordering of the text). But it's much easier to * implement it before, while our levels[] array still matches up * to the text order. */ for (size_t i = 0; i < ctx->textlen; i++) { if (ctx->levels[i] % 2) ctx->text[i].wc = mirror_glyph(ctx->text[i].wc); } } static void reverse_sequences(BidiContext *ctx) { /* * Rule L2: every maximal contiguous sequence of characters at a * given level or higher is reversed. */ unsigned level = 0; for (size_t i = 0; i < ctx->textlen; i++) level = max(level, ctx->levels[i]); for (; level >= 1; level--) { for (size_t i = 0; i < ctx->textlen; i++) { if (ctx->levels[i] >= level) { size_t start = i; while (i+1 < ctx->textlen && ctx->levels[i+1] >= level) i++; reverse(ctx, start, i); } } } } /* * The Main Bidi Function. The two wrappers below it present different * external APIs for different purposes, but everything comes through * here. * * text: a buffer of size textlen containing text to apply the * Bidirectional algorithm to. */ static void do_bidi_new(BidiContext *ctx, bidi_char *text, size_t textlen) { ensure_arrays(ctx, textlen); ctx->text = text; ctx->textlen = textlen; setup_types(ctx); /* Quick initial test: see if we need to bother with any work at all */ if (!text_needs_bidi(ctx)) return; set_paragraph_level(ctx); process_explicit_embeddings(ctx); remove_embedding_characters(ctx); find_isolating_run_sequences(ctx, process_isolating_run_sequence); /* If this implementation distinguished paragraphs from lines, * then this would be the point where we repeat the remainder of * the algorithm once for each line in the paragraph. */ reset_whitespace_and_separators(ctx); mirror_glyphs(ctx); reverse_sequences(ctx); } size_t do_bidi_test(BidiContext *ctx, bidi_char *text, size_t textlen, int override) { ctx->paragraphOverride = (override > 0 ? L : override < 0 ? R : ON); do_bidi_new(ctx, text, textlen); return ctx->textlen; } void do_bidi(BidiContext *ctx, bidi_char *text, size_t textlen) { #ifdef REMOVE_FORMATTING_CHARACTERS abort(); /* can't use the standard algorithm in a live terminal */ #else ctx->paragraphOverride = ON; do_bidi_new(ctx, text, textlen); #endif }