// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * * Copyright (C) 1999-2015, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: ubidiln.c * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 1999aug06 * created by: Markus W. Scherer, updated by Matitiahu Allouche */ #include "cmemory.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/uchar.h" #include "unicode/ubidi.h" #include "ubidiimp.h" #include "uassert.h" /* * General remarks about the functions in this file: * * These functions deal with the aspects of potentially mixed-directional * text in a single paragraph or in a line of a single paragraph * which has already been processed according to * the Unicode 6.3 BiDi algorithm as defined in * https://www.unicode.org/reports/tr9/ , version 28, * also described in The Unicode Standard, Version 6.3.0 . * * This means that there is a UBiDi object with a levels * and a dirProps array. * paraLevel and direction are also set. * Only if the length of the text is zero, then levels==dirProps==nullptr. * * The overall directionality of the paragraph * or line is used to bypass the reordering steps if possible. * Even purely RTL text does not need reordering there because * the ubidi_getLogical/VisualIndex() functions can compute the * index on the fly in such a case. * * The implementation of the access to same-level-runs and of the reordering * do attempt to provide better performance and less memory usage compared to * a direct implementation of especially rule (L2) with an array of * one (32-bit) integer per text character. * * Here, the levels array is scanned as soon as necessary, and a vector of * same-level-runs is created. Reordering then is done on this vector. * For each run of text positions that were resolved to the same level, * only 8 bytes are stored: the first text position of the run and the visual * position behind the run after reordering. * One sign bit is used to hold the directionality of the run. * This is inefficient if there are many very short runs. If the average run * length is <2, then this uses more memory. * * In a further attempt to save memory, the levels array is never changed * after all the resolution rules (Xn, Wn, Nn, In). * Many functions have to consider the field trailingWSStart: * if it is less than length, then there is an implicit trailing run * at the paraLevel, * which is not reflected in the levels array. * This allows a line UBiDi object to use the same levels array as * its paragraph parent object. * * When a UBiDi object is created for a line of a paragraph, then the * paragraph's levels and dirProps arrays are reused by way of setting * a pointer into them, not by copying. This again saves memory and forbids to * change the now shared levels for (L1). */ /* handle trailing WS (L1) -------------------------------------------------- */ /* * setTrailingWSStart() sets the start index for a trailing * run of WS in the line. This is necessary because we do not modify * the paragraph's levels array that we just point into. * Using trailingWSStart is another form of performing (L1). * * To make subsequent operations easier, we also include the run * before the WS if it is at the paraLevel - we merge the two here. * * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is * set correctly for the line even when contextual multiple paragraphs. */ static void setTrailingWSStart(UBiDi *pBiDi) { … } /* ubidi_setLine ------------------------------------------------------------ */ U_CAPI void U_EXPORT2 ubidi_setLine(const UBiDi *pParaBiDi, int32_t start, int32_t limit, UBiDi *pLineBiDi, UErrorCode *pErrorCode) { … } U_CAPI UBiDiLevel U_EXPORT2 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { … } U_CAPI const UBiDiLevel * U_EXPORT2 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { … } U_CAPI void U_EXPORT2 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, int32_t *pLogicalLimit, UBiDiLevel *pLevel) { … } /* runs API functions ------------------------------------------------------- */ U_CAPI int32_t U_EXPORT2 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { … } U_CAPI UBiDiDirection U_EXPORT2 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, int32_t *pLogicalStart, int32_t *pLength) { … } /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ static void getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { … } /* reorder the runs array (L2) ---------------------------------------------- */ /* * Reorder the same-level runs in the runs array. * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. * All the visualStart fields=logical start before reordering. * The "odd" bits are not set yet. * * Reordering with this data structure lends itself to some handy shortcuts: * * Since each run is moved but not modified, and since at the initial maxLevel * each sequence of same-level runs consists of only one run each, we * don't need to do anything there and can predecrement maxLevel. * In many simple cases, the reordering is thus done entirely in the * index mapping. * Also, reordering occurs only down to the lowest odd level that occurs, * which is minLevel|1. However, if the lowest level itself is odd, then * in the last reordering the sequence of the runs at this level or higher * will be all runs, and we don't need the elaborate loop to search for them. * This is covered by ++minLevel instead of minLevel|=1 followed * by an extra reorder-all after the reorder-some loop. * About a trailing WS run: * Such a run would need special treatment because its level is not * reflected in levels[] if this is not a paragraph object. * Instead, all characters from trailingWSStart on are implicitly at * paraLevel. * However, for all maxLevel>paraLevel, this run will never be reordered * and does not need to be taken into account. maxLevel==paraLevel is only reordered * if minLevel==paraLevel is odd, which is done in the extra segment. * This means that for the main reordering loop we don't need to consider * this run and can --runCount. If it is later part of the all-runs * reordering, then runCount is adjusted accordingly. */ static void reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { … } /* compute the runs array --------------------------------------------------- */ static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex) { … } /* * Compute the runs array from the levels array. * After ubidi_getRuns() returns true, runCount is guaranteed to be >0 * and the runs are reordered. * Odd-level runs have visualStart on their visual right edge and * they progress visually to the left. * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the * sum of appropriate LRM/RLM_BEFORE/AFTER flags. * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the * negative number of BiDi control characters within this run. */ U_CFUNC UBool ubidi_getRuns(UBiDi *pBiDi, UErrorCode*) { … } static UBool prepareReorder(const UBiDiLevel *levels, int32_t length, int32_t *indexMap, UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { … } /* reorder a line based on a levels array (L2) ------------------------------ */ U_CAPI void U_EXPORT2 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { … } U_CAPI void U_EXPORT2 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { … } /* API functions for logical<->visual mapping ------------------------------- */ U_CAPI int32_t U_EXPORT2 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { … } U_CAPI int32_t U_EXPORT2 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { … } U_CAPI void U_EXPORT2 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { … } U_CAPI void U_EXPORT2 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { … } U_CAPI void U_EXPORT2 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { … }