// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (c) 2002-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ // // rbbitblb.cpp // #include "unicode/utypes.h" #if !UCONFIG_NO_BREAK_ITERATION #include "unicode/unistr.h" #include "rbbitblb.h" #include "rbbirb.h" #include "rbbiscan.h" #include "rbbisetb.h" #include "rbbidata.h" #include "cstring.h" #include "uassert.h" #include "uvectr32.h" #include "cmemory.h" U_NAMESPACE_BEGIN const int32_t kMaxStateFor8BitsTable = …; RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) : … { … } RBBITableBuilder::~RBBITableBuilder() { … } //----------------------------------------------------------------------------- // // RBBITableBuilder::buildForwardTable - This is the main function for building // the DFA state transition table from the RBBI rules parse tree. // //----------------------------------------------------------------------------- void RBBITableBuilder::buildForwardTable() { … } //----------------------------------------------------------------------------- // // calcNullable. Impossible to explain succinctly. See Aho, section 3.9 // //----------------------------------------------------------------------------- void RBBITableBuilder::calcNullable(RBBINode *n) { … } //----------------------------------------------------------------------------- // // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 // //----------------------------------------------------------------------------- void RBBITableBuilder::calcFirstPos(RBBINode *n) { … } //----------------------------------------------------------------------------- // // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 // //----------------------------------------------------------------------------- void RBBITableBuilder::calcLastPos(RBBINode *n) { … } //----------------------------------------------------------------------------- // // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 // //----------------------------------------------------------------------------- void RBBITableBuilder::calcFollowPos(RBBINode *n) { … } //----------------------------------------------------------------------------- // // addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged // as roots of a rule to a destination vector. // //----------------------------------------------------------------------------- void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) { … } //----------------------------------------------------------------------------- // // calcChainedFollowPos. Modify the previously calculated followPos sets // to implement rule chaining. NOT described by Aho // //----------------------------------------------------------------------------- void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) { … } //----------------------------------------------------------------------------- // // bofFixup. Fixup for state tables that include {bof} beginning of input testing. // Do an swizzle similar to chaining, modifying the followPos set of // the bofNode to include the followPos nodes from other {bot} nodes // scattered through the tree. // // This function has much in common with calcChainedFollowPos(). // //----------------------------------------------------------------------------- void RBBITableBuilder::bofFixup() { … } //----------------------------------------------------------------------------- // // buildStateTable() Determine the set of runtime DFA states and the // transition tables for these states, by the algorithm // of fig. 3.44 in Aho. // // Most of the comments are quotes of Aho's psuedo-code. // //----------------------------------------------------------------------------- void RBBITableBuilder::buildStateTable() { … } /** * mapLookAheadRules * */ void RBBITableBuilder::mapLookAheadRules() { … } //----------------------------------------------------------------------------- // // flagAcceptingStates Identify accepting states. // First get a list of all of the end marker nodes. // Then, for each state s, // if s contains one of the end marker nodes in its list of tree positions then // s is an accepting state. // //----------------------------------------------------------------------------- void RBBITableBuilder::flagAcceptingStates() { … } //----------------------------------------------------------------------------- // // flagLookAheadStates Very similar to flagAcceptingStates, above. // //----------------------------------------------------------------------------- void RBBITableBuilder::flagLookAheadStates() { … } //----------------------------------------------------------------------------- // // flagTaggedStates // //----------------------------------------------------------------------------- void RBBITableBuilder::flagTaggedStates() { … } //----------------------------------------------------------------------------- // // mergeRuleStatusVals // // Update the global table of rule status {tag} values // The rule builder has a global vector of status values that are common // for all tables. Merge the ones from this table into the global set. // //----------------------------------------------------------------------------- void RBBITableBuilder::mergeRuleStatusVals() { … } //----------------------------------------------------------------------------- // // sortedAdd Add a value to a vector of sorted values (ints). // Do not replicate entries; if the value is already there, do not // add a second one. // Lazily create the vector if it does not already exist. // //----------------------------------------------------------------------------- void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) { … } //----------------------------------------------------------------------------- // // setAdd Set operation on UVector // dest = dest union source // Elements may only appear once and must be sorted. // //----------------------------------------------------------------------------- void RBBITableBuilder::setAdd(UVector *dest, UVector *source) { … } //----------------------------------------------------------------------------- // // setEqual Set operation on UVector. // Compare for equality. // Elements must be sorted. // //----------------------------------------------------------------------------- UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) { … } //----------------------------------------------------------------------------- // // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos // for each node in the tree. // //----------------------------------------------------------------------------- #ifdef RBBI_DEBUG void RBBITableBuilder::printPosSets(RBBINode *n) { if (n==nullptr) { return; } printf("\n"); RBBINode::printNodeHeader(); RBBINode::printNode(n); RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"true":"false"); RBBIDebugPrintf(" firstpos: "); printSet(n->fFirstPosSet); RBBIDebugPrintf(" lastpos: "); printSet(n->fLastPosSet); RBBIDebugPrintf(" followpos: "); printSet(n->fFollowPos); printPosSets(n->fLeftChild); printPosSets(n->fRightChild); } #endif // // findDuplCharClassFrom() // bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) { … } // // removeColumn() // void RBBITableBuilder::removeColumn(int32_t column) { … } /* * findDuplicateState */ bool RBBITableBuilder::findDuplicateState(IntPair *states) { … } bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) { … } void RBBITableBuilder::removeState(IntPair duplStates) { … } void RBBITableBuilder::removeSafeState(IntPair duplStates) { … } /* * RemoveDuplicateStates */ int32_t RBBITableBuilder::removeDuplicateStates() { … } //----------------------------------------------------------------------------- // // getTableSize() Calculate the size of the runtime form of this // state transition table. // //----------------------------------------------------------------------------- int32_t RBBITableBuilder::getTableSize() const { … } bool RBBITableBuilder::use8BitsForTable() const { … } //----------------------------------------------------------------------------- // // exportTable() export the state transition table in the format required // by the runtime engine. getTableSize() bytes of memory // must be available at the output address "where". // //----------------------------------------------------------------------------- void RBBITableBuilder::exportTable(void *where) { … } /** * Synthesize a safe state table from the main state table. */ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) { … } //----------------------------------------------------------------------------- // // getSafeTableSize() Calculate the size of the runtime form of this // safe state table. // //----------------------------------------------------------------------------- int32_t RBBITableBuilder::getSafeTableSize() const { … } bool RBBITableBuilder::use8BitsForSafeTable() const { … } //----------------------------------------------------------------------------- // // exportSafeTable() export the state transition table in the format required // by the runtime engine. getTableSize() bytes of memory // must be available at the output address "where". // //----------------------------------------------------------------------------- void RBBITableBuilder::exportSafeTable(void *where) { … } //----------------------------------------------------------------------------- // // printSet Debug function. Print the contents of a UVector // //----------------------------------------------------------------------------- #ifdef RBBI_DEBUG void RBBITableBuilder::printSet(UVector *s) { int32_t i; for (i=0; i<s->size(); i++) { const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i)); RBBIDebugPrintf("%5d", v==nullptr? -1 : v->fSerialNum); } RBBIDebugPrintf("\n"); } #endif //----------------------------------------------------------------------------- // // printStates Debug Function. Dump the fully constructed state transition table. // //----------------------------------------------------------------------------- #ifdef RBBI_DEBUG void RBBITableBuilder::printStates() { int c; // input "character" int n; // state number RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); RBBIDebugPrintf(" | Acc LA Tag"); for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { RBBIDebugPrintf(" %3d", c); } RBBIDebugPrintf("\n"); RBBIDebugPrintf(" |---------------"); for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { RBBIDebugPrintf("----"); } RBBIDebugPrintf("\n"); for (n=0; n<fDStates->size(); n++) { RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); RBBIDebugPrintf(" %3d | " , n); RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx); for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(c)); } RBBIDebugPrintf("\n"); } RBBIDebugPrintf("\n\n"); } #endif //----------------------------------------------------------------------------- // // printSafeTable Debug Function. Dump the fully constructed safe table. // //----------------------------------------------------------------------------- #ifdef RBBI_DEBUG void RBBITableBuilder::printReverseTable() { int c; // input "character" int n; // state number RBBIDebugPrintf(" Safe Reverse Table \n"); if (fSafeTable == nullptr) { RBBIDebugPrintf(" --- nullptr ---\n"); return; } RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); RBBIDebugPrintf(" | Acc LA Tag"); for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { RBBIDebugPrintf(" %2d", c); } RBBIDebugPrintf("\n"); RBBIDebugPrintf(" |---------------"); for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { RBBIDebugPrintf("---"); } RBBIDebugPrintf("\n"); for (n=0; n<fSafeTable->size(); n++) { UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n); RBBIDebugPrintf(" %3d | " , n); RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { RBBIDebugPrintf(" %2d", rowString->charAt(c)); } RBBIDebugPrintf("\n"); } RBBIDebugPrintf("\n\n"); } #endif //----------------------------------------------------------------------------- // // printRuleStatusTable Debug Function. Dump the common rule status table // //----------------------------------------------------------------------------- #ifdef RBBI_DEBUG void RBBITableBuilder::printRuleStatusTable() { int32_t thisRecord = 0; int32_t nextRecord = 0; int i; UVector *tbl = fRB->fRuleStatusVals; RBBIDebugPrintf("index | tags \n"); RBBIDebugPrintf("-------------------\n"); while (nextRecord < tbl->size()) { thisRecord = nextRecord; nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1; RBBIDebugPrintf("%4d ", thisRecord); for (i=thisRecord+1; i<nextRecord; i++) { RBBIDebugPrintf(" %5d", tbl->elementAti(i)); } RBBIDebugPrintf("\n"); } RBBIDebugPrintf("\n\n"); } #endif //----------------------------------------------------------------------------- // // RBBIStateDescriptor Methods. This is a very struct-like class // Most access is directly to the fields. // //----------------------------------------------------------------------------- RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) { … } RBBIStateDescriptor::~RBBIStateDescriptor() { … } U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */