/* * This file has been copied from commit e7ac713d^ in the GNU grep git * repository. A few small changes have been made to adapt the code to * Git. */ /* kwset.c - search for any of a set of keywords. Copyright 1989, 1998, 2000, 2005 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see <https://www.gnu.org/licenses/>. */ /* Written August 1989 by Mike Haertel. The author may be reached (Email) at the address [email protected], or (US mail) as Mike Haertel c/o Free Software Foundation. */ /* The algorithm implemented by these routines bears a startling resemblance to one discovered by Beate Commentz-Walter, although it is not identical. See "A String Matching Algorithm Fast on the Average," Technical Report, IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900 Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient String Matching: An Aid to Bibliographic Search," CACM June 1975, Vol. 18, No. 6, which describes the failure function used below. */ #include "git-compat-util.h" #include "kwset.h" #include "compat/obstack.h" #define NCHAR … /* adapter for `xmalloc()`, which takes `size_t`, not `long` */ static void *obstack_chunk_alloc(long size) { … } #define obstack_chunk_free … #define U(c) … /* For case-insensitive kwset */ const unsigned char tolower_trans_tbl[256] = …; /* Balanced tree of edges and labels leaving a given trie node. */ struct tree { … }; /* Node of a trie representing a set of reversed keywords. */ struct trie { … }; /* Structure returned opaquely to the caller, containing everything. */ struct kwset { … }; /* Allocate and initialize a keyword set object, returning an opaque pointer to it. Return NULL if memory is not available. */ kwset_t kwsalloc (unsigned char const *trans) { … } /* This upper bound is valid for CHAR_BIT >= 4 and exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */ #define DEPTH_SIZE … /* Add the given string to the contents of the keyword set. Return NULL for success, an error message otherwise. */ const char * kwsincr (kwset_t kws, char const *text, size_t len) { … } /* Enqueue the trie nodes referenced from the given tree in the given queue. */ static void enqueue (struct tree *tree, struct trie **last) { … } /* Compute the Aho-Corasick failure function for the trie nodes referenced from the given tree, given the failure function for their parent as well as a last resort failure node. */ static void treefails (register struct tree const *tree, struct trie const *fail, struct trie *recourse) { … } /* Set delta entries for the links of the given tree such that the preexisting delta value is larger than the current depth. */ static void treedelta (register struct tree const *tree, register unsigned int depth, unsigned char delta[]) { … } /* Return true if A has every label in B. */ static int hasevery (register struct tree const *a, register struct tree const *b) { … } /* Compute a vector, indexed by character code, of the trie nodes referenced from the given tree. */ static void treenext (struct tree const *tree, struct trie *next[]) { … } /* Compute the shift for each trie node, as well as the delta table and next cache for the given keyword set. */ const char * kwsprep (kwset_t kws) { … } /* Fast boyer-moore search. */ static size_t bmexec (kwset_t kws, char const *text, size_t size) { … } /* Hairy multiple string search. */ static size_t cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch) { … } /* Search through the given text for a match of any member of the given keyword set. Return a pointer to the first character of the matching substring, or NULL if no match is found. If FOUNDLEN is non-NULL store in the referenced location the length of the matching substring. Similarly, if FOUNDIDX is non-NULL, store in the referenced location the index number of the particular keyword matched. */ size_t kwsexec (kwset_t kws, char const *text, size_t size, struct kwsmatch *kwsmatch) { … } /* Free the components of the given keyword set. */ void kwsfree (kwset_t kws) { … }