// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Determine whether this library should match PCRE exactly
// for a particular Regexp.  (If so, the testing framework can
// check that it does.)
//
// This library matches PCRE except in these cases:
//   * The regexp contains a repetition of an empty string,
//     like (a*)* or (a*)+.  In this case, PCRE will treat
//     the repetition sequence as ending with an empty string,
//     while this library does not.
//   * Perl and PCRE differ on whether \v matches \n.
//     For historical reasons, this library implements the Perl behavior.
//   * Perl and PCRE allow $ in one-line mode to match either the very
//     end of the text or just before a \n at the end of the text.
//     This library requires it to match only the end of the text.
//   * Similarly, Perl and PCRE do not allow ^ in multi-line mode to
//     match the end of the text if the last character is a \n.
//     This library does allow it.
//
// Regexp::MimicsPCRE checks for any of these conditions.

#include "absl/log/absl_log.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"

namespace re2 {

// Returns whether re might match an empty string.
// Declared here; the definition is at the end of this file.
static bool CanBeEmptyString(Regexp *re);

// Walker class to compute whether this library handles a regexp
// exactly as PCRE would.  See the comment at the top of this file
// for the conditions under which it does not.
// The bool walk value is, per the PostVisit comment below, whether
// this library mimics PCRE for the subexpression in question.
class PCREWalker : public Regexp::Walker<bool> { … };

// Called after visiting each of re's children and accumulating
// the return values in child_args.  So child_args contains whether
// this library mimics PCRE for those subexpressions.
//
// Parameters:
//   re          - the regexp node whose children have just been visited.
//   child_args  - one result per child, as described above.
//   nchild_args - presumably the number of entries in child_args;
//                 confirm against re2/walker-inl.h.
//   parent_arg, pre_arg - remaining arguments of the Walker callback;
//                 their meaning is not described in this file
//                 (NOTE(review): see re2/walker-inl.h).
bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
                           bool* child_args, int nchild_args) { … }

// Returns whether this regexp's behavior will mimic PCRE's exactly,
// i.e. the result of checking for the exceptions listed in the
// comment at the top of this file.
bool Regexp::MimicsPCRE() { … }

// Walker class to compute whether a Regexp can match an empty string.
// It is okay to overestimate.  For example, \b\B cannot match an empty
// string, because \b and \B are mutually exclusive, but this walker
// isn't smart enough to notice and will say it can.  Spurious
// "can be empty" answers will reduce the number of regexps we
// sanity check against PCRE, but they won't break anything.
class EmptyStringWalker : public Regexp::Walker<bool> { … };

// Called after visiting re's children.  child_args contains the return
// value from each of the children's PostVisits (i.e., whether each child
// can match an empty string).  Returns whether this clause can match an
// empty string.
//
// nchild_args is presumably the number of entries in child_args, and
// parent_arg / pre_arg are the remaining Walker callback arguments;
// neither is described in this file (NOTE(review): confirm against
// re2/walker-inl.h).
bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg,
                                  bool* child_args, int nchild_args) { … }

// Returns whether re can match an empty string.
// As noted on EmptyStringWalker above, overestimating is acceptable, so a
// true result means "might match an empty string" rather than a guarantee.
static bool CanBeEmptyString(Regexp* re) { … }

}  // namespace re2