// Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef FUZZTEST_FUZZTEST_INTERNAL_DOMAINS_IN_GRAMMAR_IMPL_H_ #define FUZZTEST_FUZZTEST_INTERNAL_DOMAINS_IN_GRAMMAR_IMPL_H_ #include <cstddef> #include <optional> #include <string> #include <tuple> #include <utility> #include <variant> #include <vector> #include "absl/container/flat_hash_map.h" #include "absl/random/bit_gen_ref.h" #include "absl/random/distributions.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "./fuzztest/internal/domains/container_of_impl.h" #include "./fuzztest/internal/domains/domain_base.h" #include "./fuzztest/internal/domains/in_regexp_impl.h" #include "./fuzztest/internal/logging.h" #include "./fuzztest/internal/meta.h" #include "./fuzztest/internal/serialization.h" #include "./fuzztest/internal/type_support.h" namespace fuzztest::internal::grammar { // We use a generic and simple data structure to represent the AST tree. The key // member in the data structure is the type id, from which we can tell the // structure information about the ASTNode. We then provide three generic Domain // types for Vector (a list of AST nodes of the same types), Variant (a variant // of several AST nodes), and Tuple (a tuple of AST nodes). In the code // generation, we generate an InGrammar Domain by aggregating the specific // version of the these generic Domain types. ASTTypeId; struct ASTNode { … }; template <typename T> bool CheckASTNodeTypeIdAndChildType(const ASTNode& astnode, ASTTypeId type_id) { … } template <typename T> bool CheckASTCorpusStructure(const IRObject& obj) { … } IRObject WrapASTIntoIRObject(const ASTNode& astnode, IRObject parsed_child); template <ASTTypeId id, const absl::string_view& value> class StringLiteralDomain { … }; template <ASTTypeId id, const absl::string_view& value> class RegexLiteralDomain { … }; // Use for random AST generation. To avoid inifite recursive generation (i.e., // for grammar rules like `expr: expr '+' expr | literal`), we limit the // generation with budget. We first assign the budget to be `kMaxGenerationNum`. // Every time we generate a AST node, we decrement the budget by one. When we // are running out of the budget (the budget is less or equal than 0), the // generation will be in FallBack mode. The FallBack Mode ensures the generation // ends. Specifically, every grammar rule that has more than 1 production rules // has a fallback index. When every grammar rule chooses the fallback index // during generation (aka, the FallBack Mode is on), generation will guarantee // to end. The fallback index is precalculated by the code generator. The idea // is simple: We define a symbol (terminal or non-terminal) as safe if it // generates a finite string in the fallback mode. First we mark all terminals // as safe. If a non-terminal has a production rule that consists of only safe // symbols, we use the index of production rule as the fallback index and mark // the non-terminal is safe. We repeat the process until every grammar rule has // a fallback index. inline constexpr int kMaxGenerationNum = …; template <ASTTypeId id, typename ElementT, int min = 0, int max = 10000> class VectorDomain { … }; // Maximum number of elements allowed in a vector. inline constexpr int kMaxElementNum = …; Vector; Optional; NonEmptyVector; template <ASTTypeId id, typename... ElementT> class TupleDomain { … }; template <ASTTypeId id, int fallback_index, typename... ElementT> class VariantDomain { … }; void GroupElementByASTType( ASTNode& astnode, absl::flat_hash_map<ASTTypeId, std::vector<ASTNode*>>& groups); template <typename TopDomain> class InGrammarImpl : public domain_implementor::DomainBase<InGrammarImpl<TopDomain>, std::string, ASTNode> { … }; } // namespace fuzztest::internal::grammar #endif // FUZZTEST_FUZZTEST_INTERNAL_DOMAINS_IN_GRAMMAR_IMPL_H_