chromium/third_party/perfetto/src/trace_processor/util/glob.h

/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
#define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_

#include <optional>
#include <string_view>
#include <vector>

#include "perfetto/ext/base/small_vector.h"
#include "perfetto/ext/base/string_splitter.h"
#include "perfetto/ext/base/string_view.h"

namespace perfetto {
namespace trace_processor {
namespace util {

// Lightweight implementation of matching on UNIX glob patterns, maintaining
// compatibility of syntax and semantics used by SQLite.
//
// Usage:
//  GlobMatcher matcher = GlobMatcher::FromPattern("*foo*");
//  for (auto string : strings) {
//    if (matcher.Matches(string)) {
//      <do something>
//    }
//  }
//
// This is a class instead of a free function to allow preprocessing the
// pattern (e.g. to compute Kleene star offsets). This can create big savings
// because trace processsor needs to match the same pattern on many strings
// when filtering tables.
//
// Implementation:
// The algorithm used in this class is similar to the "alternative"
// algorithm proposed in [1].
//
// We preprocess the pattern (in the constructor) to split the pattern on *,
// accounting for character classes. This breaks the pattern in "segments": our
// name for the parts of the pattern between the stars.
//
// Then at match time, we go through each segment and check if it matches part
// of the string. The number of character matched defines the search start-point
// for the next segment. As described in [1], we don't need to do any
// backtracking which removes the exponential component of the algorithm and
// consequently simplifies the code.
//
// The subtle parts are:
// 1) the first and last segments - they need to be "anchored" to the
//    beginning and end of the string respectively. If not, they fail the match
//    straight away.
// 2) leading/trailing stars: they counteract the above point and "unanchor"
//    the first and last segments respectively by allowing them to happen
//    somewhere after/before the beginning/end.
//
// [1] https://research.swtch.com/glob
class GlobMatcher {};

}  // namespace util
}  // namespace trace_processor
}  // namespace perfetto

#endif  // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_