chromium/tools/binary_size/libsupersize/viewer/caspian/caspian_web.cc

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Command-line interface for checking the integrity of .size files.
// Intended to be called from WebAssembly code.

#include <stdint.h>
#include <stdlib.h>

#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <string_view>

#include "third_party/jsoncpp/source/include/json/json.h"
#include "third_party/re2/src/re2/re2.h"
#include "tools/binary_size/libsupersize/viewer/caspian/diff.h"
#include "tools/binary_size/libsupersize/viewer/caspian/file_format.h"
#include "tools/binary_size/libsupersize/viewer/caspian/lens.h"
#include "tools/binary_size/libsupersize/viewer/caspian/model.h"
#include "tools/binary_size/libsupersize/viewer/caspian/tree_builder.h"

namespace caspian {
namespace {

class FilterBuffer {
 public:
  FilterBuffer() = default;
  FilterBuffer(const FilterBuffer&) = delete;
  FilterBuffer& operator=(const FilterBuffer&) = delete;

  size_t remaining() { return kFilterBufferSize - (cursor_ - data_); }
  void Reset() { cursor_ = data_; }
  std::string_view Get() { return std::string_view(data_, cursor_ - data_); }

  void Append(char c) {
    if (remaining() > 0) {
      cursor_[0] = c;
      ++cursor_;
    }
  }

  void Append(std::string_view str) {
    size_t nchars = std::min(remaining(), str.size());
    memcpy(cursor_, str.data(), nchars);
    cursor_ += nchars;
  }

 private:
  static constexpr size_t kFilterBufferSize = 4 * 1024;
  char data_[kFilterBufferSize];
  char* cursor_;
};

FilterBuffer filter_buffer;

std::unique_ptr<SizeInfo> info;
std::unique_ptr<SizeInfo> before_info;
std::vector<std::string> removed_sources;
std::vector<std::string> added_sources;
std::unique_ptr<DeltaSizeInfo> diff_info;
std::unique_ptr<TreeBuilder> builder;

std::unique_ptr<Json::StreamWriter> writer;

std::string JsonSerialize(const Json::Value& value) {
  if (!writer) {
    writer.reset(Json::StreamWriterBuilder().newStreamWriter());
  }
  std::stringstream s;
  writer->write(value, &s);
  return s.str();
}

bool ContainsUpper(const char* str) {
  while (*str) {
    if (*str >= 'A' && *str <= 'Z') {
      return true;
    }
    ++str;
  }
  return false;
}

std::unique_ptr<RE2> CreateFilterRegex(const char* pattern) {
  RE2::Options options;
  options.set_case_sensitive(ContainsUpper(pattern));
  return std::make_unique<RE2>(pattern, options);
}

bool MatchesRegex(const GroupedPath& id_path,
                  const BaseSymbol& sym,
                  const RE2& regex) {
  // Write the entire path to a buffer so that regex to match across it.
  filter_buffer.Reset();
  filter_buffer.Append(id_path.group);
  filter_buffer.Append('/');
  filter_buffer.Append(id_path.path);
  filter_buffer.Append(':');
  filter_buffer.Append(sym.FullName());

  // Always match against container even when not grouping by container.
  if (RE2::PartialMatch(filter_buffer.Get(), regex)) {
    return true;
  }

  return RE2::PartialMatch(sym.ContainerName(), regex);
}

bool IsMultiContainer() {
  // If DeltaSizeInfo is active, still take |info| since it's the "after" info.
  return info->containers.size() > 1 || !info->containers[0].name.empty();
}

void ClearInfoAndBuilderObjects() {
  builder.reset(nullptr);
  diff_info.reset(nullptr);
  before_info.reset(nullptr);
  info.reset(nullptr);
  removed_sources = {};
  added_sources = {};
}

}  // namespace

extern "C" {
void LoadSizeFile(char* compressed, size_t size) {
  ClearInfoAndBuilderObjects();
  if (IsDiffSizeInfo(compressed, size)) {
    info = std::make_unique<SizeInfo>();
    before_info = std::make_unique<SizeInfo>();
    ParseDiffSizeInfo(compressed, size, before_info.get(), info.get(),
                      &removed_sources, &added_sources);
    // DeltaSizeInfo instantiation for sparse diff.
    diff_info.reset(new DeltaSizeInfo(
        Diff(before_info.get(), info.get(), &removed_sources, &added_sources)));
  } else {
    info = std::make_unique<SizeInfo>();
    ParseSizeInfo(compressed, size, info.get());
  }
}

void LoadBeforeSizeFile(const char* compressed, size_t size) {
  // Don't call ClearInfoAndBuilderObjects(): It's assumed that LoadSizeFile()
  // was called immediately before.
  before_info = std::make_unique<SizeInfo>();
  ParseSizeInfo(compressed, size, before_info.get());
}

// Updates |builder| with provided filters and constructs the new tree.
// Typically called when the front-end form updates, to apply any new filters.
// Returns: True if the resulting tree is a diff, false if it is a snapshot.
bool BuildTree(bool method_count_mode,
               const char* group_by,
               const char* include_regex_str,
               const char* exclude_regex_str,
               const char* include_sections,
               int minimum_size_bytes,
               int match_flag,
               bool non_overhead,
               bool disassembly_mode) {
  std::vector<TreeBuilder::FilterFunc> filters;

  const bool diff_mode = info && before_info;

  if (method_count_mode && diff_mode) {
    // include_sections is used to filter to just .dex.method symbols.
    // For diffs, we also want to filter to just adds & removes.
    filters.push_back([](const GroupedPath&, const BaseSymbol& sym) -> bool {
      DiffStatus diff_status = sym.GetDiffStatus();
      return diff_status == DiffStatus::kAdded ||
             diff_status == DiffStatus::kRemoved;
    });
  }

  if (minimum_size_bytes > 0) {
    if (!diff_mode) {
      filters.push_back([minimum_size_bytes](const GroupedPath&,
                                             const BaseSymbol& sym) -> bool {
        return sym.Pss() >= minimum_size_bytes;
      });
    } else {
      filters.push_back([minimum_size_bytes](const GroupedPath&,
                                             const BaseSymbol& sym) -> bool {
        return abs(sym.Pss()) >= minimum_size_bytes;
      });
    }
  }

  // It's currently not useful to filter on more than one flag, so
  // |match_flag| can be assumed to be a power of two.
  if (match_flag) {
    std::cout << "Filtering on flag matching " << match_flag << std::endl;
    filters.push_back(
        [match_flag](const GroupedPath&, const BaseSymbol& sym) -> bool {
          return match_flag & sym.Flags();
        });
  }

  if (non_overhead) {
    filters.push_back([](const GroupedPath&, const BaseSymbol& sym) -> bool {
      return !sym.IsOverhead();
    });
  }

  if (disassembly_mode) {
    filters.push_back([](const GroupedPath&, const BaseSymbol& sym) -> bool {
      return sym.Disassembly() != nullptr;
    });
  }

  std::array<bool, 256> include_sections_map{};
  if (include_sections) {
    std::cout << "Filtering on sections " << include_sections << std::endl;
    for (const char* c = include_sections; *c; c++) {
      include_sections_map[static_cast<uint8_t>(*c)] = true;
    }
    filters.push_back([&include_sections_map](const GroupedPath&,
                                              const BaseSymbol& sym) -> bool {
      return include_sections_map[static_cast<uint8_t>(sym.Section())];
    });
  }

  // Ensure lifetime of regex lasts until filter is used.
  std::unique_ptr<RE2> include_regex;
  if (include_regex_str && *include_regex_str) {
    include_regex = CreateFilterRegex(include_regex_str);
    if (include_regex->error_code() == RE2::NoError) {
      filters.push_back([&include_regex](const GroupedPath& id_path,
                                         const BaseSymbol& sym) -> bool {
        return MatchesRegex(id_path, sym, *include_regex);
      });
    }
  }

  std::unique_ptr<RE2> exclude_regex;
  if (exclude_regex_str && *exclude_regex_str) {
    exclude_regex = CreateFilterRegex(exclude_regex_str);
    if (exclude_regex->error_code() == RE2::NoError) {
      filters.push_back([&exclude_regex](const GroupedPath& id_path,
                                         const BaseSymbol& sym) -> bool {
        return !MatchesRegex(id_path, sym, *exclude_regex);
      });
    }
  }

  // BuildTree() is called every time a new filter is applied in the HTML
  // viewer, but if we already have a DeltaSizeInfo we can skip regenerating it
  // and let the TreeBuilder filter the symbols we care about.
  if (diff_mode && !diff_info) {
    // DeltaSizeInfo instantiation for dense diff.
    diff_info.reset(new DeltaSizeInfo(
        Diff(before_info.get(), info.get(), nullptr, nullptr)));
  }

  if (diff_mode) {
    builder.reset(new TreeBuilder(diff_info.get()));
  } else {
    builder.reset(new TreeBuilder(info.get()));
  }

  std::unique_ptr<BaseLens> lens;
  char sep = '/';
  std::cout << "group_by=" << group_by << std::endl;
  if (!strcmp(group_by, "source_path")) {
    lens = std::make_unique<IdPathLens>();
  } else if (!strcmp(group_by, "container")) {
    lens = std::make_unique<ContainerLens>();
  } else if (!strcmp(group_by, "component")) {
    lens = std::make_unique<ComponentLens>();
    sep = '>';
  } else if (!strcmp(group_by, "template")) {
    lens = std::make_unique<TemplateLens>();
    filters.push_back([](const GroupedPath&, const BaseSymbol& sym) -> bool {
      return sym.IsTemplate() && sym.IsNative();
    });
  } else if (!strcmp(group_by, "generated_type")) {
    lens = std::make_unique<GeneratedLens>();
  } else {
    std::cerr << "Unsupported group_by=" << group_by << std::endl;
    exit(1);
  }
  builder->Build(std::move(lens), sep, method_count_mode, filters);

  return bool(diff_info);
}

// Returns a JSON string representing root data.
const char* Open(const char* path) {
  static std::string cached_result;
  Json::Value v = builder->Open(path);
  cached_result = JsonSerialize(v);
  return cached_result.c_str();
}

// Returns a JSON string representing the metadata.
const char* GetMetadata() {
  static std::string cached_result;
  Json::Value v;
  v["size_file"] = info->fields;
  if (before_info != nullptr) {
    v["before_size_file"] = before_info->fields;
  }
  cached_result = JsonSerialize(v);
  return cached_result.c_str();
}

// Returns global properties.
const char* QueryProperty(const char* key) {
  if (!strcmp(key, "isMultiContainer")) {
    return IsMultiContainer() ? "true" : "false";
  }
  std::cerr << "Unknown property: " << key << std::endl;
  exit(1);
}

const char* QueryAncestryById(uint32_t id) {
  static std::string cached_result;
  Json::Value v;
  v["ancestorIds"] = builder->GetAncestryById(id);
  cached_result = JsonSerialize(v);
  return cached_result.c_str();
}

}  // extern "C"
}  // namespace caspian