chromium/services/tracing/public/cpp/perfetto/java_heap_profiler/hprof_parser_android.h

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef SERVICES_TRACING_PUBLIC_CPP_PERFETTO_JAVA_HEAP_PROFILER_HPROF_PARSER_ANDROID_H_
#define SERVICES_TRACING_PUBLIC_CPP_PERFETTO_JAVA_HEAP_PROFILER_HPROF_PARSER_ANDROID_H_

#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

#include "base/component_export.h"
#include "base/gtest_prod_util.h"
#include "services/tracing/public/cpp/perfetto/java_heap_profiler/hprof_buffer_android.h"
#include "services/tracing/public/cpp/perfetto/java_heap_profiler/hprof_instances_android.h"
#include "third_party/perfetto/protos/perfetto/trace/profiling/heap_graph.pbzero.h"

namespace tracing {

using ObjectId = uint64_t;
using RootType = ::perfetto::protos::pbzero::HeapGraphRoot::Type;
using HeapGraphRoot = ::perfetto::protos::pbzero::HeapGraphRoot;
// Sentinel object id: the largest representable 64-bit value. Declared
// `inline constexpr` so that every translation unit including this header
// shares a single definition that is usable in constant expressions.
inline constexpr uint64_t kInvalidObjectId =
    std::numeric_limits<uint64_t>::max();

// Parses an hprof file produced by the Java API endpoint
// Debug.dumpHprofData(). The parser is given the temporary file path that the
// dump was written to; the file is then parsed for references between
// different instances. The format is defined by said method.
// Refer to:
// https://docs.google.com/document/d/1frGMt8Ro7C6fjbDscImdxHVsFAbSH1mjdETWisZjzVE
// for more information on the file format and
// https://developer.android.com/reference/android/os/Debug#dumpHprofData(java.lang.String)
// for more information on the endpoint.
//
// Typical use: construct with the dump's path, call Parse() exactly once, then
// read the results through the const accessors below.
class COMPONENT_EXPORT(TRACING_CPP) HprofParser {
 public:
  // Outcome reported by Parse() and by the individual parsing steps.
  enum ParseResult {
    PARSE_SUCCESS,
    PARSE_FAILED,
    FAILED_TO_OPEN_FILE,
    STRING_ID_NOT_FOUND,
    OBJECT_ID_NOT_FOUND,
  };

  // Counters and the final result recorded while parsing. Not copyable.
  struct ParseStats {
    ParseStats();
    ParseStats(const ParseStats&) = delete;
    ParseStats& operator=(const ParseStats&) = delete;

    // Returns the result of the parser. Set to fail by default.
    ParseResult result = PARSE_FAILED;

    // Number of strings found in heap dump.
    uint64_t num_strings = 0;

    // Number of class objects found in a class heap dump.
    uint64_t num_class_objects = 0;

    // Number of heap dump segments found in a heap dump.
    uint64_t num_heap_dump_segments = 0;

    // Number of class object dumps found within a heap dump segment.
    uint64_t num_class_object_dumps = 0;

    // Number of class instance dumps found within a heap dump segment.
    uint64_t num_class_instance_dumps = 0;

    // Number of object array dumps found within a heap dump segment.
    uint64_t num_object_array_dumps = 0;

    // Number of primitive array dumps found within a heap dump segment.
    uint64_t num_primitive_array_dumps = 0;
  };

  // This stores a reference to the location of the string in the hprof buffer.
  // The data will only be read once when calling the GetString() for the
  // first time. This avoids paging in all strings in the hprof file.
  struct StringReference {
    // |string_position| is where the string's bytes start and |length| is the
    // number of bytes that belong to it.
    StringReference(const char* string_position, size_t length);
    ~StringReference();

    // Returns the referenced string as a std::string.
    const std::string& GetString();

    // Start of the string data; not owned by this struct.
    const char* string_position;
    // Length of the string data starting at |string_position|.
    size_t length = 0;
    // Owned copy of the string. NOTE(review): presumably filled in lazily by
    // the first GetString() call, per the struct comment - confirm in the .cc.
    std::unique_ptr<std::string> cached_copy;
  };

  // |file_path| is the location of the hprof file that Parse() will open.
  // Marked explicit so that a std::string is never implicitly converted into a
  // parser.
  explicit HprofParser(const std::string& file_path);
  ~HprofParser();
  HprofParser(const HprofParser&) = delete;
  HprofParser& operator=(const HprofParser&) = delete;

  // This method should only be called after Parse() has been called.
  const ParseStats& parse_stats() const { return parse_stats_; }

  // Class objects recorded by the parser, keyed by object id.
  const std::unordered_map<ObjectId, std::unique_ptr<ClassObject>>&
  class_objects() const {
    return class_objects_;
  }

  // Class instances recorded by the parser, keyed by object id.
  const std::unordered_map<ObjectId, std::unique_ptr<ClassInstance>>&
  class_instances() const {
    return class_instances_;
  }

  // Object array instances recorded by the parser, keyed by object id.
  const std::unordered_map<ObjectId, std::unique_ptr<ObjectArrayInstance>>&
  object_array_instances() const {
    return object_array_instances_;
  }

  // Primitive array instances recorded by the parser, keyed by object id.
  const std::unordered_map<ObjectId, std::unique_ptr<PrimitiveArrayInstance>>&
  primitive_array_instances() const {
    return primitive_array_instances_;
  }

  // Object ids of the recorded roots, grouped by root type.
  const std::unordered_map<RootType, std::vector<ObjectId>>& roots() const {
    return roots_;
  }

  // First opens the file at |file_path_| then passes the data to ParseFileData
  // to parse and record metrics. The hprof file is generated by
  // Debug.dumpHprofData().
  // This method should only ever be run once.
  ParseResult Parse();

 private:
  // Defined out of line; return type of GetRegisteredNativeSize().
  struct RegisteredNativeSize;

  // Unit tests that exercise the private parsing and resolution steps.
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ParseStringTag);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ParseClassTag);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ParseClassObjectDumpSubtag);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ParseClassInstanceDumpSubtag);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ParseObjectArrayDumpSubtag);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ParsePrimitiveArrayDumpSubtag);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ModifyClassObjectTypeNames);

  FRIEND_TEST_ALL_PREFIXES(HprofParserTest,
                           BasicResolveClassInstanceReferences);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, ResolveSuperClassObjectFields);
  FRIEND_TEST_ALL_PREFIXES(
      HprofParserTest,
      MultipleInstanceFieldsResolveClassInstanceReferences);
  FRIEND_TEST_ALL_PREFIXES(
      HprofParserTest,
      MissingObjectReferenceResolveClassInstanceReferences);
  FRIEND_TEST_ALL_PREFIXES(
      HprofParserTest,
      ExistingAndMissingReferencesResolveClassInstanceReferences);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest,
                           BasicResolveObjectArrayInstanceReferences);
  FRIEND_TEST_ALL_PREFIXES(
      HprofParserTest,
      MissingAndExistingReferencesResolveObjectArrayInstanceReferences);
  FRIEND_TEST_ALL_PREFIXES(HprofParserTest, NativeSizeComputation);

  // Parses hprof data file_data and records metrics in parse_stats_.
  void ParseFileData(const unsigned char* file_data, size_t file_size);

  // Per-record parsing helpers, each named after the tag or heap-dump subtag
  // it handles. Each reports its outcome as a ParseResult.
  ParseResult ParseStringTag(uint32_t record_length_);
  ParseResult ParseClassTag();
  ParseResult ParseClassObjectDumpSubtag();
  ParseResult ParseClassInstanceDumpSubtag();
  ParseResult ParseObjectArrayDumpSubtag();
  ParseResult ParsePrimitiveArrayDumpSubtag();
  ParseResult ParseHeapDumpTag(uint32_t record_length_);

  // Resolution and post-processing steps over the recorded instances.
  // NOTE(review): the order in which these run is defined in the .cc file.
  void ResolveSuperClassFields();
  ParseResult ResolveClassInstanceReferences();
  ParseResult ComputeNativeSizeOfObjects();
  ParseResult ResolveObjectArrayInstanceReferences();

  // Looks up the registered native size associated with |cleaner_instance|.
  RegisteredNativeSize GetRegisteredNativeSize(ClassInstance* cleaner_instance);

  // Append java.lang.Class: to ClassObjects to differentiate them from
  // ClassInstances.
  void ModifyClassObjectTypeNames();

  // Searches through each of the four instance maps: class_objects_,
  // class_instances_, object_array_instances_, and primitive_array_instances_
  // for an instance with given id. If found, return the base instance of the
  // instance found. If not found, return a null pointer.
  Instance* FindInstance(ObjectId id);
  // Typed lookups by object id for a class instance / class object.
  ClassInstance* FindClassInstance(ObjectId id);
  ClassObject* FindClassObject(ObjectId id);
  // Returns true after setting the position of |hprof_buffer_| to point to the
  // |field_name| in class.
  [[nodiscard]] bool SeekToFieldPosition(ClassInstance* instance,
                                         std::string_view field_name);

  // String references, keyed by id.
  std::unordered_map<ObjectId, std::unique_ptr<StringReference>> strings_;

  // The four instance maps, each keyed by object id and owning its entries.
  std::unordered_map<ObjectId, std::unique_ptr<ClassObject>> class_objects_;
  std::unordered_map<ObjectId, std::unique_ptr<ClassInstance>> class_instances_;
  std::unordered_map<ObjectId, std::unique_ptr<ObjectArrayInstance>>
      object_array_instances_;
  std::unordered_map<ObjectId, std::unique_ptr<PrimitiveArrayInstance>>
      primitive_array_instances_;

  // Root object ids grouped by root type.
  std::unordered_map<RootType, std::vector<ObjectId>> roots_;

  // Buffer over the hprof data being parsed.
  std::unique_ptr<HprofBuffer> hprof_buffer_;

  // Path of the hprof file that Parse() opens.
  const std::string file_path_;
  // Statistics and result recorded while parsing; see parse_stats().
  ParseStats parse_stats_;
};

}  // namespace tracing

#endif  // SERVICES_TRACING_PUBLIC_CPP_PERFETTO_JAVA_HEAP_PROFILER_HPROF_PARSER_ANDROID_H_