chromium/components/zucchini/disassembler_dex.cc

// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "components/zucchini/disassembler_dex.h"

#include <stddef.h>
#include <stdlib.h>

#include <algorithm>
#include <cmath>
#include <iterator>
#include <optional>
#include <set>
#include <utility>

#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/logging.h"
#include "base/memory/raw_ptr.h"
#include "base/numerics/checked_math.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/stringprintf.h"
#include "components/zucchini/buffer_source.h"
#include "components/zucchini/buffer_view.h"
#include "components/zucchini/io_utils.h"
#include "third_party/abseil-cpp/absl/strings/ascii.h"

namespace zucchini {

namespace {

// A DEX item specified by an offset, if absent, has a sentinel value of 0 since
// 0 is never a valid item offset (it points to magic at start of DEX).
constexpr offset_t kDexSentinelOffset = 0U;

// A DEX item specified by an index, if absent, has a sentinel value of
// NO_INDEX = 0xFFFFFFFF. This is represented as an offset_t for uniformity.
constexpr offset_t kDexSentinelIndexAsOffset = 0xFFFFFFFFU;

static_assert;

// Size of a Dalvik instruction unit, in bytes. Instructions are stored as
// uint16_t "units". The value is cast to signed int because sizeof() yields
// size_t, which dominates when combined with ptrdiff_t and then wreaks havoc
// for base::checked_cast<int16_t>().
constexpr int kInstrUnitSize = static_cast<int>(sizeof(uint16_t));

// Checks if |offset| is byte aligned to 32 bits or 4 bytes, i.e., whether it
// is a multiple of 4.
bool Is32BitAligned(offset_t offset) {
  return offset % 4 == 0;
}

// Returns a lower bound for the size of an item of type |type_item_code|.
// - For fixed-length items (e.g., kTypeFieldIdItem) this is the exact size.
// - For variant-length items (e.g., kTypeCodeItem), returns a value that is
//   known to be less than the item length (e.g., header size).
// - For items not handled by this function, returns 1 for sanity check.
size_t GetItemBaseSize(uint16_t type_item_code) {}

/******** CodeItemParser ********/

// A parser to extract successive code items from a DEX image whose header has
// been parsed.
class CodeItemParser {};

/******** InstructionParser ********/

// A class that successively reads |code_item| for Dalvik instructions, which
// are found at |insns|, spanning |insns_size| uint16_t "units". These units
// store instructions followed by optional non-instruction "payload". Finding
// payload boundary requires parsing: On finding an instruction that uses (and
// points to) payload, the boundary is updated.
class InstructionParser {};

/******** InstructionReferenceReader ********/

// A class to visit |code_items|, parse instructions, and emit embedded
// References of a type determined by |filter_| and |mapper_|. Only References
// located in |[lo, hi)| are emitted. |lo| and |hi| are assumed to never
// straddle the body of a Reference.
class InstructionReferenceReader : public ReferenceReader {};

/******** ItemReferenceReader ********/

// A class to visit fixed-size item elements (determined by |item_size|) and
// emit a "member variable of interest" (MVI, determined by |rel_location| and
// |mapper|) as Reference. Only MVIs lying in |[lo, hi)| are emitted. |lo| and
// |hi| are assumed to never straddle the body of a Reference.
class ItemReferenceReader : public ReferenceReader {};

// Parses a flattened jagged list of lists of items that looks like:
//   NTTT|NTT|NTTTT|N|NTT...
// where |N| is a uint32_t representing the number of items in each sub-list,
// and "T" is a fixed-size item (|item_width|) of type "T". On success, stores
// the offset of each |T| into |item_offsets|, and returns true. Otherwise
// (e.g., on finding any structural problem) returns false.
bool ParseItemOffsets(ConstBufferView image,
                      const dex::MapItem& map_item,
                      size_t item_width,
                      std::vector<offset_t>* item_offsets) {}

// Parses AnnotationsDirectoryItems of the format (using RegEx) "(AF*M*P*)*",
// where:
//   A = AnnotationsDirectoryItem (contains class annotation),
//   F = FieldAnnotation,
//   M = MethodAnnotation,
//   P = ParameterAnnotation.
// On success, stores the offsets of each class, field, method and parameter
// annotation for each item into |*_annotation_offsets|. Otherwise on finding
// structural issues returns false.
bool ParseAnnotationsDirectoryItems(
    ConstBufferView image,
    const dex::MapItem& annotations_directory_map_item,
    std::vector<offset_t>* annotations_directory_item_offsets,
    std::vector<offset_t>* field_annotation_offsets,
    std::vector<offset_t>* method_annotation_offsets,
    std::vector<offset_t>* parameter_annotation_offsets) {}

/******** CachedItemListReferenceReader ********/

// A class that takes sorted |item_offsets|, and emits all member variables of
// interest (MVIs) that fall inside |[lo, hi)|. The MVI of each item has
// location of |rel_location| from item offset, and has target extracted with
// |mapper| (which performs validation). By the "atomicity assumption",
// |[lo, hi)| never cuts across an MVI.
class CachedItemListReferenceReader : public ReferenceReader {};

// Reads an INT index at |location| in |image| and translates the index to the
// offset of a fixed-size item specified by |target_map_item| and
// |target_item_size|. Returns the target offset if valid, or kInvalidOffset
// otherwise. This is compatible with
// CachedItemListReferenceReader::Mapper,
// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper.
template <typename INT>
static offset_t ReadTargetIndex(ConstBufferView image,
                                const dex::MapItem& target_map_item,
                                size_t target_item_size,
                                offset_t location) {}

// Reads a field or method index of the MethodHandleItem located at |location|
// in |image| and translates |method_handle_item.field_or_method_id| to the
// offset of a fixed-size item specified by |target_map_item| and
// |target_item_size|. The index is deemed to be of the correct target type if
// |method_handle_item.method_handle_type| falls within the range [|min_type|,
// |max_type|]. If the target type is correct, ReadTargetIndex is called.
// Returns the target offset if valid, or kDexSentinelIndexAsOffset if
// |method_handle_item.method_handle_type| is of the wrong type, or
// kInvalidOffset otherwise.
//
// As of DEX version 39 MethodHandleType values for FieldId and MethodId each
// form one consecutive block of values. If this changes, then the interface to
// this function will need to be redesigned.
static offset_t ReadMethodHandleFieldOrMethodId(
    ConstBufferView image,
    const dex::MapItem& target_map_item,
    size_t target_item_size,
    dex::MethodHandleType min_type,
    dex::MethodHandleType max_type,
    offset_t location) {}

// Reads a uint32_t value in |image| at (valid) |location| and checks whether it
// is a safe offset of a fixed-size item. Returns the target offset (possibly a
// sentinel) if valid, or kInvalidOffset otherwise. This is compatible with
// CachedItemListReferenceReader::Mapper,
// InstructionReferenceReader::Mapper, and ItemReferenceReader::Mapper.
static offset_t ReadTargetOffset32(ConstBufferView image, offset_t location) {}

/******** ReferenceWriterAdaptor ********/

// A ReferenceWriter that adapts a callback that performs type-specific
// Reference writes.
class ReferenceWriterAdaptor : public ReferenceWriter {};

// Helper that's compatible with ReferenceWriterAdaptor::Writer.
// Given that |ref.target| points to the start of a fixed size DEX item (e.g.,
// FieldIdItem), translates |ref.target| to item index, and writes the result to
// |ref.location| as |INT|.
template <typename INT>
static void WriteTargetIndex(const dex::MapItem& target_map_item,
                             size_t target_item_size,
                             Reference ref,
                             MutableBufferView image) {}

// Buffer for ReadDexHeader() to optionally return results.
struct ReadDexHeaderResults {};

// Returns whether |image| points to a DEX file. If this is a possibility and
// |opt_results| is not null, then uses it to pass extracted data to enable
// further parsing.
bool ReadDexHeader(ConstBufferView image, ReadDexHeaderResults* opt_results) {}

}  // namespace

/******** DisassemblerDex ********/

DisassemblerDex::DisassemblerDex() :{}

DisassemblerDex::~DisassemblerDex() = default;

// static.
bool DisassemblerDex::QuickDetect(ConstBufferView image) {}

ExecutableType DisassemblerDex::GetExeType() const {}

std::string DisassemblerDex::GetExeTypeString() const {}

std::vector<ReferenceGroup> DisassemblerDex::MakeReferenceGroups() const {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadStringIdToStringData(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadTypeIdToDescriptorStringId32(offset_t lo,
                                                      offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadProtoIdToShortyStringId32(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadProtoIdToReturnTypeId32(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadProtoIdToParametersTypeList(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToClassTypeId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToTypeId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadFieldToNameStringId32(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadMethodIdToClassTypeId16(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadMethodIdToProtoId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadMethodIdToNameStringId32(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadClassDefToClassTypeId32(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadClassDefToSuperClassTypeId32(offset_t lo,
                                                      offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadClassDefToInterfacesTypeList(offset_t lo,
                                                      offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadClassDefToSourceFileStringId32(offset_t lo,
                                                        offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadClassDefToAnnotationDirectory(offset_t lo,
                                                       offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadClassDefToClassData(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadClassDefToStaticValuesEncodedArray(offset_t lo,
                                                            offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadCallSiteIdToCallSite32(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadMethodHandleToFieldId16(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadMethodHandleToMethodId16(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadTypeListToTypeId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationSetToAnnotation(offset_t lo, offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationSetRefListToAnnotationSet(offset_t lo,
                                                             offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationsDirectoryToClassAnnotationSet(offset_t lo,
                                                                  offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationsDirectoryToFieldId32(offset_t lo,
                                                         offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationsDirectoryToFieldAnnotationSet(offset_t lo,
                                                                  offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationsDirectoryToMethodId32(offset_t lo,
                                                          offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationsDirectoryToMethodAnnotationSet(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationsDirectoryToParameterMethodId32(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader>
DisassemblerDex::MakeReadAnnotationsDirectoryToParameterAnnotationSetRef(
    offset_t lo,
    offset_t hi) {}

// MakeReadCode* readers use offset relative to the instruction beginning based
// on the instruction format ID.
// See https://source.android.com/devices/tech/dalvik/instruction-formats

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToStringId32(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToTypeId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToProtoId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToCallSiteId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToMethodHandle16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToFieldId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToMethodId16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode8(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode16(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceReader> DisassemblerDex::MakeReadCodeToRelCode32(
    offset_t lo,
    offset_t hi) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteStringId32(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteTypeId32(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteProtoId16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteFieldId32(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodId32(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteCallSiteId16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteMethodHandle16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode8(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode16(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteRelCode32(
    MutableBufferView image) {}

std::unique_ptr<ReferenceWriter> DisassemblerDex::MakeWriteAbs32(
    MutableBufferView image) {}

bool DisassemblerDex::Parse(ConstBufferView image) {}

bool DisassemblerDex::ParseHeader() {}

}  // namespace zucchini