llvm/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp

//===-- TraceIntelPTMultiCpuDecoder.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "TraceIntelPTMultiCpuDecoder.h"
#include "TraceIntelPT.h"
#include "llvm/Support/Error.h"
#include <optional>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

TraceIntelPTMultiCpuDecoder::TraceIntelPTMultiCpuDecoder(
    TraceIntelPTSP trace_sp)
    : m_trace_wp(trace_sp) {
  for (Process *proc : trace_sp->GetAllProcesses()) {
    for (ThreadSP thread_sp : proc->GetThreadList().Threads()) {
      m_tids.insert(thread_sp->GetID());
    }
  }
}

TraceIntelPTSP TraceIntelPTMultiCpuDecoder::GetTrace() {
  return m_trace_wp.lock();
}

bool TraceIntelPTMultiCpuDecoder::TracesThread(lldb::tid_t tid) const {
  return m_tids.count(tid);
}

Expected<std::optional<uint64_t>> TraceIntelPTMultiCpuDecoder::FindLowestTSC() {
  std::optional<uint64_t> lowest_tsc;
  TraceIntelPTSP trace_sp = GetTrace();

  Error err = GetTrace()->OnAllCpusBinaryDataRead(
      IntelPTDataKinds::kIptTrace,
      [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
        for (auto &cpu_id_to_buffer : buffers) {
          Expected<std::optional<uint64_t>> tsc =
              FindLowestTSCInTrace(*trace_sp, cpu_id_to_buffer.second);
          if (!tsc)
            return tsc.takeError();
          if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc))
            lowest_tsc = **tsc;
        }
        return Error::success();
      });
  if (err)
    return std::move(err);
  return lowest_tsc;
}

Expected<DecodedThreadSP> TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) {
  if (Error err = CorrelateContextSwitchesAndIntelPtTraces())
    return std::move(err);

  TraceIntelPTSP trace_sp = GetTrace();

  return trace_sp->GetThreadTimer(thread.GetID())
      .TimeTask("Decoding instructions", [&]() -> Expected<DecodedThreadSP> {
        auto it = m_decoded_threads.find(thread.GetID());
        if (it != m_decoded_threads.end())
          return it->second;

        DecodedThreadSP decoded_thread_sp = std::make_shared<DecodedThread>(
            thread.shared_from_this(), trace_sp->GetPerfZeroTscConversion());

        Error err = trace_sp->OnAllCpusBinaryDataRead(
            IntelPTDataKinds::kIptTrace,
            [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error {
              auto it =
                  m_continuous_executions_per_thread->find(thread.GetID());
              if (it != m_continuous_executions_per_thread->end())
                return DecodeSystemWideTraceForThread(
                    *decoded_thread_sp, *trace_sp, buffers, it->second);

              return Error::success();
            });
        if (err)
          return std::move(err);

        m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp);
        return decoded_thread_sp;
      });
}

static Expected<std::vector<PSBBlock>> GetPSBBlocksForCPU(TraceIntelPT &trace,
                                                          cpu_id_t cpu_id) {
  std::vector<PSBBlock> psb_blocks;
  Error err = trace.OnCpuBinaryDataRead(
      cpu_id, IntelPTDataKinds::kIptTrace,
      [&](ArrayRef<uint8_t> data) -> Error {
        Expected<std::vector<PSBBlock>> split_trace =
            SplitTraceIntoPSBBlock(trace, data, /*expect_tscs=*/true);
        if (!split_trace)
          return split_trace.takeError();

        psb_blocks = std::move(*split_trace);
        return Error::success();
      });
  if (err)
    return std::move(err);
  return psb_blocks;
}

Expected<DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>>
TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() {
  DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>
      continuous_executions_per_thread;
  TraceIntelPTSP trace_sp = GetTrace();

  std::optional<LinuxPerfZeroTscConversion> conv_opt =
      trace_sp->GetPerfZeroTscConversion();
  if (!conv_opt)
    return createStringError(
        inconvertibleErrorCode(),
        "TSC to nanoseconds conversion values were not found");

  LinuxPerfZeroTscConversion tsc_conversion = *conv_opt;

  for (cpu_id_t cpu_id : trace_sp->GetTracedCpus()) {
    Expected<std::vector<PSBBlock>> psb_blocks =
        GetPSBBlocksForCPU(*trace_sp, cpu_id);
    if (!psb_blocks)
      return psb_blocks.takeError();

    m_total_psb_blocks += psb_blocks->size();
    // We'll be iterating through the thread continuous executions and the intel
    // pt subtraces sorted by time.
    auto it = psb_blocks->begin();
    auto on_new_thread_execution =
        [&](const ThreadContinuousExecution &thread_execution) {
          IntelPTThreadContinousExecution execution(thread_execution);

          for (; it != psb_blocks->end() &&
                 *it->tsc < thread_execution.GetEndTSC();
               it++) {
            if (*it->tsc > thread_execution.GetStartTSC()) {
              execution.psb_blocks.push_back(*it);
            } else {
              m_unattributed_psb_blocks++;
            }
          }
          continuous_executions_per_thread[thread_execution.tid].push_back(
              execution);
        };
    Error err = trace_sp->OnCpuBinaryDataRead(
        cpu_id, IntelPTDataKinds::kPerfContextSwitchTrace,
        [&](ArrayRef<uint8_t> data) -> Error {
          Expected<std::vector<ThreadContinuousExecution>> executions =
              DecodePerfContextSwitchTrace(data, cpu_id, tsc_conversion);
          if (!executions)
            return executions.takeError();
          for (const ThreadContinuousExecution &exec : *executions)
            on_new_thread_execution(exec);
          return Error::success();
        });
    if (err)
      return std::move(err);

    m_unattributed_psb_blocks += psb_blocks->end() - it;
  }
  // We now sort the executions of each thread to have them ready for
  // instruction decoding
  for (auto &tid_executions : continuous_executions_per_thread)
    std::sort(tid_executions.second.begin(), tid_executions.second.end());

  return continuous_executions_per_thread;
}

Error TraceIntelPTMultiCpuDecoder::CorrelateContextSwitchesAndIntelPtTraces() {
  if (m_setup_error)
    return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());

  if (m_continuous_executions_per_thread)
    return Error::success();

  Error err = GetTrace()->GetGlobalTimer().TimeTask(
      "Context switch and Intel PT traces correlation", [&]() -> Error {
        if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) {
          m_continuous_executions_per_thread.emplace(std::move(*correlation));
          return Error::success();
        } else {
          return correlation.takeError();
        }
      });
  if (err) {
    m_setup_error = toString(std::move(err));
    return createStringError(inconvertibleErrorCode(), m_setup_error->c_str());
  }
  return Error::success();
}

size_t TraceIntelPTMultiCpuDecoder::GetNumContinuousExecutionsForThread(
    lldb::tid_t tid) const {
  if (!m_continuous_executions_per_thread)
    return 0;
  auto it = m_continuous_executions_per_thread->find(tid);
  if (it == m_continuous_executions_per_thread->end())
    return 0;
  return it->second.size();
}

size_t TraceIntelPTMultiCpuDecoder::GetTotalContinuousExecutionsCount() const {
  if (!m_continuous_executions_per_thread)
    return 0;
  size_t count = 0;
  for (const auto &kv : *m_continuous_executions_per_thread)
    count += kv.second.size();
  return count;
}

size_t
TraceIntelPTMultiCpuDecoder::GePSBBlocksCountForThread(lldb::tid_t tid) const {
  if (!m_continuous_executions_per_thread)
    return 0;
  size_t count = 0;
  auto it = m_continuous_executions_per_thread->find(tid);
  if (it == m_continuous_executions_per_thread->end())
    return 0;
  for (const IntelPTThreadContinousExecution &execution : it->second)
    count += execution.psb_blocks.size();
  return count;
}

size_t TraceIntelPTMultiCpuDecoder::GetUnattributedPSBBlocksCount() const {
  return m_unattributed_psb_blocks;
}

size_t TraceIntelPTMultiCpuDecoder::GetTotalPSBBlocksCount() const {
  return m_total_psb_blocks;
}