// SPDX-License-Identifier: GPL-2.0
/*
* dlfilter-show-cycles.c: Print the number of cycles at the start of each line
* Copyright (c) 2021, Intel Corporation.
*/
#include <perf/perf_dlfilter.h>
#include <string.h>
#include <stdio.h>
/*
 * Number of per-CPU counter slots. Samples whose cpu lies outside
 * [0, MAX_CPU) are accounted per thread instead (see filter_event_early()).
 */
#define MAX_CPU 4096
/*
 * Index of the counter a sample is charged to, selected from the sample's
 * event name by event_entry(). MAX_ENTRY is the number of counters.
 */
enum {
INSTR_CYC,	/* event name starts with "instructions" */
BRNCH_CYC,	/* event name starts with "branches" */
OTHER_CYC,	/* any other event, or no event name */
MAX_ENTRY
};
/* Per-CPU running total of cycle counts, one counter per event kind. */
static __u64 cycles[MAX_CPU][MAX_ENTRY];
/* Value of the matching cycles[][] counter when it was last printed. */
static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY];
/*
 * Per-thread counters live in a fixed-size table indexed by the low BITS
 * bits of the tid (see find_entry()).
 */
#define BITS 16
/* Number of slots in the table. */
#define TABLESZ (1 << BITS)
/*
 * Maximum number of threads stored: half of the slots, so a probe in
 * find_entry() always reaches an unused slot.
 */
#define TABLEMAX (TABLESZ / 2)
/* Reduces a tid to an initial slot index. */
#define MASK (TABLESZ - 1)
/* One table slot: the counters belonging to a single thread. */
static struct entry {
__u32 used;	/* non-zero once the slot has been claimed for a tid */
__s32 tid;	/* thread id owning this slot (valid only if used) */
__u64 cycles[MAX_ENTRY];	/* running total of cycle counts per event kind */
__u64 cycles_rpt[MAX_ENTRY];	/* value of cycles[] when last printed */
} table[TABLESZ];
/* Number of slots currently claimed in table[]. */
static int tid_cnt;
/*
 * Map an event name to the index of the counter it is charged to.
 *
 * Only the leading characters are compared, so any name that begins with
 * "instructions" or "branches" selects the corresponding counter. A NULL
 * name, or any other name, selects OTHER_CYC.
 */
static int event_entry(const char *event)
{
	static const struct {
		const char *prefix;
		size_t len;
		int idx;
	} known[] = {
		{ "instructions", sizeof("instructions") - 1, INSTR_CYC },
		{ "branches",     sizeof("branches") - 1,     BRNCH_CYC },
	};
	size_t i;

	if (event) {
		for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
			if (strncmp(event, known[i].prefix, known[i].len) == 0)
				return known[i].idx;
		}
	}

	return OTHER_CYC;
}
/*
 * Look up the table slot for a thread, claiming a new one if the thread has
 * not been seen before.
 *
 * The search starts at the slot given by the low bits of the tid and walks
 * forward, wrapping at the end of the table, until it finds either the tid
 * or an unused slot. Because at most TABLEMAX (half the table) slots are
 * ever claimed, an unused slot is always reachable.
 *
 * Returns the slot, or NULL (after printing a message to stderr) when the
 * tid is new and TABLEMAX threads are already stored.
 */
static struct entry *find_entry(__s32 tid)
{
	__u32 idx;

	/* idx stays below TABLESZ, so masking is the same as wrapping to 0 */
	for (idx = tid & MASK; table[idx].used; idx = (idx + 1) & MASK) {
		if (table[idx].tid == tid)
			return &table[idx];
	}

	/* idx now names the first unused slot on the probe path */
	if (tid_cnt >= TABLEMAX) {
		fprintf(stderr, "Too many threads\n");
		return NULL;
	}

	tid_cnt++;
	table[idx].tid = tid;
	table[idx].used = 1;
	return &table[idx];
}
/*
 * Add cnt cycles to counter pos of thread tid.
 *
 * The count is silently dropped when find_entry() cannot provide a slot
 * for the thread.
 */
static void add_entry(__s32 tid, int pos, __u64 cnt)
{
	struct entry *slot = find_entry(tid);

	if (!slot)
		return;

	slot->cycles[pos] += cnt;
}
/*
 * dlfilter hook: accumulate the sample's cycle count.
 *
 * Samples with a zero cycle count are ignored. Otherwise the count is added
 * to the counter selected by the event name: the per-CPU counter when the
 * sample's cpu is within [0, MAX_CPU), else the per-thread counter when the
 * tid is not -1. Samples with neither a usable cpu nor a tid are dropped.
 *
 * data and ctx are unused. Always returns 0.
 * NOTE(review): 0 presumably means "do not filter out the sample" in the
 * dlfilter API -- confirm against the perf-dlfilter documentation.
 */
int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
	__s32 cpu = sample->cpu;
	__s32 tid = sample->tid;
	int pos;

	if (sample->cyc_cnt == 0)
		return 0;

	pos = event_entry(sample->event);

	if (cpu < 0 || cpu >= MAX_CPU) {
		/* No usable CPU number: fall back to per-thread accounting */
		if (tid != -1)
			add_entry(tid, pos, sample->cyc_cnt);
		return 0;
	}

	cycles[cpu][pos] += sample->cyc_cnt;
	return 0;
}
/*
 * Print a running total and the change since it was last printed.
 *
 * Output is two right-aligned 10-character columns, each followed by a
 * space. The second column is left blank when delta is zero.
 */
static void print_vals(__u64 cycles, __u64 delta)
{
	unsigned long long total = (unsigned long long)cycles;

	if (!delta) {
		printf("%10llu %10s ", total, "");
		return;
	}

	printf("%10llu %10llu ", total, (unsigned long long)delta);
}
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
__s32 cpu = sample->cpu;
__s32 tid = sample->tid;
int pos;
pos = event_entry(sample->event);
if (cpu >= 0 && cpu < MAX_CPU) {
print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]);
cycles_rpt[cpu][pos] = cycles[cpu][pos];
return 0;
}
if (tid != -1) {
struct entry *e = find_entry(tid);
if (e) {
print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]);
e->cycles_rpt[pos] = e->cycles[pos];
return 0;
}
}
printf("%22s", "");
return 0;
}
/*
 * dlfilter hook: describe this filter.
 *
 * long_description: if not NULL, receives a pointer to a statically
 *                   allocated, read-only long description.
 *
 * Returns a statically allocated one-line description. Neither string may
 * be modified or freed by the caller.
 */
const char *filter_description(const char **long_description)
{
	/* const: the text is never written to */
	static const char long_desc[] = "Cycle counts are accumulated per CPU (or "
		"per thread if CPU is not recorded) from IPC information, and "
		"printed together with the change since the last print, at the "
		"start of each line. Separate counts are kept for branches, "
		"instructions or other events.";

	/* Tolerate callers that only want the short description */
	if (long_description)
		*long_description = long_desc;

	return "Print the number of cycles at the start of each line";
}