// SPDX-License-Identifier: GPL-2.0
/*
* fill_buf benchmark
*
* Copyright (C) 2018 Intel Corporation
*
* Authors:
* Sai Praneeth Prakhya <[email protected]>,
* Fenghua Yu <[email protected]>
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>
#include "resctrl.h"
#define CL_SIZE (64)
#define PAGE_SIZE (4 * 1024)
#define MB (1024 * 1024)
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
asm volatile("sfence\n\t"
: : : "memory");
#endif
}
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
asm volatile("clflush (%0)\n\t"
: : "r"(p) : "memory");
#endif
}
void mem_flush(unsigned char *buf, size_t buf_size)
{
unsigned char *cp = buf;
size_t i = 0;
buf_size = buf_size / CL_SIZE; /* mem size in cache lines */
for (i = 0; i < buf_size; i++)
cl_flush(&cp[i * CL_SIZE]);
sb();
}
/*
* Buffer index step advance to workaround HW prefetching interfering with
* the measurements.
*
* Must be a prime to step through all indexes of the buffer.
*
* Some primes work better than others on some architectures (from MBA/MBM
* result stability point of view).
*/
#define FILL_IDX_MULT 23
static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
unsigned int size = buf_size / (CL_SIZE / 2);
unsigned int i, idx = 0;
unsigned char sum = 0;
/*
* Read the buffer in an order that is unexpected by HW prefetching
* optimizations to prevent them interfering with the caching pattern.
*
* The read order is (in terms of halves of cachelines):
* i * FILL_IDX_MULT % size
* The formula is open-coded below to avoiding modulo inside the loop
* as it improves MBA/MBM result stability on some architectures.
*/
for (i = 0; i < size; i++) {
sum += buf[idx * (CL_SIZE / 2)];
idx += FILL_IDX_MULT;
while (idx >= size)
idx -= size;
}
return sum;
}
static void fill_one_span_write(unsigned char *buf, size_t buf_size)
{
unsigned char *end_ptr = buf + buf_size;
unsigned char *p;
p = buf;
while (p < end_ptr) {
*p = '1';
p += (CL_SIZE / 2);
}
}
void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
int ret = 0;
while (1) {
ret = fill_one_span_read(buf, buf_size);
if (once)
break;
}
/* Consume read result so that reading memory is not optimized out. */
*value_sink = ret;
}
static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
while (1) {
fill_one_span_write(buf, buf_size);
if (once)
break;
}
}
unsigned char *alloc_buffer(size_t buf_size, int memflush)
{
void *buf = NULL;
uint64_t *p64;
size_t s64;
int ret;
ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
if (ret < 0)
return NULL;
/* Initialize the buffer */
p64 = buf;
s64 = buf_size / sizeof(uint64_t);
while (s64 > 0) {
*p64 = (uint64_t)rand();
p64 += (CL_SIZE / sizeof(uint64_t));
s64 -= (CL_SIZE / sizeof(uint64_t));
}
/* Flush the memory before using to avoid "cache hot pages" effect */
if (memflush)
mem_flush(buf, buf_size);
return buf;
}
int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
unsigned char *buf;
buf = alloc_buffer(buf_size, memflush);
if (!buf)
return -1;
if (op == 0)
fill_cache_read(buf, buf_size, once);
else
fill_cache_write(buf, buf_size, once);
free(buf);
return 0;
}