// SPDX-License-Identifier: GPL-2.0-only #include "cgroup-internal.h" #include <linux/sched/cputime.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/btf_ids.h> #include <trace/events/cgroup.h> static DEFINE_SPINLOCK(cgroup_rstat_lock); static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock); static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu); static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu) { … } /* * Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock). * * This makes it easier to diagnose locking issues and contention in * production environments. The parameter @fast_path determine the * tracepoints being added, allowing us to diagnose "flush" related * operations without handling high-frequency fast-path "update" events. */ static __always_inline unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu, struct cgroup *cgrp, const bool fast_path) { … } static __always_inline void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu, struct cgroup *cgrp, unsigned long flags, const bool fast_path) { … } /** * cgroup_rstat_updated - keep track of updated rstat_cpu * @cgrp: target cgroup * @cpu: cpu on which rstat_cpu was updated * * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching * rstat_cpu->updated_children list. See the comment on top of * cgroup_rstat_cpu definition for details. */ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) { … } /** * cgroup_rstat_push_children - push children cgroups into the given list * @head: current head of the list (= subtree root) * @child: first child of the root * @cpu: target cpu * Return: A new singly linked list of cgroups to be flush * * Iteratively traverse down the cgroup_rstat_cpu updated tree level by * level and push all the parents first before their next level children * into a singly linked list built from the tail backward like "pushing" * cgroups into a stack. The root is pushed by the caller. */ static struct cgroup *cgroup_rstat_push_children(struct cgroup *head, struct cgroup *child, int cpu) { … } /** * cgroup_rstat_updated_list - return a list of updated cgroups to be flushed * @root: root of the cgroup subtree to traverse * @cpu: target cpu * Return: A singly linked list of cgroups to be flushed * * Walks the updated rstat_cpu tree on @cpu from @root. During traversal, * each returned cgroup is unlinked from the updated tree. * * The only ordering guarantee is that, for a parent and a child pair * covered by a given traversal, the child is before its parent in * the list. * * Note that updated_children is self terminated and points to a list of * child cgroups if not empty. Whereas updated_next is like a sibling link * within the children list and terminated by the parent cgroup. An exception * here is the cgroup root whose updated_next can be self terminated. */ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu) { … } /* * A hook for bpf stat collectors to attach to and flush their stats. * Together with providing bpf kfuncs for cgroup_rstat_updated() and * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that * collect cgroup stats can integrate with rstat for efficient flushing. * * A static noinline declaration here could cause the compiler to optimize away * the function. A global noinline declaration will keep the definition, but may * optimize away the callsite. Therefore, __weak is needed to ensure that the * call is still emitted, by telling the compiler that we don't know what the * function might eventually be. */ __bpf_hook_start(); __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu) { … } __bpf_hook_end(); /* * Helper functions for locking cgroup_rstat_lock. * * This makes it easier to diagnose locking issues and contention in * production environments. The parameter @cpu_in_loop indicate lock * was released and re-taken when collection data from the CPUs. The * value -1 is used when obtaining the main lock else this is the CPU * number processed last. */ static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop) __acquires(&cgroup_rstat_lock) { … } static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop) __releases(&cgroup_rstat_lock) { … } /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock) { … } /** * cgroup_rstat_flush - flush stats in @cgrp's subtree * @cgrp: target cgroup * * Collect all per-cpu stats in @cgrp's subtree into the global counters * and propagate them upwards. After this function returns, all cgroups in * the subtree have up-to-date ->stat. * * This also gets all cgroups in the subtree including @cgrp off the * ->updated_children lists. * * This function may block. */ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp) { … } /** * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold * @cgrp: target cgroup * * Flush stats in @cgrp's subtree and prevent further flushes. Must be * paired with cgroup_rstat_flush_release(). * * This function may block. */ void cgroup_rstat_flush_hold(struct cgroup *cgrp) __acquires(&cgroup_rstat_lock) { … } /** * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold() * @cgrp: cgroup used by tracepoint */ void cgroup_rstat_flush_release(struct cgroup *cgrp) __releases(&cgroup_rstat_lock) { … } int cgroup_rstat_init(struct cgroup *cgrp) { … } void cgroup_rstat_exit(struct cgroup *cgrp) { … } void __init cgroup_rstat_boot(void) { … } /* * Functions for cgroup basic resource statistics implemented on top of * rstat. */ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat, struct cgroup_base_stat *src_bstat) { … } static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat, struct cgroup_base_stat *src_bstat) { … } static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) { … } static struct cgroup_rstat_cpu * cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags) { … } static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, struct cgroup_rstat_cpu *rstatc, unsigned long flags) { … } void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) { … } void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec) { … } /* * compute the cputime for the root cgroup by getting the per cpu data * at a global level, then categorizing the fields in a manner consistent * with how it is done by __cgroup_account_cputime_field for each bit of * cpu time attributed to a cgroup. */ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) { … } static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat *bstat) { … } void cgroup_base_stat_cputime_show(struct seq_file *seq) { … } /* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */ BTF_KFUNCS_START(bpf_rstat_kfunc_ids) BTF_ID_FLAGS(…) BTF_ID_FLAGS(…) BTF_KFUNCS_END(…) static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = …; static int __init bpf_rstat_kfunc_init(void) { … } late_initcall(bpf_rstat_kfunc_init);