type LoopControl … const Continue … const Break … type mapIntInt … func (m mapIntInt) Clone() mapIntInt { … } func (m mapIntInt) Keys() []int { … } func (m mapIntInt) Values(keys ...int) []int { … } func sum(xs []int) int { … } func mean(xs []int) float64 { … } func standardDeviation(xs []int) float64 { … } type numaOrSocketsFirstFuncs … type numaFirst … type socketsFirst … var _ … var _ … // If NUMA nodes are higher in the memory hierarchy than sockets, then we take // from the set of NUMA Nodes as the first level. func (n *numaFirst) takeFullFirstLevel() { … } // If NUMA nodes are higher in the memory hierarchy than sockets, then we take // from the set of sockets as the second level. func (n *numaFirst) takeFullSecondLevel() { … } // If NUMA nodes are higher in the memory hierarchy than sockets, then just // sort the NUMA nodes directly, and return them. func (n *numaFirst) sortAvailableNUMANodes() []int { … } // If NUMA nodes are higher in the memory hierarchy than sockets, then we need // to pull the set of sockets out of each sorted NUMA node, and accumulate the // partial order across them. func (n *numaFirst) sortAvailableSockets() []int { … } // If NUMA nodes are higher in the memory hierarchy than sockets, then // cores sit directly below sockets in the memory hierarchy. func (n *numaFirst) sortAvailableCores() []int { … } // If sockets are higher in the memory hierarchy than NUMA nodes, then we take // from the set of sockets as the first level. func (s *socketsFirst) takeFullFirstLevel() { … } // If sockets are higher in the memory hierarchy than NUMA nodes, then we take // from the set of NUMA Nodes as the second level. func (s *socketsFirst) takeFullSecondLevel() { … } // If sockets are higher in the memory hierarchy than NUMA nodes, then we need // to pull the set of NUMA nodes out of each sorted Socket, and accumulate the // partial order across them. 
func (s *socketsFirst) sortAvailableNUMANodes() []int { … } // If sockets are higher in the memory hierarchy than NUMA nodes, then just // sort the sockets directly, and return them. func (s *socketsFirst) sortAvailableSockets() []int { … } // If sockets are higher in the memory hierarchy than NUMA nodes, then cores // sit directly below NUMA Nodes in the memory hierarchy. func (s *socketsFirst) sortAvailableCores() []int { … } type availableCPUSorter … type sortCPUsPacked … type sortCPUsSpread … var _ … var _ … func (s sortCPUsPacked) sort() []int { … } func (s sortCPUsSpread) sort() []int { … } type CPUSortingStrategy … const CPUSortingStrategyPacked … const CPUSortingStrategySpread … type cpuAccumulator … func newCPUAccumulator(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) *cpuAccumulator { … } // Returns true if the supplied NUMANode is fully available in `a.details`. // "fully available" means that all the CPUs in it are free. func (a *cpuAccumulator) isNUMANodeFree(numaID int) bool { … } // Returns true if the supplied socket is fully available in `a.details`. // "fully available" means that all the CPUs in it are free. func (a *cpuAccumulator) isSocketFree(socketID int) bool { … } // Returns true if the supplied core is fully available in `a.details`. // "fully available" means that all the CPUs in it are free. func (a *cpuAccumulator) isCoreFree(coreID int) bool { … } // Returns free NUMA Node IDs as a slice sorted by sortAvailableNUMANodes(). func (a *cpuAccumulator) freeNUMANodes() []int { … } // Returns free socket IDs as a slice sorted by sortAvailableSockets(). func (a *cpuAccumulator) freeSockets() []int { … } // Returns free core IDs as a slice sorted by sortAvailableCores(). func (a *cpuAccumulator) freeCores() []int { … } // Returns free CPU IDs as a slice sorted by sortAvailableCPUsPacked(). 
func (a *cpuAccumulator) freeCPUs() []int { … } // Sorts the provided list of NUMA nodes/sockets/cores/cpus referenced in 'ids' // by the number of available CPUs contained within them (smallest to largest). // The 'getCPU()' parameter defines the function that should be called to // retrieve the list of available CPUs for the type being referenced. If two // NUMA nodes/sockets/cores/cpus have the same number of available CPUs, they // are sorted in ascending order by their id. func (a *cpuAccumulator) sort(ids []int, getCPUs func(ids ...int) cpuset.CPUSet) { … } // Sort all NUMA nodes with at least one free CPU. // // If NUMA nodes are higher than sockets in the memory hierarchy, they are sorted by ascending number // of free CPUs that they contain. "higher than sockets in the memory hierarchy" means that NUMA nodes // contain a bigger number of CPUs (free and busy) than sockets, or equivalently that each NUMA node // contains more than one socket. // // If instead NUMA nodes are lower in the memory hierarchy than sockets, they are sorted as follows. // First, they are sorted by number of free CPUs in the sockets that contain them. Then, for each // socket they are sorted by number of free CPUs that they contain. The order is always ascending. // In other words, the relative order of two NUMA nodes is determined as follows: // 1. If the two NUMA nodes belong to different sockets, the NUMA node in the socket with the // smaller amount of free CPUs appears first. // 2. If the two NUMA nodes belong to the same socket, the NUMA node with the smaller amount of free // CPUs appears first. func (a *cpuAccumulator) sortAvailableNUMANodes() []int { … } // Sort all sockets with at least one free CPU. // // If sockets are higher than NUMA nodes in the memory hierarchy, they are sorted by ascending number // of free CPUs that they contain. 
"higher than NUMA nodes in the memory hierarchy" means that // sockets contain a bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each // socket contains more than one NUMA node. // // If instead sockets are lower in the memory hierarchy than NUMA nodes, they are sorted as follows. // First, they are sorted by number of free CPUs in the NUMA nodes that contain them. Then, for each // NUMA node they are sorted by number of free CPUs that they contain. The order is always ascending. // In other words, the relative order of two sockets is determined as follows: // 1. If the two sockets belong to different NUMA nodes, the socket in the NUMA node with the // smaller amount of free CPUs appears first. // 2. If the two sockets belong to the same NUMA node, the socket with the smaller amount of free // CPUs appears first. func (a *cpuAccumulator) sortAvailableSockets() []int { … } // Sort all cores with at least one free CPU. // // If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a // bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains // more than one NUMA node, the cores are sorted as follows. First, they are sorted by number of // free CPUs that their sockets contain. Then, for each socket, the cores in it are sorted by number // of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the cores in it are sorted // by number of free CPUs that they contain. The order is always ascending. In other words, the // relative order of two cores is determined as follows: // 1. If the two cores belong to different sockets, the core in the socket with the smaller amount of // free CPUs appears first. // 2. If the two cores belong to the same socket but different NUMA nodes, the core in the NUMA node // with the smaller amount of free CPUs appears first. // 3. 
If the two cores belong to the same NUMA node and socket, the core with the smaller amount of // free CPUs appears first. // // If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the // same way as described in the previous paragraph, except that the priority of NUMA nodes and // sockets is inverted (e.g. first sort the cores by number of free CPUs in their NUMA nodes, then, // for each NUMA node, sort the cores by number of free CPUs in their sockets, etc...). func (a *cpuAccumulator) sortAvailableCores() []int { … } // Sort all free CPUs. // // If sockets are higher in the memory hierarchy than NUMA nodes, meaning that sockets contain a // bigger number of CPUs (free and busy) than NUMA nodes, or equivalently that each socket contains // more than one NUMA node, the CPUs are sorted as follows. First, they are sorted by number of // free CPUs that their sockets contain. Then, for each socket, the CPUs in it are sorted by number // of free CPUs that their NUMA nodes contain. Then, for each NUMA node, the CPUs in it are sorted // by number of free CPUs that their cores contain. Finally, for each core, the CPUs in it are // sorted by numerical ID. The order is always ascending. In other words, the relative order of two // CPUs is determined as follows: // 1. If the two CPUs belong to different sockets, the CPU in the socket with the smaller amount of // free CPUs appears first. // 2. If the two CPUs belong to the same socket but different NUMA nodes, the CPU in the NUMA node // with the smaller amount of free CPUs appears first. // 3. If the two CPUs belong to the same socket and NUMA node but different cores, the CPU in the // core with the smaller amount of free CPUs appears first. // 4. If the two CPUs belong to the same NUMA node, socket, and core, the CPU with the smaller ID // appears first. 
// // If instead NUMA nodes are higher in the memory hierarchy than sockets, the sorting happens in the // same way as described in the previous paragraph, except that the priority of NUMA nodes and // sockets is inverted (e.g. first sort the CPUs by number of free CPUs in their NUMA nodes, then, // for each NUMA node, sort the CPUs by number of free CPUs in their sockets, etc...). func (a *cpuAccumulator) sortAvailableCPUsPacked() []int { … } // Sort all available CPUs: // - First by socket using sortAvailableSockets(). // - Then within each socket, sort cpus directly using the sort() algorithm defined above. func (a *cpuAccumulator) sortAvailableCPUsSpread() []int { … } func (a *cpuAccumulator) take(cpus cpuset.CPUSet) { … } func (a *cpuAccumulator) takeFullNUMANodes() { … } func (a *cpuAccumulator) takeFullSockets() { … } func (a *cpuAccumulator) takeFullCores() { … } func (a *cpuAccumulator) takeRemainingCPUs() { … } // rangeNUMANodesNeededToSatisfy returns minimum and maximum (in this order) number of NUMA nodes // needed to satisfy the cpuAccumulator's goal of accumulating `a.numCPUsNeeded` CPUs, assuming that // CPU groups have size given by the `cpuGroupSize` argument. func (a *cpuAccumulator) rangeNUMANodesNeededToSatisfy(cpuGroupSize int) (minNumNUMAs, maxNumNUMAs int) { … } // needsAtLeast returns true if and only if the accumulator needs at least `n` CPUs. // This means that needsAtLeast returns true even if more than `n` CPUs are needed. func (a *cpuAccumulator) needsAtLeast(n int) bool { … } // isSatisfied returns true if and only if the accumulator has all the CPUs it needs. func (a *cpuAccumulator) isSatisfied() bool { … } // isFailed returns true if and only if there aren't enough available CPUs in the system. // (e.g. the accumulator needs 4 CPUs but only 3 are available). func (a *cpuAccumulator) isFailed() bool { … } // iterateCombinations walks through all n-choose-k subsets of size k in n and // calls function 'f()' on each subset. 
For example, if n={0,1,2}, and k=2, // then f() will be called on the subsets {0,1}, {0,2}, and {1,2}. If f() ever // returns 'Break', we break early and exit the loop. func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopControl) { … } // takeByTopologyNUMAPacked returns a CPUSet containing `numCPUs` CPUs taken from the CPUs in the // set `availableCPUs`. `topo` describes how the CPUs are arranged between sockets, NUMA nodes // and physical cores (if hyperthreading is on a "CPU" is a thread rather than a full physical // core). // // If sockets are higher than NUMA nodes in the memory hierarchy (i.e. a socket contains more than // one NUMA node), the CPUs are selected as follows. // // If `numCPUs` is bigger than the total number of CPUs in a socket, and there are free (i.e. all // CPUs in them are free) sockets, the function takes as many entire free sockets as possible. // If there are no free sockets, or `numCPUs` is less than a whole socket, or the remaining number // of CPUs to take after having taken some whole sockets is less than a whole socket, the function // tries to take whole NUMA nodes. // // If the remaining number of CPUs to take is bigger than the total number of CPUs in a NUMA node, // and there are free (i.e. all CPUs in them are free) NUMA nodes, the function takes as many entire // free NUMA nodes as possible. The free NUMA nodes are taken from one socket at a time, and the // sockets are considered by ascending order of free CPUs in them. If there are no free NUMA nodes, // or the remaining number of CPUs to take after having taken full sockets and NUMA nodes is less // than a whole NUMA node, the function tries to take whole physical cores (cores). // // If `numCPUs` is bigger than the total number of CPUs in a core, and there are // free (i.e. all CPUs in them are free) cores, the function takes as many entire free cores as possible. 
// The cores are taken from one socket at a time, and the sockets are considered by // ascending order of free CPUs in them. For a given socket, the cores are taken one NUMA node at a time, // and the NUMA nodes are considered by ascending order of free CPUs in them. If there are no free // cores, or the remaining number of CPUs to take after having taken full sockets, NUMA nodes and // cores is less than a whole core, the function tries to take individual CPUs. // // The individual CPUs are taken from one socket at a time, and the sockets are considered by // ascending order of free CPUs in them. For a given socket, the CPUs are taken one NUMA node at a time, // and the NUMA nodes are considered by ascending order of free CPUs in them. For a given NUMA node, the // CPUs are taken one core at a time, and the cores are considered by ascending order of free CPUs in them. // // If NUMA nodes are higher than Sockets in the memory hierarchy (i.e. a NUMA node contains more // than one socket), the CPUs are selected as written above, with the only differences being that // (1) the order with which full sockets and full NUMA nodes are acquired is swapped, and (2) the // order with which lower-level topology elements are selected is also swapped accordingly. E.g. // when selecting full cores, the cores are selected starting from the ones in the NUMA node with // the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending // order of free CPUs). For any NUMA node, the cores are selected from the ones in the socket with // the least amount of free CPUs to the one with the highest amount of free CPUs. func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) { … } // takeByTopologyNUMADistributed returns a CPUSet of size 'numCPUs'. 
// // It generates this CPUset by allocating CPUs from 'availableCPUs' according // to the algorithm outlined in KEP-2902: // // https://github.com/kubernetes/enhancements/tree/e7f51ffbe2ee398ffd1fba4a6d854f276bfad9fb/keps/sig-node/2902-cpumanager-distribute-cpus-policy-option // // This algorithm evenly distributes CPUs across NUMA nodes in cases where more // than one NUMA node is required to satisfy the allocation. This is in // contrast to the takeByTopologyNUMAPacked algorithm, which attempts to 'pack' // CPUs onto NUMA nodes and fill them up before moving on to the next one. // // At a high-level this algorithm can be summarized as: // // For each single NUMA node: // - If all requested CPUs can be allocated from this NUMA node; // --> Do the allocation by running takeByTopologyNUMAPacked() over the // available CPUs in that NUMA node and return // // Otherwise, for each pair of NUMA nodes: // - If the set of requested CPUs (modulo 2) can be evenly split across // the 2 NUMA nodes; AND // - Any remaining CPUs (after the modulo operation) can be striped across // some subset of the NUMA nodes; // --> Do the allocation by running takeByTopologyNUMAPacked() over the // available CPUs in both NUMA nodes and return // // Otherwise, for each 3-tuple of NUMA nodes: // - If the set of requested CPUs (modulo 3) can be evenly distributed // across the 3 NUMA nodes; AND // - Any remaining CPUs (after the modulo operation) can be striped across // some subset of the NUMA nodes; // --> Do the allocation by running takeByTopologyNUMAPacked() over the // available CPUs in all three NUMA nodes and return // // ... 
// // Otherwise, for the set of all NUMA nodes: // - If the set of requested CPUs (modulo NUM_NUMA_NODES) can be evenly // distributed across all NUMA nodes; AND // - Any remaining CPUs (after the modulo operation) can be striped across // some subset of the NUMA nodes; // --> Do the allocation by running takeByTopologyNUMAPacked() over the // available CPUs in all NUMA nodes and return // // If none of the above conditions can be met, then resort back to a // best-effort fit of packing CPUs into NUMA nodes by calling // takeByTopologyNUMAPacked() over all available CPUs. // // NOTE: A "balance score" will be calculated to help find the best subset of // NUMA nodes to allocate any 'remainder' CPUs from (in cases where the total // number of CPUs to allocate cannot be evenly distributed across the chosen // set of NUMA nodes). This "balance score" is calculated as the standard // deviation of how many CPUs will be available on each NUMA node after all // evenly distributed and remainder CPUs are allocated. The subset with the // lowest "balance score" will receive the CPUs in order to keep the overall // allocation of CPUs as "balanced" as possible. // // NOTE: This algorithm has been generalized to take an additional // 'cpuGroupSize' parameter to ensure that CPUs are always allocated in groups // of size 'cpuGroupSize' according to the algorithm described above. This is // important, for example, to ensure that all CPUs (i.e. all hyperthreads) from // a single core are allocated together. func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) { … }