type Candidate …

type candidate …

// Victims returns s.victims.
func (s *candidate) Victims() *extenderv1.Victims { … }

// Name returns s.name.
func (s *candidate) Name() string { … }

type candidateList …

func newCandidateList(size int32) *candidateList { … }

// add adds a new candidate to the internal array atomically.
func (cl *candidateList) add(c *candidate) { … }

// size returns the number of candidates stored. Note that some add() operations
// might still be executing when this is called, so care must be taken to
// ensure that all add() operations complete before accessing the elements of
// the list.
func (cl *candidateList) size() int32 { … }

// get returns the internal candidate array. This function is NOT atomic and
// assumes that all add() operations have been completed.
func (cl *candidateList) get() []Candidate { … }
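The add/size/get contract above describes a lock-free collection: each writer reserves a slot in a pre-sized array with an atomic counter increment, so add() can run from parallel goroutines, while get() must only be called once all writers have finished. Below is a minimal sketch of that pattern, not the scheduler's actual implementation; it uses a plain string payload instead of the candidate type, and the names lockFreeList and newLockFreeList are hypothetical.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// lockFreeList mirrors the candidateList idea: a fixed-capacity slice plus an
// atomic cursor. Writers reserve a slot with atomic.AddInt32 and never resize,
// so no mutex is needed on the write path.
type lockFreeList struct {
	idx   int32
	items []string
}

func newLockFreeList(size int32) *lockFreeList {
	return &lockFreeList{idx: -1, items: make([]string, size)}
}

// add reserves the next slot atomically. The write itself is only guaranteed
// visible to readers that synchronize with the writers (e.g. via a WaitGroup),
// which is why get() below must not race with add().
func (l *lockFreeList) add(item string) {
	if i := atomic.AddInt32(&l.idx, 1); i < int32(len(l.items)) {
		l.items[i] = item
	}
}

// size may be read while add() calls are still in flight; callers must ensure
// all writers have finished before using it to access the elements.
func (l *lockFreeList) size() int32 {
	n := atomic.LoadInt32(&l.idx) + 1
	if limit := int32(len(l.items)); n > limit {
		n = limit
	}
	return n
}

// get assumes all add() calls have completed.
func (l *lockFreeList) get() []string {
	return l.items[:l.size()]
}

func main() {
	l := newLockFreeList(8)
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			l.add(fmt.Sprintf("candidate-%d", i))
		}(i)
	}
	wg.Wait() // all add() calls done; get() is now safe
	fmt.Println(len(l.get()), "candidates collected")
}

The WaitGroup in main is what makes get() safe here: it provides the happens-before edge that the comments on size() and get() require.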
type Interface …

type Evaluator …

// Preempt returns a PostFilterResult carrying a suggested nominatedNodeName, along with a Status.
// The semantics of the returned <PostFilterResult, Status> pair vary across scenarios:
//
//   - <nil, Error>. This denotes a transient/rare error that may be self-healed in future cycles.
//
//   - <nil, Unschedulable>. This status is mostly as expected, e.g. the preemptor is waiting for the
//     victims to be fully terminated.
//
//   - In both cases above, a nil PostFilterResult is returned to keep the pod's nominatedNodeName unchanged.
//
//   - <non-nil PostFilterResult, Unschedulable>. It indicates the pod cannot be scheduled even with preemption.
//     In this case, a non-nil PostFilterResult is returned and result.NominatingMode instructs how to deal with
//     the nominatedNodeName.
//
//   - <non-nil PostFilterResult, Success>. It's the regular happy path,
//     and the non-empty nominatedNodeName will be applied to the preemptor pod.
func (ev *Evaluator) Preempt(ctx context.Context, pod *v1.Pod, m framework.NodeToStatusReader) (*framework.PostFilterResult, *framework.Status) { … }

// findCandidates calculates a slice of preemption candidates.
// Each candidate is executable to make the given <pod> schedulable.
func (ev *Evaluator) findCandidates(ctx context.Context, allNodes []*framework.NodeInfo, pod *v1.Pod, m framework.NodeToStatusReader) ([]Candidate, *framework.NodeToStatus, error) { … }

// callExtenders calls the given <extenders> to select the list of feasible candidates.
// Only extenders that support preemption are used to check <candidates>.
// Extenders which do not support preemption may later prevent the preemptor from being scheduled on the nominated
// node. In that case, the scheduler will find a different host for the preemptor in subsequent scheduling cycles.
func (ev *Evaluator) callExtenders(logger klog.Logger, pod *v1.Pod, candidates []Candidate) ([]Candidate, *framework.Status) { … }

// SelectCandidate chooses the best-fit candidate from the given <candidates> and returns it.
// NOTE: This method is exported for easier testing in default preemption.
func (ev *Evaluator) SelectCandidate(ctx context.Context, candidates []Candidate) Candidate { … }

// prepareCandidate does some preparation work before nominating the selected candidate:
//   - Evict the victim pods
//   - Reject the victim pods if they are in the waitingPod map
//   - Clear the lower-priority pods' nominatedNodeName status if needed
func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.Pod, pluginName string) *framework.Status { … }

func getPodDisruptionBudgets(pdbLister policylisters.PodDisruptionBudgetLister) ([]*policy.PodDisruptionBudget, error) { … }

// pickOneNodeForPreemption chooses one node among the given nodes.
// It assumes pods in each map entry are ordered by decreasing priority.
// If scoreFuncs is not empty, it picks a node based on the scores that scoreFuncs returns.
// If scoreFuncs is empty, it picks a node based on the following criteria:
//  1. A node with the minimum number of PDB violations.
//  2. A node whose highest-priority victim has the lowest priority.
//  3. Ties are broken by the sum of priorities of all victims.
//  4. If there are still ties, the node with the minimum number of victims is picked.
//  5. If there are still ties, the node with the latest start time of all highest-priority victims is picked.
//  6. If there are still ties, the first such node is picked (sort of randomly).
// The 'minNodes1' and 'minNodes2' slices are reused here to save memory allocation and garbage-collection time.
func pickOneNodeForPreemption(logger klog.Logger, nodesToVictims map[string]*extenderv1.Victims, scoreFuncs []func(node string) int64) string { … }

// getLowerPriorityNominatedPods returns pods whose priority is smaller than the
// priority of the given "pod" and are nominated to run on the given node.
// Note: We could possibly check if the nominated lower-priority pods still fit
// and return those that no longer fit, but that would require lots of
// manipulation of NodeInfo and PreFilter state per nominated pod. It may not be
// worth the complexity, especially because we generally expect to have a very
// small number of nominated pods per node.
func getLowerPriorityNominatedPods(logger klog.Logger, pn framework.PodNominator, pod *v1.Pod, nodeName string) []*v1.Pod { … }

// DryRunPreemption simulates preemption logic on <potentialNodes> in parallel and
// returns preemption candidates plus a map indicating the statuses of filtered nodes.
// The number of candidates depends on the constraints defined in the plugin's args. In the returned list of
// candidates, ones that do not violate PDBs are preferred over ones that do.
// NOTE: This method is exported for easier testing in default preemption.
func (ev *Evaluator) DryRunPreemption(ctx context.Context, pod *v1.Pod, potentialNodes []*framework.NodeInfo, pdbs []*policy.PodDisruptionBudget, offset int32, candidatesNum int32) ([]Candidate, *framework.NodeToStatus, error) { … }
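The tie-breaking rules listed for pickOneNodeForPreemption above amount to a lexicographic comparison over per-node victim statistics. The sketch below illustrates only that ordering, under simplifying assumptions: nodeVictims, pickNode, and better are hypothetical names, nodeVictims stands in for extenderv1.Victims with precomputed numbers, the scoreFuncs fast path is omitted, and only criteria 1-5 are mirrored, with remaining ties falling to whichever node the map iteration happens to visit first (matching the "sort of randomly" note).

package main

import (
	"fmt"
	"time"
)

// nodeVictims is a simplified stand-in for the per-node victim data:
// just the numbers the tie-breaking rules need.
type nodeVictims struct {
	numPDBViolations    int64
	highestPriority     int32     // priority of the highest-priority victim on the node
	sumPriorities       int64     // sum of all victims' priorities
	numVictims          int
	latestHighPrioStart time.Time // latest start time among the highest-priority victims
}

// pickNode applies the criteria lexicographically: fewer PDB violations, then a
// lower highest victim priority, then a lower priority sum, then fewer victims,
// then a later start time of the highest-priority victim.
func pickNode(victims map[string]nodeVictims) string {
	var best string
	for name, v := range victims {
		if best == "" || better(v, victims[best]) {
			best = name
		}
	}
	return best
}

func better(a, b nodeVictims) bool {
	if a.numPDBViolations != b.numPDBViolations {
		return a.numPDBViolations < b.numPDBViolations
	}
	if a.highestPriority != b.highestPriority {
		return a.highestPriority < b.highestPriority
	}
	if a.sumPriorities != b.sumPriorities {
		return a.sumPriorities < b.sumPriorities
	}
	if a.numVictims != b.numVictims {
		return a.numVictims < b.numVictims
	}
	return a.latestHighPrioStart.After(b.latestHighPrioStart)
}

func main() {
	now := time.Now()
	nodes := map[string]nodeVictims{
		"node-a": {numPDBViolations: 1, highestPriority: 100, sumPriorities: 100, numVictims: 1, latestHighPrioStart: now},
		"node-b": {numPDBViolations: 0, highestPriority: 200, sumPriorities: 200, numVictims: 1, latestHighPrioStart: now},
		"node-c": {numPDBViolations: 0, highestPriority: 100, sumPriorities: 150, numVictims: 2, latestHighPrioStart: now},
	}
	fmt.Println("picked:", pickNode(nodes)) // node-c: no PDB violations and the lowest highest-priority victim
}

Because the comparison is lexicographic, each later criterion only matters when all earlier ones tie, which is exactly how the numbered list above is meant to be read.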