// SPDX-License-Identifier: GPL-2.0-or-later /* * Hierarchical Budget Worst-case Fair Weighted Fair Queueing * (B-WF2Q+): hierarchical scheduling algorithm by which the BFQ I/O * scheduler schedules generic entities. The latter can represent * either single bfq queues (associated with processes) or groups of * bfq queues (associated with cgroups). */ #include "bfq-iosched.h" /** * bfq_gt - compare two timestamps. * @a: first ts. * @b: second ts. * * Return @a > @b, dealing with wrapping correctly. */ static int bfq_gt(u64 a, u64 b) { … } static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree) { … } static unsigned int bfq_class_idx(struct bfq_entity *entity) { … } unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd) { … } static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, bool expiration); static bool bfq_update_parent_budget(struct bfq_entity *next_in_service); /** * bfq_update_next_in_service - update sd->next_in_service * @sd: sched_data for which to perform the update. * @new_entity: if not NULL, pointer to the entity whose activation, * requeueing or repositioning triggered the invocation of * this function. * @expiration: id true, this function is being invoked after the * expiration of the in-service entity * * This function is called to update sd->next_in_service, which, in * its turn, may change as a consequence of the insertion or * extraction of an entity into/from one of the active trees of * sd. These insertions/extractions occur as a consequence of * activations/deactivations of entities, with some activations being * 'true' activations, and other activations being requeueings (i.e., * implementing the second, requeueing phase of the mechanism used to * reposition an entity in its active tree; see comments on * __bfq_activate_entity and __bfq_requeue_entity for details). In * both the last two activation sub-cases, new_entity points to the * just activated or requeued entity. * * Returns true if sd->next_in_service changes in such a way that * entity->parent may become the next_in_service for its parent * entity. */ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, struct bfq_entity *new_entity, bool expiration) { … } #ifdef CONFIG_BFQ_GROUP_IOSCHED /* * Returns true if this budget changes may let next_in_service->parent * become the next_in_service entity for its parent entity. */ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) { … } /* * This function tells whether entity stops being a candidate for next * service, according to the restrictive definition of the field * next_in_service. In particular, this function is invoked for an * entity that is about to be set in service. * * If entity is a queue, then the entity is no longer a candidate for * next service according to the that definition, because entity is * about to become the in-service queue. This function then returns * true if entity is a queue. * * In contrast, entity could still be a candidate for next service if * it is not a queue, and has more than one active child. In fact, * even if one of its children is about to be set in service, other * active children may still be the next to serve, for the parent * entity, even according to the above definition. As a consequence, a * non-queue entity is not a candidate for next-service only if it has * only one active child. And only if this condition holds, then this * function returns true for a non-queue entity. */ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) { … } static void bfq_inc_active_entities(struct bfq_entity *entity) { … } static void bfq_dec_active_entities(struct bfq_entity *entity) { … } #else /* CONFIG_BFQ_GROUP_IOSCHED */ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) { return false; } static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) { return true; } static void bfq_inc_active_entities(struct bfq_entity *entity) { } static void bfq_dec_active_entities(struct bfq_entity *entity) { } #endif /* CONFIG_BFQ_GROUP_IOSCHED */ /* * Shift for timestamp calculations. This actually limits the maximum * service allowed in one timestamp delta (small shift values increase it), * the maximum total weight that can be used for the queues in the system * (big shift values increase it), and the period of virtual time * wraparounds. */ #define WFQ_SERVICE_SHIFT … struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) { … } /** * bfq_delta - map service into the virtual time domain. * @service: amount of service. * @weight: scale factor (weight of an entity or weight sum). */ static u64 bfq_delta(unsigned long service, unsigned long weight) { … } /** * bfq_calc_finish - assign the finish time to an entity. * @entity: the entity to act upon. * @service: the service to be charged to the entity. */ static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) { … } /** * bfq_entity_of - get an entity from a node. * @node: the node field of the entity. * * Convert a node pointer to the relative entity. This is used only * to simplify the logic of some functions and not as the generic * conversion mechanism because, e.g., in the tree walking functions, * the check for a %NULL value would be redundant. */ struct bfq_entity *bfq_entity_of(struct rb_node *node) { … } /** * bfq_extract - remove an entity from a tree. * @root: the tree root. * @entity: the entity to remove. */ static void bfq_extract(struct rb_root *root, struct bfq_entity *entity) { … } /** * bfq_idle_extract - extract an entity from the idle tree. * @st: the service tree of the owning @entity. * @entity: the entity being removed. */ static void bfq_idle_extract(struct bfq_service_tree *st, struct bfq_entity *entity) { … } /** * bfq_insert - generic tree insertion. * @root: tree root. * @entity: entity to insert. * * This is used for the idle and the active tree, since they are both * ordered by finish time. */ static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) { … } /** * bfq_update_min - update the min_start field of a entity. * @entity: the entity to update. * @node: one of its children. * * This function is called when @entity may store an invalid value for * min_start due to updates to the active tree. The function assumes * that the subtree rooted at @node (which may be its left or its right * child) has a valid min_start value. */ static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node) { … } /** * bfq_update_active_node - recalculate min_start. * @node: the node to update. * * @node may have changed position or one of its children may have moved, * this function updates its min_start value. The left and right subtrees * are assumed to hold a correct min_start value. */ static void bfq_update_active_node(struct rb_node *node) { … } /** * bfq_update_active_tree - update min_start for the whole active tree. * @node: the starting node. * * @node must be the deepest modified node after an update. This function * updates its min_start using the values held by its children, assuming * that they did not change, and then updates all the nodes that may have * changed in the path to the root. The only nodes that may have changed * are the ones in the path or their siblings. */ static void bfq_update_active_tree(struct rb_node *node) { … } /** * bfq_active_insert - insert an entity in the active tree of its * group/device. * @st: the service tree of the entity. * @entity: the entity being inserted. * * The active tree is ordered by finish time, but an extra key is kept * per each node, containing the minimum value for the start times of * its children (and the node itself), so it's possible to search for * the eligible node with the lowest finish time in logarithmic time. */ static void bfq_active_insert(struct bfq_service_tree *st, struct bfq_entity *entity) { … } /** * bfq_ioprio_to_weight - calc a weight from an ioprio. * @ioprio: the ioprio value to convert. */ unsigned short bfq_ioprio_to_weight(int ioprio) { … } /** * bfq_weight_to_ioprio - calc an ioprio from a weight. * @weight: the weight value to convert. * * To preserve as much as possible the old only-ioprio user interface, * 0 is used as an escape ioprio value for weights (numerically) equal or * larger than IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF. */ static unsigned short bfq_weight_to_ioprio(int weight) { … } static void bfq_get_entity(struct bfq_entity *entity) { … } /** * bfq_find_deepest - find the deepest node that an extraction can modify. * @node: the node being removed. * * Do the first step of an extraction in an rb tree, looking for the * node that will replace @node, and returning the deepest node that * the following modifications to the tree can touch. If @node is the * last node in the tree return %NULL. */ static struct rb_node *bfq_find_deepest(struct rb_node *node) { … } /** * bfq_active_extract - remove an entity from the active tree. * @st: the service_tree containing the tree. * @entity: the entity being removed. */ static void bfq_active_extract(struct bfq_service_tree *st, struct bfq_entity *entity) { … } /** * bfq_idle_insert - insert an entity into the idle tree. * @st: the service tree containing the tree. * @entity: the entity to insert. */ static void bfq_idle_insert(struct bfq_service_tree *st, struct bfq_entity *entity) { … } /** * bfq_forget_entity - do not consider entity any longer for scheduling * @st: the service tree. * @entity: the entity being removed. * @is_in_service: true if entity is currently the in-service entity. * * Forget everything about @entity. In addition, if entity represents * a queue, and the latter is not in service, then release the service * reference to the queue (the one taken through bfq_get_entity). In * fact, in this case, there is really no more service reference to * the queue, as the latter is also outside any service tree. If, * instead, the queue is in service, then __bfq_bfqd_reset_in_service * will take care of putting the reference when the queue finally * stops being served. */ static void bfq_forget_entity(struct bfq_service_tree *st, struct bfq_entity *entity, bool is_in_service) { … } /** * bfq_put_idle_entity - release the idle tree ref of an entity. * @st: service tree for the entity. * @entity: the entity being released. */ void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity) { … } /** * bfq_forget_idle - update the idle tree if necessary. * @st: the service tree to act upon. * * To preserve the global O(log N) complexity we only remove one entry here; * as the idle tree will not grow indefinitely this can be done safely. */ static void bfq_forget_idle(struct bfq_service_tree *st) { … } struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity) { … } /* * Update weight and priority of entity. If update_class_too is true, * then update the ioprio_class of entity too. * * The reason why the update of ioprio_class is controlled through the * last parameter is as follows. Changing the ioprio class of an * entity implies changing the destination service trees for that * entity. If such a change occurred when the entity is already on one * of the service trees for its previous class, then the state of the * entity would become more complex: none of the new possible service * trees for the entity, according to bfq_entity_service_tree(), would * match any of the possible service trees on which the entity * is. Complex operations involving these trees, such as entity * activations and deactivations, should take into account this * additional complexity. To avoid this issue, this function is * invoked with update_class_too unset in the points in the code where * entity may happen to be on some tree. */ struct bfq_service_tree * __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, struct bfq_entity *entity, bool update_class_too) { … } /** * bfq_bfqq_served - update the scheduler status after selection for * service. * @bfqq: the queue being served. * @served: bytes to transfer. * * NOTE: this can be optimized, as the timestamps of upper level entities * are synchronized every time a new bfqq is selected for service. By now, * we keep it to better check consistency. */ void bfq_bfqq_served(struct bfq_queue *bfqq, int served) { … } /** * bfq_bfqq_charge_time - charge an amount of service equivalent to the length * of the time interval during which bfqq has been in * service. * @bfqd: the device * @bfqq: the queue that needs a service update. * @time_ms: the amount of time during which the queue has received service * * If a queue does not consume its budget fast enough, then providing * the queue with service fairness may impair throughput, more or less * severely. For this reason, queues that consume their budget slowly * are provided with time fairness instead of service fairness. This * goal is achieved through the BFQ scheduling engine, even if such an * engine works in the service, and not in the time domain. The trick * is charging these queues with an inflated amount of service, equal * to the amount of service that they would have received during their * service slot if they had been fast, i.e., if their requests had * been dispatched at a rate equal to the estimated peak rate. * * It is worth noting that time fairness can cause important * distortions in terms of bandwidth distribution, on devices with * internal queueing. The reason is that I/O requests dispatched * during the service slot of a queue may be served after that service * slot is finished, and may have a total processing time loosely * correlated with the duration of the service slot. This is * especially true for short service slots. */ void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, unsigned long time_ms) { … } static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, struct bfq_service_tree *st, bool backshifted) { … } /** * __bfq_activate_entity - handle activation of entity. * @entity: the entity being activated. * @non_blocking_wait_rq: true if entity was waiting for a request * * Called for a 'true' activation, i.e., if entity is not active and * one of its children receives a new request. * * Basically, this function updates the timestamps of entity and * inserts entity into its active tree, after possibly extracting it * from its idle tree. */ static void __bfq_activate_entity(struct bfq_entity *entity, bool non_blocking_wait_rq) { … } /** * __bfq_requeue_entity - handle requeueing or repositioning of an entity. * @entity: the entity being requeued or repositioned. * * Requeueing is needed if this entity stops being served, which * happens if a leaf descendant entity has expired. On the other hand, * repositioning is needed if the next_inservice_entity for the child * entity has changed. See the comments inside the function for * details. * * Basically, this function: 1) removes entity from its active tree if * present there, 2) updates the timestamps of entity and 3) inserts * entity back into its active tree (in the new, right position for * the new values of the timestamps). */ static void __bfq_requeue_entity(struct bfq_entity *entity) { … } static void __bfq_activate_requeue_entity(struct bfq_entity *entity, bool non_blocking_wait_rq) { … } /** * bfq_activate_requeue_entity - activate or requeue an entity representing a * bfq_queue, and activate, requeue or reposition * all ancestors for which such an update becomes * necessary. * @entity: the entity to activate. * @non_blocking_wait_rq: true if this entity was waiting for a request * @requeue: true if this is a requeue, which implies that bfqq is * being expired; thus ALL its ancestors stop being served and must * therefore be requeued * @expiration: true if this function is being invoked in the expiration path * of the in-service queue */ static void bfq_activate_requeue_entity(struct bfq_entity *entity, bool non_blocking_wait_rq, bool requeue, bool expiration) { … } /** * __bfq_deactivate_entity - update sched_data and service trees for * entity, so as to represent entity as inactive * @entity: the entity being deactivated. * @ins_into_idle_tree: if false, the entity will not be put into the * idle tree. * * If necessary and allowed, puts entity into the idle tree. NOTE: * entity may be on no tree if in service. */ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) { … } /** * bfq_deactivate_entity - deactivate an entity representing a bfq_queue. * @entity: the entity to deactivate. * @ins_into_idle_tree: true if the entity can be put into the idle tree * @expiration: true if this function is being invoked in the expiration path * of the in-service queue */ static void bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree, bool expiration) { … } /** * bfq_calc_vtime_jump - compute the value to which the vtime should jump, * if needed, to have at least one entity eligible. * @st: the service tree to act upon. * * Assumes that st is not empty. */ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) { … } static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value) { … } /** * bfq_first_active_entity - find the eligible entity with * the smallest finish time * @st: the service tree to select from. * @vtime: the system virtual to use as a reference for eligibility * * This function searches the first schedulable entity, starting from the * root of the tree and going on the left every time on this side there is * a subtree with at least one eligible (start <= vtime) entity. The path on * the right is followed only if a) the left subtree contains no eligible * entities and b) no eligible entity has been found yet. */ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st, u64 vtime) { … } /** * __bfq_lookup_next_entity - return the first eligible entity in @st. * @st: the service tree. * @in_service: whether or not there is an in-service entity for the sched_data * this active tree belongs to. * * If there is no in-service entity for the sched_data st belongs to, * then return the entity that will be set in service if: * 1) the parent entity this st belongs to is set in service; * 2) no entity belonging to such parent entity undergoes a state change * that would influence the timestamps of the entity (e.g., becomes idle, * becomes backlogged, changes its budget, ...). * * In this first case, update the virtual time in @st too (see the * comments on this update inside the function). * * In contrast, if there is an in-service entity, then return the * entity that would be set in service if not only the above * conditions, but also the next one held true: the currently * in-service entity, on expiration, * 1) gets a finish time equal to the current one, or * 2) is not eligible any more, or * 3) is idle. */ static struct bfq_entity * __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service) { … } /** * bfq_lookup_next_entity - return the first eligible entity in @sd. * @sd: the sched_data. * @expiration: true if we are on the expiration path of the in-service queue * * This function is invoked when there has been a change in the trees * for sd, and we need to know what is the new next entity to serve * after this change. */ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, bool expiration) { … } bool next_queue_may_preempt(struct bfq_data *bfqd) { … } /* * Get next queue for service. */ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) { … } /* returns true if the in-service queue gets freed */ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) { … } void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool ins_into_idle_tree, bool expiration) { … } void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) { … } void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool expiration) { … } void bfq_add_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq) { … } void bfq_del_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq) { … } /* * Called when the bfqq no longer has requests pending, remove it from * the service tree. As a special case, it can be invoked during an * expiration. */ void bfq_del_bfqq_busy(struct bfq_queue *bfqq, bool expiration) { … } /* * Called when an inactive queue receives a new request. */ void bfq_add_bfqq_busy(struct bfq_queue *bfqq) { … }