/* * Copyright 2012-2014 Ecole Normale Superieure * Copyright 2014 INRIA Rocquencourt * * Use of this software is governed by the MIT license * * Written by Sven Verdoolaege, * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt, * B.P. 105 - 78153 Le Chesnay, France */ #include <limits.h> #include <isl/id.h> #include <isl/val.h> #include <isl/space.h> #include <isl/aff.h> #include <isl/constraint.h> #include <isl/set.h> #include <isl/ilp.h> #include <isl/union_set.h> #include <isl/union_map.h> #include <isl/schedule_node.h> #include <isl/options.h> #include <isl_sort.h> #include <isl_tarjan.h> #include <isl_ast_private.h> #include <isl_ast_build_expr.h> #include <isl_ast_build_private.h> #include <isl_ast_graft_private.h> /* Try and reduce the number of disjuncts in the representation of "set", * without dropping explicit representations of local variables. */ static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set) { … } /* Data used in generate_domain. * * "build" is the input build. * "list" collects the results. */ struct isl_generate_domain_data { … }; static __isl_give isl_ast_graft_list *generate_next_level( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build); static __isl_give isl_ast_graft_list *generate_code( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, int internal); /* Generate an AST for a single domain based on * the (non single valued) inverse schedule "executed". * * We extend the schedule with the iteration domain * and continue generating through a call to generate_code. * * In particular, if executed has the form * * S -> D * * then we continue generating code on * * [S -> D] -> D * * The extended inverse schedule is clearly single valued * ensuring that the nested generate_code will not reach this function, * but will instead create calls to all elements of D that need * to be executed from the current schedule domain. */ static isl_stat generate_non_single_valued(__isl_take isl_map *executed, struct isl_generate_domain_data *data) { … } /* Call the at_each_domain callback, if requested by the user, * after recording the current inverse schedule in the build. */ static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft, __isl_keep isl_map *executed, __isl_keep isl_ast_build *build) { … } /* Generate a call expression for the single executed * domain element "map" and put a guard around it based its (simplified) * domain. "executed" is the original inverse schedule from which "map" * has been derived. In particular, "map" is either identical to "executed" * or it is the result of gisting "executed" with respect to the build domain. * "executed" is only used if there is an at_each_domain callback. * * At this stage, any pending constraints in the build can no longer * be simplified with respect to any enforced constraints since * the call node does not have any enforced constraints. * Since all pending constraints not covered by any enforced constraints * will be added as a guard to the graft in create_node_scaled, * even in the eliminated case, the pending constraints * can be considered to have been generated by outer constructs. * * If the user has set an at_each_domain callback, it is called * on the constructed call expression node. */ static isl_stat add_domain(__isl_take isl_map *executed, __isl_take isl_map *map, struct isl_generate_domain_data *data) { … } /* Generate an AST for a single domain based on * the inverse schedule "executed" and add it to data->list. * * If there is more than one domain element associated to the current * schedule "time", then we need to continue the generation process * in generate_non_single_valued. * Note that the inverse schedule being single-valued may depend * on constraints that are only available in the original context * domain specified by the user. We therefore first introduce * some of the constraints of data->build->domain. In particular, * we intersect with a single-disjunct approximation of this set. * We perform this approximation to avoid further splitting up * the executed relation, possibly introducing a disjunctive guard * on the statement. * * On the other hand, we only perform the test after having taken the gist * of the domain as the resulting map is the one from which the call * expression is constructed. Using this map to construct the call * expression usually yields simpler results in cases where the original * map is not obviously single-valued. * If the original map is obviously single-valued, then the gist * operation is skipped. * * Because we perform the single-valuedness test on the gisted map, * we may in rare cases fail to recognize that the inverse schedule * is single-valued. This becomes problematic if this happens * from the recursive call through generate_non_single_valued * as we would then end up in an infinite recursion. * We therefore check if we are inside a call to generate_non_single_valued * and revert to the ungisted map if the gisted map turns out not to be * single-valued. * * Otherwise, call add_domain to generate a call expression (with guard) and * to call the at_each_domain callback, if any. */ static isl_stat generate_domain(__isl_take isl_map *executed, void *user) { … } /* Call build->create_leaf to a create "leaf" node in the AST, * encapsulate the result in an isl_ast_graft and return the result * as a 1-element list. * * Note that the node returned by the user may be an entire tree. * * Since the node itself cannot enforce any constraints, we turn * all pending constraints into guards and add them to the resulting * graft to ensure that they will be generated. * * Before we pass control to the user, we first clear some information * from the build that is (presumbably) only meaningful * for the current code generation. * This includes the create_leaf callback itself, so we make a copy * of the build first. */ static __isl_give isl_ast_graft_list *call_create_leaf( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } static __isl_give isl_ast_graft_list *build_ast_from_child( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed); /* Generate an AST after having handled the complete schedule * of this call to the code generator or the complete band * if we are generating an AST from a schedule tree. * * If we are inside a band node, then move on to the child of the band. * * If the user has specified a create_leaf callback, control * is passed to the user in call_create_leaf. * * Otherwise, we generate one or more calls for each individual * domain in generate_domain. */ static __isl_give isl_ast_graft_list *generate_inner_level( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } /* Call the before_each_for callback, if requested by the user. */ static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node, __isl_keep isl_ast_build *build) { … } /* Call the after_each_for callback, if requested by the user. */ static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build) { … } /* Plug in all the know values of the current and outer dimensions * in the domain of "executed". In principle, we only need to plug * in the known value of the current dimension since the values of * outer dimensions have been plugged in already. * However, it turns out to be easier to just plug in all known values. */ static __isl_give isl_union_map *plug_in_values( __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build) { … } /* Check if the constraint "c" is a lower bound on dimension "pos", * an upper bound, or independent of dimension "pos". */ static int constraint_type(isl_constraint *c, int pos) { … } /* Compare the types of the constraints "a" and "b", * resulting in constraints that are independent of "depth" * to be sorted before the lower bounds on "depth", which in * turn are sorted before the upper bounds on "depth". */ static int cmp_constraint(__isl_keep isl_constraint *a, __isl_keep isl_constraint *b, void *user) { … } /* Extract a lower bound on dimension "pos" from constraint "c". * * If the constraint is of the form * * a x + f(...) >= 0 * * then we essentially return * * l = ceil(-f(...)/a) * * However, if the current dimension is strided, then we need to make * sure that the lower bound we construct is of the form * * f + s a * * with f the offset and s the stride. * We therefore compute * * f + s * ceil((l - f)/s) */ static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c, int pos, __isl_keep isl_ast_build *build) { … } /* Return the exact lower bound (or upper bound if "upper" is set) * of "domain" as a piecewise affine expression. * * If we are computing a lower bound (of a strided dimension), then * we need to make sure it is of the form * * f + s a * * where f is the offset and s is the stride. * We therefore need to include the stride constraint before computing * the minimum. */ static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain, __isl_keep isl_ast_build *build, int upper) { … } /* Callback for sorting the isl_pw_aff_list passed to reduce_list and * remove_redundant_lower_bounds. */ static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b, void *user) { … } /* Given a list of lower bounds "list", remove those that are redundant * with respect to the other bounds in "list" and the domain of "build". * * We first sort the bounds in the same way as they would be sorted * by set_for_node_expressions so that we can try and remove the last * bounds first. * * For a lower bound to be effective, there needs to be at least * one domain element for which it is larger than all other lower bounds. * For each lower bound we therefore intersect the domain with * the conditions that it is larger than all other bounds and * check whether the result is empty. If so, the bound can be removed. */ static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds( __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build) { … } /* Extract a lower bound on dimension "pos" from each constraint * in "constraints" and return the list of lower bounds. * If "constraints" has zero elements, then we extract a lower bound * from "domain" instead. * * If the current dimension is strided, then the lower bound * is adjusted by lower_bound to match the stride information. * This modification may make one or more lower bounds redundant * with respect to the other lower bounds. We therefore check * for this condition and remove the redundant lower bounds. */ static __isl_give isl_pw_aff_list *lower_bounds( __isl_keep isl_constraint_list *constraints, int pos, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { … } /* Extract an upper bound on dimension "pos" from each constraint * in "constraints" and return the list of upper bounds. * If "constraints" has zero elements, then we extract an upper bound * from "domain" instead. */ static __isl_give isl_pw_aff_list *upper_bounds( __isl_keep isl_constraint_list *constraints, int pos, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { … } /* Return an isl_ast_expr that performs the reduction of type "type" * on AST expressions corresponding to the elements in "list". * * The list is assumed to contain at least one element. * If the list contains exactly one element, then the returned isl_ast_expr * simply computes that affine expression. * If the list contains more than one element, then we sort it * using a fairly arbitrary but hopefully reasonably stable order. */ static __isl_give isl_ast_expr *reduce_list(enum isl_ast_expr_op_type type, __isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build) { … } /* Add guards implied by the "generated constraints", * but not (necessarily) enforced by the generated AST to "guard". * In particular, if there is any stride constraints, * then add the guard implied by those constraints. * If we have generated a degenerate loop, then add the guard * implied by "bounds" on the outer dimensions, i.e., the guard * that ensures that the single value actually exists. * Since there may also be guards implied by a combination * of these constraints, we first combine them before * deriving the implied constraints. */ static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard, int degenerate, __isl_keep isl_basic_set *bounds, __isl_keep isl_ast_build *build) { … } /* Update "graft" based on "sub_build" for the degenerate case. * * "build" is the build in which graft->node was created * "sub_build" contains information about the current level itself, * including the single value attained. * * We set the initialization part of the for loop to the single * value attained by the current dimension. * The increment and condition are not strictly needed as they are known * to be "1" and "iterator <= value" respectively. */ static __isl_give isl_ast_graft *refine_degenerate( __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build) { … } /* Return the intersection of constraints in "list" as a set. */ static __isl_give isl_set *intersect_constraints( __isl_keep isl_constraint_list *list) { … } /* Compute the constraints on the outer dimensions enforced by * graft->node and add those constraints to graft->enforced, * in case the upper bound is expressed as a set "upper". * * In particular, if l(...) is a lower bound in "lower", and * * -a i + f(...) >= 0 or a i <= f(...) * * is an upper bound ocnstraint on the current dimension i, * then the for loop enforces the constraint * * -a l(...) + f(...) >= 0 or a l(...) <= f(...) * * We therefore simply take each lower bound in turn, plug it into * the upper bounds and compute the intersection over all lower bounds. * * If a lower bound is a rational expression, then * isl_basic_set_preimage_multi_aff will force this rational * expression to have only integer values. However, the loop * itself does not enforce this integrality constraint. We therefore * use the ceil of the lower bounds instead of the lower bounds themselves. * Other constraints will make sure that the for loop is only executed * when each of the lower bounds attains an integral value. * In particular, potentially rational values only occur in * lower_bound if the offset is a (seemingly) rational expression, * but then outer conditions will make sure that this rational expression * only attains integer values. */ static __isl_give isl_ast_graft *set_enforced_from_set( __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper) { … } /* Compute the constraints on the outer dimensions enforced by * graft->node and add those constraints to graft->enforced, * in case the upper bound is expressed as * a list of affine expressions "upper". * * The enforced condition is that each lower bound expression is less * than or equal to each upper bound expression. */ static __isl_give isl_ast_graft *set_enforced_from_list( __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper) { … } /* Does "aff" have a negative constant term? */ static isl_bool aff_constant_is_negative(__isl_keep isl_set *set, __isl_keep isl_aff *aff, void *user) { … } /* Does "pa" have a negative constant term over its entire domain? */ static isl_bool pw_aff_constant_is_negative(__isl_keep isl_pw_aff *pa, void *user) { … } /* Does each element in "list" have a negative constant term? */ static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list) { … } /* Add 1 to each of the elements in "list", where each of these elements * is defined over the internal schedule space of "build". */ static __isl_give isl_pw_aff_list *list_add_one( __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build) { … } /* Set the condition part of the for node graft->node in case * the upper bound is represented as a list of piecewise affine expressions. * * In particular, set the condition to * * iterator <= min(list of upper bounds) * * If each of the upper bounds has a negative constant term, then * set the condition to * * iterator < min(list of (upper bound + 1)s) * */ static __isl_give isl_ast_graft *set_for_cond_from_list( __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build) { … } /* Set the condition part of the for node graft->node in case * the upper bound is represented as a set. */ static __isl_give isl_ast_graft *set_for_cond_from_set( __isl_take isl_ast_graft *graft, __isl_keep isl_set *set, __isl_keep isl_ast_build *build) { … } /* Construct an isl_ast_expr for the increment (i.e., stride) of * the current dimension. */ static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build) { … } /* Should we express the loop condition as * * iterator <= min(list of upper bounds) * * or as a conjunction of constraints? * * The first is constructed from a list of upper bounds. * The second is constructed from a set. * * If there are no upper bounds in "constraints", then this could mean * that "domain" simply doesn't have an upper bound or that we didn't * pick any upper bound. In the first case, we want to generate the * loop condition as a(n empty) conjunction of constraints * In the second case, we will compute * a single upper bound from "domain" and so we use the list form. * * If there are upper bounds in "constraints", * then we use the list form iff the atomic_upper_bound option is set. */ static int use_upper_bound_list(isl_ctx *ctx, int n_upper, __isl_keep isl_set *domain, int depth) { … } /* Fill in the expressions of the for node in graft->node. * * In particular, * - set the initialization part of the loop to the maximum of the lower bounds * - extract the increment from the stride of the current dimension * - construct the for condition either based on a list of upper bounds * or on a set of upper bound constraints. */ static __isl_give isl_ast_graft *set_for_node_expressions( __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower, int use_list, __isl_keep isl_pw_aff_list *upper_list, __isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build) { … } /* Update "graft" based on "bounds" and "domain" for the generic, * non-degenerate, case. * * "c_lower" and "c_upper" contain the lower and upper bounds * that the loop node should express. * "domain" is the subset of the intersection of the constraints * for which some code is executed. * * There may be zero lower bounds or zero upper bounds in "constraints" * in case the list of constraints was created * based on the atomic option or based on separation with explicit bounds. * In that case, we use "domain" to derive lower and/or upper bounds. * * We first compute a list of one or more lower bounds. * * Then we decide if we want to express the condition as * * iterator <= min(list of upper bounds) * * or as a conjunction of constraints. * * The set of enforced constraints is then computed either based on * a list of upper bounds or on a set of upper bound constraints. * We do not compute any enforced constraints if we were forced * to compute a lower or upper bound using exact_bound. The domains * of the resulting expressions may imply some bounds on outer dimensions * that we do not want to appear in the enforced constraints since * they are not actually enforced by the corresponding code. * * Finally, we fill in the expressions of the for node. */ static __isl_give isl_ast_graft *refine_generic_bounds( __isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *c_lower, __isl_take isl_constraint_list *c_upper, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { … } /* Internal data structure used inside count_constraints to keep * track of the number of constraints that are independent of dimension "pos", * the lower bounds in "pos" and the upper bounds in "pos". */ struct isl_ast_count_constraints_data { … }; /* Increment data->n_indep, data->lower or data->upper depending * on whether "c" is independent of dimensions data->pos, * a lower bound or an upper bound. */ static isl_stat count_constraints(__isl_take isl_constraint *c, void *user) { … } /* Update "graft" based on "bounds" and "domain" for the generic, * non-degenerate, case. * * "list" respresent the list of bounds that need to be encoded by * the for loop. Only the constraints that involve the iterator * are relevant here. The other constraints are taken care of by * the caller and are included in the generated constraints of "build". * "domain" is the subset of the intersection of the constraints * for which some code is executed. * "build" is the build in which graft->node was created. * * We separate lower bounds, upper bounds and constraints that * are independent of the loop iterator. * * The actual for loop bounds are generated in refine_generic_bounds. */ static __isl_give isl_ast_graft *refine_generic_split( __isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { … } /* Update "graft" based on "bounds" and "domain" for the generic, * non-degenerate, case. * * "bounds" respresent the bounds that need to be encoded by * the for loop (or a guard around the for loop). * "domain" is the subset of "bounds" for which some code is executed. * "build" is the build in which graft->node was created. * * We break up "bounds" into a list of constraints and continue with * refine_generic_split. */ static __isl_give isl_ast_graft *refine_generic( __isl_take isl_ast_graft *graft, __isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { … } /* Create a for node for the current level. * * Mark the for node degenerate if "degenerate" is set. */ static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build, int degenerate) { … } /* If the ast_build_exploit_nested_bounds option is set, then return * the constraints enforced by all elements in "list". * Otherwise, return the universe. */ static __isl_give isl_basic_set *extract_shared_enforced( __isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build) { … } /* Return the pending constraints of "build" that are not already taken * care of (by a combination of "enforced" and the generated constraints * of "build"). */ static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build, __isl_keep isl_basic_set *enforced) { … } /* Create an AST node for the current dimension based on * the schedule domain "bounds" and return the node encapsulated * in an isl_ast_graft. * * "executed" is the current inverse schedule, taking into account * the bounds in "bounds" * "domain" is the domain of "executed", with inner dimensions projected out. * It may be a strict subset of "bounds" in case "bounds" was created * based on the atomic option or based on separation with explicit bounds. * * "domain" may satisfy additional equalities that result * from intersecting "executed" with "bounds" in add_node. * It may also satisfy some global constraints that were dropped out because * we performed separation with explicit bounds. * The very first step is then to copy these constraints to "bounds". * * Since we may be calling before_each_for and after_each_for * callbacks, we record the current inverse schedule in the build. * * We consider three builds, * "build" is the one in which the current level is created, * "body_build" is the build in which the next level is created, * "sub_build" is essentially the same as "body_build", except that * the depth has not been increased yet. * * "build" already contains information (in strides and offsets) * about the strides at the current level, but this information is not * reflected in the build->domain. * We first add this information and the "bounds" to the sub_build->domain. * isl_ast_build_set_loop_bounds adds the stride information and * checks whether the current dimension attains * only a single value and whether this single value can be represented using * a single affine expression. * In the first case, the current level is considered "degenerate". * In the second, sub-case, the current level is considered "eliminated". * Eliminated levels don't need to be reflected in the AST since we can * simply plug in the affine expression. For degenerate, but non-eliminated, * levels, we do introduce a for node, but mark is as degenerate so that * it can be printed as an assignment of the single value to the loop * "iterator". * * If the current level is eliminated, we explicitly plug in the value * for the current level found by isl_ast_build_set_loop_bounds in the * inverse schedule. This ensures that if we are working on a slice * of the domain based on information available in the inverse schedule * and the build domain, that then this information is also reflected * in the inverse schedule. This operation also eliminates the current * dimension from the inverse schedule making sure no inner dimensions depend * on the current dimension. Otherwise, we create a for node, marking * it degenerate if appropriate. The initial for node is still incomplete * and will be completed in either refine_degenerate or refine_generic. * * We then generate a sequence of grafts for the next level, * create a surrounding graft for the current level and insert * the for node we created (if the current level is not eliminated). * Before creating a graft for the current level, we first extract * hoistable constraints from the child guards and combine them * with the pending constraints in the build. These constraints * are used to simplify the child guards and then added to the guard * of the current graft to ensure that they will be generated. * If the hoisted guard is a disjunction, then we use it directly * to gist the guards on the children before intersect it with the * pending constraints. We do so because this disjunction is typically * identical to the guards on the children such that these guards * can be effectively removed completely. After the intersection, * the gist operation would have a harder time figuring this out. * * Finally, we set the bounds of the for loop in either * refine_degenerate or refine_generic. * We do so in a context where the pending constraints of the build * have been replaced by the guard of the current graft. */ static __isl_give isl_ast_graft *create_node_scaled( __isl_take isl_union_map *executed, __isl_take isl_basic_set *bounds, __isl_take isl_set *domain, __isl_take isl_ast_build *build) { … } /* Internal data structure for checking if all constraints involving * the input dimension "depth" are such that the other coefficients * are multiples of "m", reducing "m" if they are not. * If "m" is reduced all the way down to "1", then the check has failed * and we break out of the iteration. */ struct isl_check_scaled_data { … }; /* If constraint "c" involves the input dimension data->depth, * then make sure that all the other coefficients are multiples of data->m, * reducing data->m if needed. * Break out of the iteration if data->m has become equal to "1". */ static isl_stat constraint_check_scaled(__isl_take isl_constraint *c, void *user) { … } /* For each constraint of "bmap" that involves the input dimension data->depth, * make sure that all the other coefficients are multiples of data->m, * reducing data->m if needed. * Break out of the iteration if data->m has become equal to "1". */ static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap, void *user) { … } /* For each constraint of "map" that involves the input dimension data->depth, * make sure that all the other coefficients are multiples of data->m, * reducing data->m if needed. * Break out of the iteration if data->m has become equal to "1". */ static isl_stat map_check_scaled(__isl_take isl_map *map, void *user) { … } /* Create an AST node for the current dimension based on * the schedule domain "bounds" and return the node encapsulated * in an isl_ast_graft. * * "executed" is the current inverse schedule, taking into account * the bounds in "bounds" * "domain" is the domain of "executed", with inner dimensions projected out. * * * Before moving on to the actual AST node construction in create_node_scaled, * we first check if the current dimension is strided and if we can scale * down this stride. Note that we only do this if the ast_build_scale_strides * option is set. * * In particular, let the current dimension take on values * * f + s a * * with a an integer. We check if we can find an integer m that (obviously) * divides both f and s. * * If so, we check if the current dimension only appears in constraints * where the coefficients of the other variables are multiples of m. * We perform this extra check to avoid the risk of introducing * divisions by scaling down the current dimension. * * If so, we scale the current dimension down by a factor of m. * That is, we plug in * * i = m i' (1) * * Note that in principle we could always scale down strided loops * by plugging in * * i = f + s i' * * but this may result in i' taking on larger values than the original i, * due to the shift by "f". * By constrast, the scaling in (1) can only reduce the (absolute) value "i". */ static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed, __isl_take isl_basic_set *bounds, __isl_take isl_set *domain, __isl_take isl_ast_build *build) { … } /* Add the basic set to the list that "user" points to. */ static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user) { … } /* Extract the basic sets of "set" and collect them in an isl_basic_set_list. */ static __isl_give isl_basic_set_list *isl_basic_set_list_from_set( __isl_take isl_set *set) { … } /* Generate code for the schedule domain "bounds" * and add the result to "list". * * We mainly detect strides here and check if the bounds do not * conflict with the current build domain * and then pass over control to create_node. * * "bounds" reflects the bounds on the current dimension and possibly * some extra conditions on outer dimensions. * It does not, however, include any divs involving the current dimension, * so it does not capture any stride constraints. * We therefore need to compute that part of the schedule domain that * intersects with "bounds" and derive the strides from the result. */ static __isl_give isl_ast_graft_list *add_node( __isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed, __isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build) { … } /* Does any element of i follow or coincide with any element of j * at the current depth for equal values of the outer dimensions? */ static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i, __isl_keep isl_basic_set *j, void *user) { … } /* Split up each element of "list" into a part that is related to "bset" * according to "gt" and a part that is not. * Return a list that consist of "bset" and all the pieces. */ static __isl_give isl_basic_set_list *add_split_on( __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset, __isl_keep isl_basic_map *gt) { … } static __isl_give isl_ast_graft_list *generate_sorted_domains( __isl_keep isl_basic_set_list *domain_list, __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build); /* Internal data structure for add_nodes. * * "executed" and "build" are extra arguments to be passed to add_node. * "list" collects the results. */ struct isl_add_nodes_data { … }; /* Generate code for the schedule domains in "scc" * and add the results to "list". * * The domains in "scc" form a strongly connected component in the ordering. * If the number of domains in "scc" is larger than 1, then this means * that we cannot determine a valid ordering for the domains in the component. * This should be fairly rare because the individual domains * have been made disjoint first. * The problem is that the domains may be integrally disjoint but not * rationally disjoint. For example, we may have domains * * { [i,i] : 0 <= i <= 1 } and { [i,1-i] : 0 <= i <= 1 } * * These two domains have an empty intersection, but their rational * relaxations do intersect. It is impossible to order these domains * in the second dimension because the first should be ordered before * the second for outer dimension equal to 0, while it should be ordered * after for outer dimension equal to 1. * * This may happen in particular in case of unrolling since the domain * of each slice is replaced by its simple hull. * * For each basic set i in "scc" and for each of the following basic sets j, * we split off that part of the basic set i that shares the outer dimensions * with j and lies before j in the current dimension. * We collect all the pieces in a new list that replaces "scc". * * While the elements in "scc" should be disjoint, we double-check * this property to avoid running into an infinite recursion in case * they intersect due to some internal error. */ static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user) { … } /* Sort the domains in "domain_list" according to the execution order * at the current depth (for equal values of the outer dimensions), * generate code for each of them, collecting the results in a list. * If no code is generated (because the intersection of the inverse schedule * with the domains turns out to be empty), then an empty list is returned. * * The caller is responsible for ensuring that the basic sets in "domain_list" * are pair-wise disjoint. It can, however, in principle happen that * two basic sets should be ordered one way for one value of the outer * dimensions and the other way for some other value of the outer dimensions. * We therefore play safe and look for strongly connected components. * The function add_nodes takes care of handling non-trivial components. */ static __isl_give isl_ast_graft_list *generate_sorted_domains( __isl_keep isl_basic_set_list *domain_list, __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) { … } /* Do i and j share any values for the outer dimensions? */ static isl_bool shared_outer(__isl_keep isl_basic_set *i, __isl_keep isl_basic_set *j, void *user) { … } /* Internal data structure for generate_sorted_domains_wrap. * * "n" is the total number of basic sets * "executed" and "build" are extra arguments to be passed * to generate_sorted_domains. * * "single" is set to 1 by generate_sorted_domains_wrap if there * is only a single component. * "list" collects the results. */ struct isl_ast_generate_parallel_domains_data { … }; /* Call generate_sorted_domains on "scc", fuse the result into a list * with either zero or one graft and collect the these single element * lists into data->list. * * If there is only one component, i.e., if the number of basic sets * in the current component is equal to the total number of basic sets, * then data->single is set to 1 and the result of generate_sorted_domains * is not fused. */ static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc, void *user) { … } /* Look for any (weakly connected) components in the "domain_list" * of domains that share some values of the outer dimensions. * That is, domains in different components do not share any values * of the outer dimensions. This means that these components * can be freely reordered. * Within each of the components, we sort the domains according * to the execution order at the current depth. * * If there is more than one component, then generate_sorted_domains_wrap * fuses the result of each call to generate_sorted_domains * into a list with either zero or one graft and collects these (at most) * single element lists into a bigger list. This means that the elements of the * final list can be freely reordered. In particular, we sort them * according to an arbitrary but fixed ordering to ease merging of * graft lists from different components. */ static __isl_give isl_ast_graft_list *generate_parallel_domains( __isl_keep isl_basic_set_list *domain_list, __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) { … } /* Internal data for separate_domain. * * "explicit" is set if we only want to use explicit bounds. * * "domain" collects the separated domains. */ struct isl_separate_domain_data { … }; /* Extract implicit bounds on the current dimension for the executed "map". * * The domain of "map" may involve inner dimensions, so we * need to eliminate them. */ static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map, __isl_keep isl_ast_build *build) { … } /* Extract explicit bounds on the current dimension for the executed "map". * * Rather than eliminating the inner dimensions as in implicit_bounds, * we simply drop any constraints involving those inner dimensions. * The idea is that most bounds that are implied by constraints on the * inner dimensions will be enforced by for loops and not by explicit guards. * There is then no need to separate along those bounds. */ static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map, __isl_keep isl_ast_build *build) { … } /* Split data->domain into pieces that intersect with the range of "map" * and pieces that do not intersect with the range of "map" * and then add that part of the range of "map" that does not intersect * with data->domain. */ static isl_stat separate_domain(__isl_take isl_map *map, void *user) { … } /* Separate the schedule domains of "executed". * * That is, break up the domain of "executed" into basic sets, * such that for each basic set S, every element in S is associated with * the same domain spaces. * * "space" is the (single) domain space of "executed". */ static __isl_give isl_set *separate_schedule_domains( __isl_take isl_space *space, __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build) { … } /* Temporary data used during the search for a lower bound for unrolling. * * "build" is the build in which the unrolling will be performed * "domain" is the original set for which to find a lower bound * "depth" is the dimension for which to find a lower boudn * "expansion" is the expansion that needs to be applied to "domain" * in the unrolling that will be performed * * "lower" is the best lower bound found so far. It is NULL if we have not * found any yet. * "n" is the corresponding size. If lower is NULL, then the value of n * is undefined. * "n_div" is the maximal number of integer divisions in the first * unrolled iteration (after expansion). It is set to -1 if it hasn't * been computed yet. */ struct isl_find_unroll_data { … }; /* Return the constraint * * i_"depth" = aff + offset */ static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff, int offset) { … } /* Update *user to the number of integer divisions in the first element * of "ma", if it is larger than the current value. */ static isl_stat update_n_div(__isl_take isl_set *set, __isl_take isl_multi_aff *ma, void *user) { … } /* Get the number of integer divisions in the expression for the iterator * value at the first slice in the unrolling based on lower bound "lower", * taking into account the expansion that needs to be performed on this slice. */ static int get_expanded_n_div(struct isl_find_unroll_data *data, __isl_keep isl_aff *lower) { … } /* Is the lower bound "lower" with corresponding iteration count "n" * better than the one stored in "data"? * If there is no upper bound on the iteration count ("n" is infinity) or * if the count is too large, then we cannot use this lower bound. * Otherwise, if there was no previous lower bound or * if the iteration count of the new lower bound is smaller than * the iteration count of the previous lower bound, then we consider * the new lower bound to be better. * If the iteration count is the same, then compare the number * of integer divisions that would be needed to express * the iterator value at the first slice in the unrolling * according to the lower bound. If we end up computing this * number, then store the lowest value in data->n_div. */ static int is_better_lower_bound(struct isl_find_unroll_data *data, __isl_keep isl_aff *lower, __isl_keep isl_val *n) { … } /* Check if we can use "c" as a lower bound and if it is better than * any previously found lower bound. * * If "c" does not involve the dimension at the current depth, * then we cannot use it. * Otherwise, let "c" be of the form * * i >= f(j)/a * * We compute the maximal value of * * -ceil(f(j)/a)) + i + 1 * * over the domain. If there is such a value "n", then we know * * -ceil(f(j)/a)) + i + 1 <= n * * or * * i < ceil(f(j)/a)) + n * * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling. * We just need to check if we have found any lower bound before and * if the new lower bound is better (smaller n or fewer integer divisions) * than the previously found lower bounds. */ static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data, __isl_keep isl_constraint *c) { … } /* Check if we can use "c" as a lower bound and if it is better than * any previously found lower bound. */ static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user) { … } /* Look for a lower bound l(i) on the dimension at "depth" * and a size n such that "domain" is a subset of * * { [i] : l(i) <= i_d < l(i) + n } * * where d is "depth" and l(i) depends only on earlier dimensions. * Furthermore, try and find a lower bound such that n is as small as possible. * In particular, "n" needs to be finite. * "build" is the build in which the unrolling will be performed. * "expansion" is the expansion that needs to be applied to "domain" * in the unrolling that will be performed. * * Inner dimensions have been eliminated from "domain" by the caller. * * We first construct a collection of lower bounds on the input set * by computing its simple hull. We then iterate through them, * discarding those that we cannot use (either because they do not * involve the dimension at "depth" or because they have no corresponding * upper bound, meaning that "n" would be unbounded) and pick out the * best from the remaining ones. * * If we cannot find a suitable lower bound, then we consider that * to be an error. */ static __isl_give isl_aff *find_unroll_lower_bound( __isl_keep isl_ast_build *build, __isl_keep isl_set *domain, int depth, __isl_keep isl_basic_map *expansion, int *n) { … } /* Call "fn" on each iteration of the current dimension of "domain". * If "init" is not NULL, then it is called with the number of * iterations before any call to "fn". * Return -1 on failure. * * Since we are going to be iterating over the individual values, * we first check if there are any strides on the current dimension. * If there is, we rewrite the current dimension i as * * i = stride i' + offset * * and then iterate over individual values of i' instead. * * We then look for a lower bound on i' and a size such that the domain * is a subset of * * { [j,i'] : l(j) <= i' < l(j) + n } * * and then take slices of the domain at values of i' * between l(j) and l(j) + n - 1. * * We compute the unshifted simple hull of each slice to ensure that * we have a single basic set per offset. The slicing constraint * may get simplified away before the unshifted simple hull is taken * and may therefore in some rare cases disappear from the result. * We therefore explicitly add the constraint back after computing * the unshifted simple hull to ensure that the basic sets * remain disjoint. The constraints that are dropped by taking the hull * will be taken into account at the next level, as in the case of the * atomic option. * * Finally, we map i' back to i and call "fn". */ static int foreach_iteration(__isl_take isl_set *domain, __isl_keep isl_ast_build *build, int (*init)(int n, void *user), int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user) { … } /* Data structure for storing the results and the intermediate objects * of compute_domains. * * "list" is the main result of the function and contains a list * of disjoint basic sets for which code should be generated. * * "executed" and "build" are inputs to compute_domains. * "schedule_domain" is the domain of "executed". * * "option" contains the domains at the current depth that should by * atomic, separated or unrolled. These domains are as specified by * the user, except that inner dimensions have been eliminated and * that they have been made pair-wise disjoint. * * "sep_class" contains the user-specified split into separation classes * specialized to the current depth. * "done" contains the union of the separation domains that have already * been handled. */ struct isl_codegen_domains { … }; /* Internal data structure for do_unroll. * * "domains" stores the results of compute_domains. * "class_domain" is the original class domain passed to do_unroll. * "unroll_domain" collects the unrolled iterations. */ struct isl_ast_unroll_data { … }; /* Given an iteration of an unrolled domain represented by "bset", * add it to data->domains->list. * Since we may have dropped some constraints, we intersect with * the class domain again to ensure that each element in the list * is disjoint from the other class domains. */ static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user) { … } /* Extend domains->list with a list of basic sets, one for each value * of the current dimension in "domain" and remove the corresponding * sets from the class domain. Return the updated class domain. * The divs that involve the current dimension have not been projected out * from this domain. * * We call foreach_iteration to iterate over the individual values and * in do_unroll_iteration we collect the individual basic sets in * domains->list and their union in data->unroll_domain, which is then * used to update the class domain. */ static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains, __isl_take isl_set *domain, __isl_take isl_set *class_domain) { … } /* Add domains to domains->list for each individual value of the current * dimension, for that part of the schedule domain that lies in the * intersection of the option domain and the class domain. * Remove the corresponding sets from the class domain and * return the updated class domain. * * We first break up the unroll option domain into individual pieces * and then handle each of them separately. The unroll option domain * has been made disjoint in compute_domains_init_options, * * Note that we actively want to combine different pieces of the * schedule domain that have the same value at the current dimension. * We therefore need to break up the unroll option domain before * intersecting with class and schedule domain, hoping that the * unroll option domain specified by the user is relatively simple. */ static __isl_give isl_set *compute_unroll_domains( struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) { … } /* Try and construct a single basic set that includes the intersection of * the schedule domain, the atomic option domain and the class domain. * Add the resulting basic set(s) to domains->list and remove them * from class_domain. Return the updated class domain. * * We construct a single domain rather than trying to combine * the schedule domains of individual domains because we are working * within a single component so that non-overlapping schedule domains * should already have been separated. * We do however need to make sure that this single domains is a subset * of the class domain so that it would not intersect with any other * class domains. This means that we may end up splitting up the atomic * domain in case separation classes are being used. * * "domain" is the intersection of the schedule domain and the class domain, * with inner dimensions projected out. */ static __isl_give isl_set *compute_atomic_domain( struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) { … } /* Split up the schedule domain into uniform basic sets, * in the sense that each element in a basic set is associated to * elements of the same domains, and add the result to domains->list. * Do this for that part of the schedule domain that lies in the * intersection of "class_domain" and the separate option domain. * * "class_domain" may or may not include the constraints * of the schedule domain, but this does not make a difference * since we are going to intersect it with the domain of the inverse schedule. * If it includes schedule domain constraints, then they may involve * inner dimensions, but we will eliminate them in separation_domain. */ static int compute_separate_domain(struct isl_codegen_domains *domains, __isl_keep isl_set *class_domain) { … } /* Split up the domain at the current depth into disjoint * basic sets for which code should be generated separately * for the given separation class domain. * * If any separation classes have been defined, then "class_domain" * is the domain of the current class and does not refer to inner dimensions. * Otherwise, "class_domain" is the universe domain. * * We first make sure that the class domain is disjoint from * previously considered class domains. * * The separate domains can be computed directly from the "class_domain". * * The unroll, atomic and remainder domains need the constraints * from the schedule domain. * * For unrolling, the actual schedule domain is needed (with divs that * may refer to the current dimension) so that stride detection can be * performed. * * For atomic and remainder domains, inner dimensions and divs involving * the current dimensions should be eliminated. * In case we are working within a separation class, we need to intersect * the result with the current "class_domain" to ensure that the domains * are disjoint from those generated from other class domains. * * The domain that has been made atomic may be larger than specified * by the user since it needs to be representable as a single basic set. * This possibly larger domain is removed from class_domain by * compute_atomic_domain. It is computed first so that the extended domain * would not overlap with any domains computed before. * Similary, the unrolled domains may have some constraints removed and * may therefore also be larger than specified by the user. * * If anything is left after handling separate, unroll and atomic, * we split it up into basic sets and append the basic sets to domains->list. */ static isl_stat compute_partial_domains(struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) { … } /* Split up the domain at the current depth into disjoint * basic sets for which code should be generated separately * for the separation class identified by "pnt". * * We extract the corresponding class domain from domains->sep_class, * eliminate inner dimensions and pass control to compute_partial_domains. */ static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user) { … } /* Extract the domains at the current depth that should be atomic, * separated or unrolled and store them in option. * * The domains specified by the user might overlap, so we make * them disjoint by subtracting earlier domains from later domains. */ static void compute_domains_init_options(isl_set *option[4], __isl_keep isl_ast_build *build) { … } /* Split up the domain at the current depth into disjoint * basic sets for which code should be generated separately, * based on the user-specified options. * Return the list of disjoint basic sets. * * There are three kinds of domains that we need to keep track of. * - the "schedule domain" is the domain of "executed" * - the "class domain" is the domain corresponding to the currrent * separation class * - the "option domain" is the domain corresponding to one of the options * atomic, unroll or separate * * We first consider the individial values of the separation classes * and split up the domain for each of them separately. * Finally, we consider the remainder. If no separation classes were * specified, then we call compute_partial_domains with the universe * "class_domain". Otherwise, we take the "schedule_domain" as "class_domain", * with inner dimensions removed. We do this because we want to * avoid computing the complement of the class domains (i.e., the difference * between the universe and domains->done). */ static __isl_give isl_basic_set_list *compute_domains( __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a union map. * * We first split up the domain at the current depth into disjoint * basic sets based on the user-specified options. * Then we generated code for each of them and concatenate the results. */ static __isl_give isl_ast_graft_list *generate_shifted_component_flat( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a schedule tree * and the separate option was specified. * * We perform separation on the domain of "executed" and then generate * an AST for each of the resulting disjoint basic sets. */ static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } /* Internal data structure for generate_shifted_component_tree_unroll. * * "executed" and "build" are inputs to generate_shifted_component_tree_unroll. * "list" collects the constructs grafts. */ struct isl_ast_unroll_tree_data { … }; /* Initialize data->list to a list of "n" elements. */ static int init_unroll_tree(int n, void *user) { … } /* Given an iteration of an unrolled domain represented by "bset", * generate the corresponding AST and add the result to data->list. */ static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a schedule tree * and the unroll option was specified. * * We call foreach_iteration to iterate over the individual values and * construct and collect the corresponding grafts in do_unroll_tree_iteration. */ static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll( __isl_take isl_union_map *executed, __isl_take isl_set *domain, __isl_take isl_ast_build *build) { … } /* Does "domain" involve a disjunction that is purely based on * constraints involving only outer dimension? * * In particular, is there a disjunction such that the constraints * involving the current and later dimensions are the same over * all the disjuncts? */ static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain, __isl_keep isl_ast_build *build) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a schedule tree. * In particular, handle the base case where there is either no isolated * set or we are within the isolated set (in which case "isolated" is set) * or the iterations that precede or follow the isolated set. * * The schedule domain is broken up or combined into basic sets * according to the AST generation option specified in the current * schedule node, which may be either atomic, separate, unroll or * unspecified. If the option is unspecified, then we currently simply * split the schedule domain into disjoint basic sets. * * In case the separate option is specified, the AST generation is * handled by generate_shifted_component_tree_separate. * In the other cases, we need the global schedule domain. * In the unroll case, the AST generation is then handled by * generate_shifted_component_tree_unroll which needs the actual * schedule domain (with divs that may refer to the current dimension) * so that stride detection can be performed. * In the atomic or unspecified case, inner dimensions and divs involving * the current dimensions should be eliminated. * The result is then either combined into a single basic set or * split up into disjoint basic sets. * Finally an AST is generated for each basic set and the results are * concatenated. * * If the schedule domain involves a disjunction that is purely based on * constraints involving only outer dimension, then it is treated as * if atomic was specified. This ensures that only a single loop * is generated instead of a sequence of identical loops with * different guards. */ static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, int isolated) { … } /* Extract out the disjunction imposed by "domain" on the outer * schedule dimensions. * * In particular, remove all inner dimensions from "domain" (including * the current dimension) and then remove the constraints that are shared * by all disjuncts in the result. */ static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain, __isl_keep isl_ast_build *build) { … } /* Add "guard" to the grafts in "list". * "build" is the outer AST build, while "sub_build" includes "guard" * in its generated domain. * * First combine the grafts into a single graft and then add the guard. * If the list is empty, or if some error occurred, then simply return * the list. */ static __isl_give isl_ast_graft_list *list_add_guard( __isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard, __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a schedule tree. * In particular, do so for the specified subset of the schedule domain. * * If we are outside of the isolated part, then "domain" may include * a disjunction. Explicitly generate this disjunction at this point * instead of relying on the disjunction getting hoisted back up * to this level. */ static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part( __isl_keep isl_union_map *executed, __isl_take isl_set *domain, __isl_keep isl_ast_build *build, int isolated) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a schedule tree. * In particular, do so for the specified sequence of subsets * of the schedule domain, "before", "isolated", "after" and "other", * where only the "isolated" part is considered to be isolated. */ static __isl_give isl_ast_graft_list *generate_shifted_component_parts( __isl_take isl_union_map *executed, __isl_take isl_set *before, __isl_take isl_set *isolated, __isl_take isl_set *after, __isl_take isl_set *other, __isl_take isl_ast_build *build) { … } /* Does "set" intersect "first", but not "second"? */ static isl_bool only_intersects_first(__isl_keep isl_set *set, __isl_keep isl_set *first, __isl_keep isl_set *second) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a schedule tree. * In particular, do so in case of isolation where there is * only an "isolated" part and an "after" part. * "dead1" and "dead2" are freed by this function in order to simplify * the caller. * * The "before" and "other" parts are set to empty sets. */ static __isl_give isl_ast_graft_list *generate_shifted_component_only_after( __isl_take isl_union_map *executed, __isl_take isl_set *isolated, __isl_take isl_set *after, __isl_take isl_ast_build *build, __isl_take isl_set *dead1, __isl_take isl_set *dead2) { … } /* Generate code for a single component, after shifting (if any) * has been applied, in case the schedule was specified as a schedule tree. * * We first check if the user has specified an isolated schedule domain * and that we are not already outside of this isolated schedule domain. * If so, we break up the schedule domain into iterations that * precede the isolated domain, the isolated domain itself, * the iterations that follow the isolated domain and * the remaining iterations (those that are incomparable * to the isolated domain). * We generate an AST for each piece and concatenate the results. * * If the isolated domain is not convex, then it is replaced * by a convex superset to ensure that the sets of preceding and * following iterations are properly defined and, in particular, * that there are no intermediate iterations that do not belong * to the isolated domain. * * In the special case where at least one element of the schedule * domain that does not belong to the isolated domain needs * to be scheduled after this isolated domain, but none of those * elements need to be scheduled before, break up the schedule domain * in only two parts, the isolated domain, and a part that will be * scheduled after the isolated domain. * * If no isolated set has been specified, then we generate an * AST for the entire inverse schedule. */ static __isl_give isl_ast_graft_list *generate_shifted_component_tree( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } /* Generate code for a single component, after shifting (if any) * has been applied. * * Call generate_shifted_component_tree or generate_shifted_component_flat * depending on whether the schedule was specified as a schedule tree. */ static __isl_give isl_ast_graft_list *generate_shifted_component( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } struct isl_set_map_pair { … }; /* Given an array "domain" of isl_set_map_pairs and an array "order" * of indices into the "domain" array, * return the union of the "map" fields of the elements * indexed by the first "n" elements of "order". */ static __isl_give isl_union_map *construct_component_executed( struct isl_set_map_pair *domain, int *order, int n) { … } /* Generate code for a single component, after shifting (if any) * has been applied. * * The component inverse schedule is specified as the "map" fields * of the elements of "domain" indexed by the first "n" elements of "order". */ static __isl_give isl_ast_graft_list *generate_shifted_component_from_list( struct isl_set_map_pair *domain, int *order, int n, __isl_take isl_ast_build *build) { … } /* Does set dimension "pos" of "set" have an obviously fixed value? */ static int dim_is_fixed(__isl_keep isl_set *set, int pos) { … } /* Given an array "domain" of isl_set_map_pairs and an array "order" * of indices into the "domain" array, * do all (except for at most one) of the "set" field of the elements * indexed by the first "n" elements of "order" have a fixed value * at position "depth"? */ static int at_most_one_non_fixed(struct isl_set_map_pair *domain, int *order, int n, int depth) { … } /* Given an array "domain" of isl_set_map_pairs and an array "order" * of indices into the "domain" array, * eliminate the inner dimensions from the "set" field of the elements * indexed by the first "n" elements of "order", provided the current * dimension does not have a fixed value. * * Return the index of the first element in "order" with a corresponding * "set" field that does not have an (obviously) fixed value. */ static int eliminate_non_fixed(struct isl_set_map_pair *domain, int *order, int n, int depth, __isl_keep isl_ast_build *build) { … } /* Given an array "domain" of isl_set_map_pairs and an array "order" * of indices into the "domain" array, * find the element of "domain" (amongst those indexed by the first "n" * elements of "order") with the "set" field that has the smallest * value for the current iterator. * * Note that the domain with the smallest value may depend on the parameters * and/or outer loop dimension. Since the result of this function is only * used as heuristic, we only make a reasonable attempt at finding the best * domain, one that should work in case a single domain provides the smallest * value for the current dimension over all values of the parameters * and outer dimensions. * * In particular, we compute the smallest value of the first domain * and replace it by that of any later domain if that later domain * has a smallest value that is smaller for at least some value * of the parameters and outer dimensions. */ static int first_offset(struct isl_set_map_pair *domain, int *order, int n, __isl_keep isl_ast_build *build) { … } /* Construct a shifted inverse schedule based on the original inverse schedule, * the stride and the offset. * * The original inverse schedule is specified as the "map" fields * of the elements of "domain" indexed by the first "n" elements of "order". * * "stride" and "offset" are such that the difference * between the values of the current dimension of domain "i" * and the values of the current dimension for some reference domain are * equal to * * stride * integer + offset[i] * * Moreover, 0 <= offset[i] < stride. * * For each domain, we create a map * * { [..., j, ...] -> [..., j - offset[i], offset[i], ....] } * * where j refers to the current dimension and the other dimensions are * unchanged, and apply this map to the original schedule domain. * * For example, for the original schedule * * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } * * and assuming the offset is 0 for the A domain and 1 for the B domain, * we apply the mapping * * { [j] -> [j, 0] } * * to the schedule of the "A" domain and the mapping * * { [j - 1] -> [j, 1] } * * to the schedule of the "B" domain. * * * Note that after the transformation, the differences between pairs * of values of the current dimension over all domains are multiples * of stride and that we have therefore exposed the stride. * * * To see that the mapping preserves the lexicographic order, * first note that each of the individual maps above preserves the order. * If the value of the current iterator is j1 in one domain and j2 in another, * then if j1 = j2, we know that the same map is applied to both domains * and the order is preserved. * Otherwise, let us assume, without loss of generality, that j1 < j2. * If c1 >= c2 (with c1 and c2 the corresponding offsets), then * * j1 - c1 < j2 - c2 * * and the order is preserved. * If c1 < c2, then we know * * 0 <= c2 - c1 < s * * We also have * * j2 - j1 = n * s + r * * with n >= 0 and 0 <= r < s. * In other words, r = c2 - c1. * If n > 0, then * * j1 - c1 < j2 - c2 * * If n = 0, then * * j1 - c1 = j2 - c2 * * and so * * (j1 - c1, c1) << (j2 - c2, c2) * * with "<<" the lexicographic order, proving that the order is preserved * in all cases. */ static __isl_give isl_union_map *construct_shifted_executed( struct isl_set_map_pair *domain, int *order, int n, __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset, __isl_keep isl_ast_build *build) { … } /* Generate code for a single component, after exposing the stride, * given that the schedule domain is "shifted strided". * * The component inverse schedule is specified as the "map" fields * of the elements of "domain" indexed by the first "n" elements of "order". * * The schedule domain being "shifted strided" means that the differences * between the values of the current dimension of domain "i" * and the values of the current dimension for some reference domain are * equal to * * stride * integer + offset[i] * * We first look for the domain with the "smallest" value for the current * dimension and adjust the offsets such that the offset of the "smallest" * domain is equal to zero. The other offsets are reduced modulo stride. * * Based on this information, we construct a new inverse schedule in * construct_shifted_executed that exposes the stride. * Since this involves the introduction of a new schedule dimension, * the build needs to be changed accordingly. * After computing the AST, the newly introduced dimension needs * to be removed again from the list of grafts. We do this by plugging * in a mapping that represents the new schedule domain in terms of the * old schedule domain. */ static __isl_give isl_ast_graft_list *generate_shift_component( struct isl_set_map_pair *domain, int *order, int n, __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset, __isl_take isl_ast_build *build) { … } /* Does any node in the schedule tree rooted at the current schedule node * of "build" depend on outer schedule nodes? */ static int has_anchored_subtree(__isl_keep isl_ast_build *build) { … } /* Generate code for a single component. * * The component inverse schedule is specified as the "map" fields * of the elements of "domain" indexed by the first "n" elements of "order". * * This function may modify the "set" fields of "domain". * * Before proceeding with the actual code generation for the component, * we first check if there are any "shifted" strides, meaning that * the schedule domains of the individual domains are all strided, * but that they have different offsets, resulting in the union * of schedule domains not being strided anymore. * * The simplest example is the schedule * * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } * * Both schedule domains are strided, but their union is not. * This function detects such cases and then rewrites the schedule to * * { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 } * * In the new schedule, the schedule domains have the same offset (modulo * the stride), ensuring that the union of schedule domains is also strided. * * * If there is only a single domain in the component, then there is * nothing to do. Similarly, if the current schedule dimension has * a fixed value for almost all domains then there is nothing to be done. * In particular, we need at least two domains where the current schedule * dimension does not have a fixed value. * Finally, in case of a schedule map input, * if any of the options refer to the current schedule dimension, * then we bail out as well. It would be possible to reformulate the options * in terms of the new schedule domain, but that would introduce constraints * that separate the domains in the options and that is something we would * like to avoid. * In the case of a schedule tree input, we bail out if any of * the descendants of the current schedule node refer to outer * schedule nodes in any way. * * * To see if there is any shifted stride, we look at the differences * between the values of the current dimension in pairs of domains * for equal values of outer dimensions. These differences should be * of the form * * m x + r * * with "m" the stride and "r" a constant. Note that we cannot perform * this analysis on individual domains as the lower bound in each domain * may depend on parameters or outer dimensions and so the current dimension * itself may not have a fixed remainder on division by the stride. * * In particular, we compare the first domain that does not have an * obviously fixed value for the current dimension to itself and all * other domains and collect the offsets and the gcd of the strides. * If the gcd becomes one, then we failed to find shifted strides. * If the gcd is zero, then the differences were all fixed, meaning * that some domains had non-obviously fixed values for the current dimension. * If all the offsets are the same (for those domains that do not have * an obviously fixed value for the current dimension), then we do not * apply the transformation. * If none of the domains were skipped, then there is nothing to do. * If some of them were skipped, then if we apply separation, the schedule * domain should get split in pieces with a (non-shifted) stride. * * Otherwise, we apply a shift to expose the stride in * generate_shift_component. */ static __isl_give isl_ast_graft_list *generate_component( struct isl_set_map_pair *domain, int *order, int n, __isl_take isl_ast_build *build) { … } /* Store both "map" itself and its domain in the * structure pointed to by *next and advance to the next array element. */ static isl_stat extract_domain(__isl_take isl_map *map, void *user) { … } static isl_bool after_in_tree(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node); /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the child of "node"? */ static isl_bool after_in_child(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the band node "node"? * * We first check if any domain element is scheduled after any * of the corresponding image elements by the band node itself. * If not, we restrict "map" to those pairs of element that * are scheduled together by the band node and continue with * the child of the band node. * If there are no such pairs then the map passed to after_in_child * will be empty causing it to return 0. */ static isl_bool after_in_band(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the context node "node"? * * The context constraints apply to the schedule domain, * so we cannot apply them directly to "umap", which contains * pairs of statement instances. Instead, we add them * to the range of the prefix schedule for both domain and * range of "umap". */ static isl_bool after_in_context(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the expansion node "node"? * * We apply the expansion to domain and range of "umap" and * continue with its child. */ static isl_bool after_in_expansion(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the extension node "node"? * * Since the extension node may add statement instances before or * after the pairs of statement instances in "umap", we return isl_bool_true * to ensure that these pairs are not broken up. */ static isl_bool after_in_extension(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the filter node "node"? * * We intersect domain and range of "umap" with the filter and * continue with its child. */ static isl_bool after_in_filter(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the set node "node"? * * This is only the case if this condition holds in any * of the (filter) children of the set node. * In particular, if the domain and the range of "umap" * are contained in different children, then the condition * does not hold. */ static isl_bool after_in_set(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Return the filter of child "i" of "node". */ static __isl_give isl_union_set *child_filter( __isl_keep isl_schedule_node *node, int i) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at * the sequence node "node"? * * This happens in particular if any domain element is * contained in a later child than one containing a range element or * if the condition holds within a given child in the sequence. * The later part of the condition is checked by after_in_set. */ static isl_bool after_in_sequence(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "umap" scheduled after any of * the corresponding image elements by the tree rooted at "node"? * * If "umap" is empty, then clearly there is no such element. * Otherwise, consider the different types of nodes separately. */ static isl_bool after_in_tree(__isl_keep isl_union_map *umap, __isl_keep isl_schedule_node *node) { … } /* Is any domain element of "map1" scheduled after any domain * element of "map2" by the subtree underneath the current band node, * while at the same time being scheduled together by the current * band node, i.e., by "map1" and "map2? * * If the child of the current band node is a leaf, then * no element can be scheduled after any other element. * * Otherwise, we construct a relation between domain elements * of "map1" and domain elements of "map2" that are scheduled * together and then check if the subtree underneath the current * band node determines their relative order. */ static isl_bool after_in_subtree(__isl_keep isl_ast_build *build, __isl_keep isl_map *map1, __isl_keep isl_map *map2) { … } /* Internal data for any_scheduled_after. * * "build" is the build in which the AST is constructed. * "depth" is the number of loops that have already been generated * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled * "domain" is an array of set-map pairs corresponding to the different * iteration domains. The set is the schedule domain, i.e., the domain * of the inverse schedule, while the map is the inverse schedule itself. */ struct isl_any_scheduled_after_data { … }; /* Is any element of domain "i" scheduled after any element of domain "j" * (for a common iteration of the first data->depth loops)? * * data->domain[i].set contains the domain of the inverse schedule * for domain "i", i.e., elements in the schedule domain. * * If we are inside a band of a schedule tree and there is a pair * of elements in the two domains that is schedule together by * the current band, then we check if any element of "i" may be schedule * after element of "j" by the descendants of the band node. * * If data->group_coscheduled is set, then we also return 1 if there * is any pair of elements in the two domains that are scheduled together. */ static isl_bool any_scheduled_after(int i, int j, void *user) { … } /* Look for independent components at the current depth and generate code * for each component separately. The resulting lists of grafts are * merged in an attempt to combine grafts with identical guards. * * Code for two domains can be generated separately if all the elements * of one domain are scheduled before (or together with) all the elements * of the other domain. We therefore consider the graph with as nodes * the domains and an edge between two nodes if any element of the first * node is scheduled after any element of the second node. * If the ast_build_group_coscheduled is set, then we also add an edge if * there is any pair of elements in the two domains that are scheduled * together. * Code is then generated (by generate_component) * for each of the strongly connected components in this graph * in their topological order. * * Since the test is performed on the domain of the inverse schedules of * the different domains, we precompute these domains and store * them in data.domain. */ static __isl_give isl_ast_graft_list *generate_components( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } /* Generate code for the next level (and all inner levels). * * If "executed" is empty, i.e., no code needs to be generated, * then we return an empty list. * * If we have already generated code for all loop levels, then we pass * control to generate_inner_level. * * If "executed" lives in a single space, i.e., if code needs to be * generated for a single domain, then there can only be a single * component and we go directly to generate_shifted_component. * Otherwise, we call generate_components to detect the components * and to call generate_component on each of them separately. */ static __isl_give isl_ast_graft_list *generate_next_level( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) { … } /* Internal data structure used by isl_ast_build_node_from_schedule_map. * internal, executed and build are the inputs to generate_code. * list collects the output. */ struct isl_generate_code_data { … }; /* Given an inverse schedule in terms of the external build schedule, i.e., * * [E -> S] -> D * * with E the external build schedule and S the additional schedule "space", * reformulate the inverse schedule in terms of the internal schedule domain, * i.e., return * * [I -> S] -> D * * We first obtain a mapping * * I -> E * * take the inverse and the product with S -> S, resulting in * * [I -> S] -> [E -> S] * * Applying the map to the input produces the desired result. */ static __isl_give isl_union_map *internal_executed( __isl_take isl_union_map *executed, __isl_keep isl_space *space, __isl_keep isl_ast_build *build) { … } /* Generate an AST that visits the elements in the range of data->executed * in the relative order specified by the corresponding domain element(s) * for those domain elements that belong to "set". * Add the result to data->list. * * The caller ensures that "set" is a universe domain. * "space" is the space of the additional part of the schedule. * It is equal to the space of "set" if build->domain is parametric. * Otherwise, it is equal to the range of the wrapped space of "set". * * If the build space is not parametric and * if isl_ast_build_node_from_schedule_map * was called from an outside user (data->internal not set), then * the (inverse) schedule refers to the external build domain and needs to * be transformed to refer to the internal build domain. * * If the build space is parametric, then we add some of the parameter * constraints to the executed relation. Adding these constraints * allows for an earlier detection of conflicts in some cases. * However, we do not want to divide the executed relation into * more disjuncts than necessary. We therefore approximate * the constraints on the parameters by a single disjunct set. * * The build is extended to include the additional part of the schedule. * If the original build space was not parametric, then the options * in data->build refer only to the additional part of the schedule * and they need to be adjusted to refer to the complete AST build * domain. * * After having adjusted inverse schedule and build, we start generating * code with the outer loop of the current code generation * in generate_next_level. * * If the original build space was not parametric, we undo the embedding * on the resulting isl_ast_node_list so that it can be used within * the outer AST build. */ static isl_stat generate_code_in_space(struct isl_generate_code_data *data, __isl_take isl_set *set, __isl_take isl_space *space) { … } /* Generate an AST that visits the elements in the range of data->executed * in the relative order specified by the corresponding domain element(s) * for those domain elements that belong to "set". * Add the result to data->list. * * The caller ensures that "set" is a universe domain. * * If the build space S is not parametric, then the space of "set" * need to be a wrapped relation with S as domain. That is, it needs * to be of the form * * [S -> T] * * Check this property and pass control to generate_code_in_space * passing along T. * If the build space is not parametric, then T is the space of "set". */ static isl_stat generate_code_set(__isl_take isl_set *set, void *user) { … } /* Generate an AST that visits the elements in the range of "executed" * in the relative order specified by the corresponding domain element(s). * * "build" is an isl_ast_build that has either been constructed by * isl_ast_build_from_context or passed to a callback set by * isl_ast_build_set_create_leaf. * In the first case, the space of the isl_ast_build is typically * a parametric space, although this is currently not enforced. * In the second case, the space is never a parametric space. * If the space S is not parametric, then the domain space(s) of "executed" * need to be wrapped relations with S as domain. * * If the domain of "executed" consists of several spaces, then an AST * is generated for each of them (in arbitrary order) and the results * are concatenated. * * If "internal" is set, then the domain "S" above refers to the internal * schedule domain representation. Otherwise, it refers to the external * representation, as returned by isl_ast_build_get_schedule_space. * * We essentially run over all the spaces in the domain of "executed" * and call generate_code_set on each of them. */ static __isl_give isl_ast_graft_list *generate_code( __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, int internal) { … } /* Generate an AST that visits the elements in the domain of "schedule" * in the relative order specified by the corresponding image element(s). * * "build" is an isl_ast_build that has either been constructed by * isl_ast_build_from_context or passed to a callback set by * isl_ast_build_set_create_leaf. * In the first case, the space of the isl_ast_build is typically * a parametric space, although this is currently not enforced. * In the second case, the space is never a parametric space. * If the space S is not parametric, then the range space(s) of "schedule" * need to be wrapped relations with S as domain. * * If the range of "schedule" consists of several spaces, then an AST * is generated for each of them (in arbitrary order) and the results * are concatenated. * * We first initialize the local copies of the relevant options. * We do this here rather than when the isl_ast_build is created * because the options may have changed between the construction * of the isl_ast_build and the call to isl_generate_code. * * The main computation is performed on an inverse schedule (with * the schedule domain in the domain and the elements to be executed * in the range) called "executed". */ __isl_give isl_ast_node *isl_ast_build_node_from_schedule_map( __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule) { … } /* The old name for isl_ast_build_node_from_schedule_map. * It is being kept for backward compatibility, but * it will be removed in the future. */ __isl_give isl_ast_node *isl_ast_build_ast_from_schedule( __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the leaf node "node". * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * Simply pass control to generate_inner_level. * Note that the current build does not refer to any band node, so * that generate_inner_level will not try to visit the child of * the leaf node. * * If multiple statement instances reach a leaf, * then they can be executed in any order. * Group the list of grafts based on shared guards * such that identical guards are only generated once * when the list is eventually passed on to isl_ast_graft_list_fuse. */ static __isl_give isl_ast_graft_list *build_ast_from_leaf( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Check that the band partial schedule "partial" does not filter out * any statement instances, as specified by the range of "executed". */ static isl_stat check_band_schedule_total_on_instances( __isl_keep isl_multi_union_pw_aff *partial, __isl_keep isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the band node "node" and its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * If the band is empty, we continue with its descendants. * Otherwise, we extend the build and the inverse schedule with * the additional space/partial schedule and continue generating * an AST in generate_next_level. * As soon as we have extended the inverse schedule with the additional * partial schedule, we look for equalities that may exists between * the old and the new part. */ static __isl_give isl_ast_graft_list *build_ast_from_band( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Hoist a list of grafts (in practice containing a single graft) * from "sub_build" (which includes extra context information) * to "build". * * In particular, project out all additional parameters introduced * by the context node from the enforced constraints and the guard * of the single graft. */ static __isl_give isl_ast_graft_list *hoist_out_of_context( __isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the context node "node" * and its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * The context node may introduce additional parameters as well as * constraints on the outer schedule dimensions or original parameters. * * We add the extra parameters to a new build and the context * constraints to both the build and (as a single disjunct) * to the domain of "executed". Since the context constraints * are specified in terms of the input schedule, we first need * to map them to the internal schedule domain. * * After constructing the AST from the descendants of "node", * we combine the list of grafts into a single graft within * the new build, in order to be able to exploit the additional * context constraints during this combination. * * Additionally, if the current node is the outermost node in * the schedule tree (apart from the root domain node), we generate * all pending guards, again to be able to exploit the additional * context constraints. We currently do not do this for internal * context nodes since we may still want to hoist conditions * to outer AST nodes. * * If the context node introduced any new parameters, then they * are removed from the set of enforced constraints and guard * in hoist_out_of_context. */ static __isl_give isl_ast_graft_list *build_ast_from_context( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the expansion node "node" and * its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * We expand the domain elements by the expansion and * continue with the descendants of the node. */ static __isl_give isl_ast_graft_list *build_ast_from_expansion( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the extension node "node" and * its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * Extend the inverse schedule with the extension applied to current * set of generated constraints. Since the extension if formulated * in terms of the input schedule, it first needs to be transformed * to refer to the internal schedule. */ static __isl_give isl_ast_graft_list *build_ast_from_extension( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the filter node "node" and * its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * We simply intersect the iteration domain (i.e., the range of "executed") * with the filter and continue with the descendants of the node, * unless the resulting inverse schedule is empty, in which * case we return an empty list. * * If the result of the intersection is equal to the original "executed" * relation, then keep the original representation since the intersection * may have unnecessarily broken up the relation into a greater number * of disjuncts. */ static __isl_give isl_ast_graft_list *build_ast_from_filter( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the guard node "node" and * its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * Ensure that the associated guard is enforced by the outer AST * constructs by adding it to the guard of the graft. * Since we know that we will enforce the guard, we can also include it * in the generated constraints used to construct an AST for * the descendant nodes. */ static __isl_give isl_ast_graft_list *build_ast_from_guard( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Call the before_each_mark callback, if requested by the user. * * Return 0 on success and -1 on error. * * The caller is responsible for recording the current inverse schedule * in "build". */ static isl_stat before_each_mark(__isl_keep isl_id *mark, __isl_keep isl_ast_build *build) { … } /* Call the after_each_mark callback, if requested by the user. * * The caller is responsible for recording the current inverse schedule * in "build". */ static __isl_give isl_ast_graft *after_each_mark( __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the mark node "node" and * its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * Since we may be calling before_each_mark and after_each_mark * callbacks, we record the current inverse schedule in the build. * * We generate an AST for the child of the mark node, combine * the graft list into a single graft and then insert the mark * in the AST of that single graft. */ static __isl_give isl_ast_graft_list *build_ast_from_mark( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } static __isl_give isl_ast_graft_list *build_ast_from_schedule_node( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed); /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the sequence (or set) node "node" and * its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * We simply generate an AST for each of the children and concatenate * the results. */ static __isl_give isl_ast_graft_list *build_ast_from_sequence( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the node "node" and its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * The node types are handled in separate functions. * Set nodes are currently treated in the same way as sequence nodes. * The children of a set node may be executed in any order, * including the order of the children. */ static __isl_give isl_ast_graft_list *build_ast_from_schedule_node( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of "executed" * in the relative order specified by the (single) child of "node" and * its descendants. * * The relation "executed" maps the outer generated loop iterators * to the domain elements executed by those iterations. * * This function is never called on a leaf, set or sequence node, * so the node always has exactly one child. */ static __isl_give isl_ast_graft_list *build_ast_from_child( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, __isl_take isl_union_map *executed) { … } /* Generate an AST that visits the elements in the domain of the domain * node "node" in the relative order specified by its descendants. * * An initial inverse schedule is created that maps a zero-dimensional * schedule space to the node domain. * The input "build" is assumed to have a parametric domain and * is replaced by the same zero-dimensional schedule space. * * We also add some of the parameter constraints in the build domain * to the executed relation. Adding these constraints * allows for an earlier detection of conflicts in some cases. * However, we do not want to divide the executed relation into * more disjuncts than necessary. We therefore approximate * the constraints on the parameters by a single disjunct set. */ static __isl_give isl_ast_node *build_ast_from_domain( __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node) { … } /* Generate an AST that visits the elements in the domain of "schedule" * in the relative order specified by the schedule tree. * * "build" is an isl_ast_build that has been created using * isl_ast_build_alloc or isl_ast_build_from_context based * on a parametric set. * * The construction starts at the root node of the schedule, * which is assumed to be a domain node. */ __isl_give isl_ast_node *isl_ast_build_node_from_schedule( __isl_keep isl_ast_build *build, __isl_take isl_schedule *schedule) { … }