chromium/infra/config/lib/builder_health_indicators.star

# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Library for defining builder health indicator thresholds.

See chromium/src -- //docs/infra/builder_health_indicators.md for more info.
"""

load("@stdlib//internal/graph.star", "graph")
load("@stdlib//internal/luci/common.star", "keys")
load("//project.star", "settings")
load("./builder_exemptions.star", "exempted_from_contact_builders")
load("./nodes.star", "nodes")
load("./structs.star", "structs")

_HEALTH_SPEC = nodes.create_bucket_scoped_node_type("health_spec")

# See https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/cr_builder_health/src_config.go
# for all configurable thresholds.
_default_specs = {
    "Unhealthy": struct(
        score = 5,
        period_days = 7 if settings.project.startswith("chromium") else 14,
        # If any of these thresholds are exceeded, the builder will be deemed
        # unhealthy.
        # Setting a value of None will ignore that threshold
        infra_fail_rate = struct(
            average = 0.05,
        ),
        fail_rate = struct(
            average = 0.2,
        ),
        build_time = struct(
            p50_mins = None,
        ),
        pending_time = struct(
            p50_mins = 20 if settings.project.startswith("chromium") else 60,
        ),
    ),
    "Low Value": struct(
        score = 1,
        period_days = 90,
        # If any of these thresholds are met, the builder will be deemed
        # low-value and will be considered for deletion.
        # Setting a value of None will ignore that threshold
        fail_rate = struct(
            average = 0.99,
        ),
    ),
}

_blank_unhealthy_thresholds = struct(
    infra_fail_rate = struct(
        average = None,
    ),
    fail_rate = struct(
        average = None,
    ),
    build_time = struct(
        p50_mins = None,
    ),
    pending_time = struct(
        p50_mins = None,
    ),
)

blank_low_value_thresholds = struct(
    fail_rate = struct(
        average = None,
    ),
)

DEFAULT = {
    "Unhealthy": struct(
        score = 5,
        period_days = 7 if settings.project.startswith("chromium") else 14,
        _default = "_default",
    ),
    "Low Value": struct(
        score = 1,
        period_days = 90,
        _default = "_default",
    ),
}

# Users define the specs as {problem_name -> problem_spec} for aesthetic reasons
# So all user-exposed functions expect a dictionary.
# We then convert that into a list of [problem_specs] so the object encapsulates
# its own name, for ease of processing
def unhealthy_thresholds(
        fail_rate = struct(),
        infra_fail_rate = struct(),
        build_time = struct(),
        pending_time = struct()):
    thresholds = {"fail_rate": fail_rate, "infra_fail_rate": infra_fail_rate, "build_time": build_time, "pending_time": pending_time}
    fail_if_any_none_val(thresholds)

    return structs.evolve(_blank_unhealthy_thresholds, **thresholds)

def low_value_thresholds(
        fail_rate = struct()):
    thresholds = {"fail_rate": fail_rate}
    fail_if_any_none_val(thresholds)

    return structs.evolve(blank_low_value_thresholds, **thresholds)

def fail_if_any_none_val(vals):
    for k, v in vals.items():
        if v == None:
            fail(k + " threshold was None. Thresholds can't be None. Use an empty struct() instead")

def modified_default(modifications):
    return _merge_mods(_default_specs, modifications)

def _merge_mods(base, modifications):
    spec = dict(base)

    for mod_name, mod in modifications.items():
        mods_proto = structs.to_proto_properties(mod)
        if len(mods_proto) == 0:
            fail("Modifications for health spec \"{}\" were empty.".format(mod_name))

        if mod_name not in spec:
            spec[mod_name] = mod
        else:
            spec[mod_name] = structs.evolve(spec[mod_name], **mods_proto)

    return spec

def _exempted_from_contact(bucket, builder):
    return builder in exempted_from_contact_builders.get(bucket, [])

def register_health_spec(bucket, name, specs, contact_team_email):
    if not contact_team_email and not _exempted_from_contact(bucket, name):
        fail("Builder " + name + " must have a contact_team_email. All new builders must specify a team email for contact in case the builder stops being healthy or providing value.")
    elif contact_team_email and _exempted_from_contact(bucket, name):
        fail("Need to remove builder " + bucket + "/" + name + " from exempted_from_contact_builders")

    if specs:
        spec = struct(
            problem_specs = _convert_specs(specs),
            contact_team_email = contact_team_email,
        )
        health_spec_key = _HEALTH_SPEC.add(
            bucket,
            name,
            props = structs.to_proto_properties(spec),
            idempotent = True,
        )

        graph.add_edge(keys.project(), health_spec_key)

def _convert_specs(specs):
    """Users define the specs as {problem_name -> problem_spec} for aesthetic reasons,

    So all user-exposed functions expect a dictionary.
    We then convert that into a list of [problem_specs] so the object encapsulates its own name, for ease of processing
    """
    converted_specs = []
    for name, spec in specs.items():
        thresholds_spec = structs.to_proto_properties(spec)
        thresholds_spec.pop("score")
        thresholds_spec.pop("period_days")
        converted_specs.append(struct(
            name = name,
            score = spec.score,
            period_days = spec.period_days,
            thresholds = thresholds_spec,
        ))

    return converted_specs

def _generate_health_specs(ctx):
    specs = {}

    for node in graph.children(keys.project(), _HEALTH_SPEC.kind):
        bucket = node.key.container.id
        builder = node.key.id
        specs.setdefault(bucket, {})[builder] = node.props

    result = {
        "_default_specs": _convert_specs(_default_specs),
        "specs": specs,
    }

    ctx.output["health-specs/health-specs.json"] = json.indent(json.encode(result), indent = "  ")

health_spec = struct(
    DEFAULT = DEFAULT,
    unhealthy_thresholds = unhealthy_thresholds,
    low_value_thresholds = low_value_thresholds,
    modified_default = modified_default,
)

lucicfg.generator(_generate_health_specs)