chromium/tools/grit/grit/tool/update_resource_ids/assigner.py

# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Assign IDs to resource_ids file based on usage, while preserving structure.

resource_ids assignment is divided into two parts:
(A) Coarse assignment: Assigns start IDs of items.
(B) Quota assignment: Assigns per-tag ID allotments for a given item, allowing
    padding for IDs.

These parts are interdependent: Start IDs (A) of an item depends on ID
allotments (B) of *other* items; and ID allotment (B) of an item depends on its
start IDs (A) to compute alignment.

(B) hides padding and alignment details of tags so that (A) can be abstracted
into the graph construction and traversal problem in DagCoarseIdAssigner.
"""

import math

from grit.tool.update_resource_ids import common


class Aligner:
  """Helper to allot IDs, given start ID and ID usage.

  Args:
    expand: Scale factor relative to ID usage. Must be >= 1.0.
    slack: Minimum number of reserved ID at end. Must be >= 0.
    align: ID alignment of results. Must be >= 1.
  """

  def __init__(self, expand=1.0, slack=0, align=1):
    assert expand >= 1.0 and slack >= 0 and align >= 1
    self._expand = expand
    self._slack = slack
    self._align = align

  def Calc(self, cur_id, usage):
    quota = max(int(math.ceil(usage * self._expand)), usage + self._slack)
    return common.AlignUp(cur_id + quota, self._align)


class QuotaAssigner:
  """Main class for (B), for ID allotment of tags for an item."""

  def __init__(self, aligner):
    self._aligner = aligner

  def Gen(self, item, start_id):
    """Generates per-tag *end* ID in |item|, succeeding |start_id|."""
    cur_id = start_id
    for tag in item.tags:  # Sorted by |tag.lo|.
      cur_id = self._aligner.Calc(cur_id, tag.usage)
      yield tag, cur_id


class BaseCoarseIdAssigner:
  """Base class for coarse assignment."""

  def __init__(self, item_list, align):
    self._item_list = item_list
    self._align = align

  def GenStartIds(self):
    """Visits |_item_list| and yields (|item|, new |start_id|).

    Visit follows dependency order: If item B succeeds item A, then A is visited
    before B. Caller must call FeedWeight() to assign ID allotment.
    """
    raise NotImplementedError()

  def FeedWeight(self, item, weight):
    """Callback to assign number of IDs allotted to |item|."""
    raise NotImplementedError()


class NaiveCoarseIdAssigner(BaseCoarseIdAssigner):
  """CoarseIdAssigner that assigns item with non-overlapping start IDs."""

  def __init__(self, item_list, align):
    super().__init__(item_list, align)
    first_id = self._item_list[0].tags[0].id
    self._cur_id = common.AlignUp(first_id, self._align)

  def GenStartIds(self):
    """Visits items in array order."""
    for item in self._item_list:
      yield item, self._cur_id

  def FeedWeight(self, item, weight):
    self._cur_id = common.AlignUp(self._cur_id + weight, self._align)


class DagCoarseIdAssigner(BaseCoarseIdAssigner):
  r"""CoarseIdAssigner that preserves existing structure.

Start ID assignment in resource_ids is structured a Series-Parallel Graph, which
is a directed, acyclic multi-graph generated by the following:
* Start: Single directed edge. S <-- T.
* Operation 1: A <-- B becomes A <-- C <-- B.
* Operation 2: A <-- B becomes A <== B (add parallel edge).

Each vertex (A, B, ...) is a start ID. S = globally minimal ID. T = \infty is an
implicit sentinel. Each edge maps to an item, and edge weight is ID allotment.
The edge A <-- B means "A's ID assignment needs to be determined before B's"
(i.e., "B depends on A"), and requires A < B.

resource_ids stores a "flattened" representation of the graph, as a list of
items (with meta data). Thus coarse ID assignment consists of the following:
(1) Process list of items (with old start ID and meta data) to rebuild graph.
(2) Traverse graph in an order that satisfies dependencies.
(3) When vertex A has its ID assigned, the weight of each edge "A <--" (i.e., an
    item) can have its weight (ID allotment) computed via quota assignment of A.
(4) New start IDs satisfy A + w <= B for each edge A <-- B with weight w > 0.

The key algorithm challenge is (1). Note that it does not need weight details,
so we only assume A < B whenever A <-- B. Now we're faced with 2 subproblems:
(1a) How to represent the graph as a list of integers (with meta data)?
(1b) Given the list representation, how to recover the graph?

For (1a), we start with DFS traversal of the (transposed, i.e., reversed) graph
starting from S, and apply the following:
* For each edge A <-- B traversed, emit A into sequence,
* Traverse a B <-- Y only when all X <-- B have been traversed.

The resulting sequence has the length as the number of edges, and has the useful
property that a vertex's dependencies always appear before the vertex itself!
Note this the sentinel T is omitted.

Example 1:
  S <-- A <-- B <-- C <-- T  => "SABC".

Example 2:
  S <-- A <-- B <-- C <-- T  => "SA|AB|SDEC|SF",
  |     |     |     |     |  or "SF|SA|AB|SDEC",
  |     + <-- +     |     |  or "SDE|SA|ABC|SF",
  |                 |     |  or "SF|SDE|SA|ABC".
  + <---D <-- E <---+     |
  |                       |
  + <-- F <---------------+

Here, "X|Y" denotes backtracking between visiting X and visiting Y. This appears
if and only if X >= Y, so "|" an optional (but illustrative) character that's
not in the actual output. We will use it consistently in comments, and so the
absence of "|" denotes the converse. For example, "XY" implies X < Y.

In terms of the basic operations:
* Start: S <-- T => "S".
* Operation 1: "...AB..." => "...ACB..." (or "...A" => "...AB").
* Operation 2: "...AB..." => "...A|AB..." (or "...A" => "...A|A").

For Example 2, a viable "evolution path" is:
"S" => "S|S" => "SC|S" => "S|SC|S" => "SA|SC|S" => "SAB|SC|S" => "SA|AB|SC|S"
    => "SA|AB|SDC|S" => "SA|AB|SDEC|S" => "SA|AB|SDEC|SF".
(Alternative: "S|S" => "S|SC" => etc.).

Note: {A, ...} are *unlabelled* integers, and "spurious equalities" such as
A = D or A = F can occur!

For (1b), we wish to build the graph from the sequence. This requires (1a) to be
injective (up to isomorphism). Unfortunately, this does not always hold.
Example:
  S <-- A <-- C <-- D <-- T  => "SA|SBCD".
  |           |
  + <-- B <---+
vs.
  S <----- A <----- D <-- T  => "SA|SBCD".
  |                 |
  + <-- B <-- C <---+

To fix this, we prepend a "join" label (*) to each vertex that has multiple
dependencies. With this, the example above produce different results:
   "SA|SB*CD" != "SA|SBC*D".

Unfortunately, this is also inadequate.  Example:
  S <-------- B <-- T  => "S|S|S|S*A*B",
  |           |
  + <---------+
  |           |
  + <-- A <---+
  |     |
  + <---+
vs.
  S <-------- B <-- T  => "S|S|S|S*A*B".
  |           |
  + <---A <---+
  |     |
  + <---+
  |     |
  + <---+

To fix this, we also label the number of dependencies. In text representation,
we just show multiple (#dependencies - 1) copies of '*'. Now we have:
  "S|S|S|S*A**B" != "S|S|S|S**A*B".

The "join" label with count adequately addresses the issue (proof omitted). In
the resource_ids files, these are stored as the "join" field of an item's meta
data.

Additional comments for (1b) and other steps are detailed below.
"""

  class DagNode:
    """An element of the implicit graph, corresponding to an item.

    This actually represents an edge-vertex pair, corresponding to an item.
    A vertex is represented by a collection of DagNode that uses |sib| to link
    to a "representative node". The representative node, in turn, holds the list
    of all |deps| (dependencies) of the vertex.
    """

    def __init__(self, item, old_id):
      self.item = item
      self.old_id = old_id
      self.new_id = None
      self.weight = None

  def __init__(self, item_list, align):
    super().__init__(item_list, align)
    self._node_dict = {}  # Maps from |lo| to item.

  def GenStartIds(self):
    """Traverses implicit graph and yields new IDs.

    Algorithm: Process |old_id| of items sequentially. Successive items A and B
    can be "AB" (A < B), "A*...B" (A < B), or "A|B" (A >= B). "AB" and "A*...B"
    imply A <-- B, and are accumulated in |trace|. "A|B" are jumps that rewinds
    |trace| to the latest B (must exist), and A is pushed into |jumps|. A join
    "A*...B" also pops |num_join - 1| items {X_i} in |jump|, and X_i <-- B. In
    the end, unprocessed elements in |jumps| all link to sentinel T, and can be
    ignored.
    """
    # DagNode stack of "A" when "AB" is found (increasing |old_id|).
    trace = []
    # DagNode stack of "A" when "A|B" jumps is found.
    jumps = []
    for item in self._item_list:  # Sorted by |lo|.
      meta = item.meta
      # |num_join| indicates "*" in "A*...B", and specify B's dependencies: +1
      # from A, and +count("*") from |jumps|.
      num_join = meta['join'].val if meta and 'join' in meta else None
      node = DagCoarseIdAssigner.DagNode(item, item.tags[0].id)
      self._node_dict[item.lo] = node
      if trace:
        if trace[-1].old_id >= node.old_id:  # "A|B".
          if num_join:
            raise ValueError('Cannot join on jump: %d' % node.old_id)
          jumps.append(trace[-1])  # Add A to |jumps|, for later join.
          while trace and trace[-1].old_id > node.old_id:  # Rewind to find B.
            trace.pop()
          if not trace or trace[-1].old_id != node.old_id:  #
            raise ValueError('Cannot jump to unvisited: %d' % node.old_id)
          node.new_id = trace.pop().new_id  # Copy B & remove. Will re-add B.
        else:  # "AB" or "A*...B".
          node.new_id = trace[-1].new_id + trace[-1].weight  # A --> B
          if num_join:  # "A*...B".
            for _ in range(1, num_join):
              t = jumps.pop()
              node.new_id = max(node.new_id, t.new_id + t.weight)  # X_i --> B.
      else:
        node.new_id = node.old_id  # Initial S.
      trace.append(node)  # Add B.
      align = meta['align'].val if meta and 'align' in meta else self._align
      node.new_id = common.AlignUp(node.new_id, align)
      yield node.item, node.new_id
      # Expect caller to calling FreedWeight() and update |node.weight|.

  def FeedWeight(self, item, weight):
    self._node_dict[item.lo].weight = weight


def GenerateNewIds(item_list, use_naive):
  """Visits all tags in |item_list| and generates new ids.

  New ids are generated based on old ids and usages.
  """
  Assigner = NaiveCoarseIdAssigner if use_naive else DagCoarseIdAssigner
  coarse_id_assigner = Assigner(item_list, 10)
  quota_assigner = QuotaAssigner(Aligner(expand=1.15, slack=3, align=10))
  for item, start_id in coarse_id_assigner.GenStartIds():  # Topo-sorted.
    cur_id = start_id
    for tag, next_id in quota_assigner.Gen(item, start_id):  # Sorted by |lo|.
      yield tag, cur_id
      cur_id = next_id
    coarse_id_assigner.FeedWeight(item, next_id - start_id)