chromium/build/locale_tool.py

#!/usr/bin/env vpython3
# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helper script used to manage locale-related files in Chromium.

This script is used to check, and potentially fix, many locale-related files
in your Chromium workspace, such as:

  - GRIT input files (.grd) and the corresponding translations (.xtb).

  - BUILD.gn files listing Android localized resource string resource .xml
    generated by GRIT for all supported Chrome locales. These correspond to
    <output> elements that use the type="android" attribute.

The --scan-dir <dir> option can be used to check for all files under a specific
directory, and the --fix-inplace option can be used to try fixing any file
that doesn't pass the check.

This can be very handy to avoid tedious and repetitive work when adding new
translations / locales to the Chrome code base, since this script can update
said input files for you.

Important note: checks and fix may fail on some input files. For example
remoting/resources/remoting_strings.grd contains an in-line comment element
inside its <outputs> section that breaks the script. The check will fail, and
trying to fix it too, but at least the file will not be modified.
"""


import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import unittest

# Assume this script is under build/, so that the parent of its directory is
# the top-level source directory.
_SCRIPT_DIR = os.path.dirname(__file__)
# Path of this script, rebuilt from its directory and its base name.
_SCRIPT_NAME = os.path.join(_SCRIPT_DIR, os.path.basename(__file__))
_TOP_SRC_DIR = os.path.join(_SCRIPT_DIR, '..')

# Need to import android/gyp/util/resource_utils.py here: put the
# android/gyp directory (relative to this script) first on the module search
# path so that the 'util' package can be imported.
sys.path.insert(0, os.path.join(_SCRIPT_DIR, 'android/gyp'))

from util import build_utils
from util import resource_utils


# This locale is the default and doesn't have translations.
_DEFAULT_LOCALE = 'en-US'

# Misc terminal codes to provide human friendly progress output.
_CONSOLE_CODE_MOVE_CURSOR_TO_COLUMN_0 = '\x1b[0G'
_CONSOLE_CODE_ERASE_LINE = '\x1b[K'
# Combined sequence: move the cursor to column 0, then erase the line. It is
# printed in front of progress messages.
_CONSOLE_START_LINE = (
    _CONSOLE_CODE_MOVE_CURSOR_TO_COLUMN_0 + _CONSOLE_CODE_ERASE_LINE)

##########################################################################
##########################################################################
#####
#####    G E N E R I C   H E L P E R   F U N C T I O N S
#####
##########################################################################
##########################################################################

def _FixChromiumLangAttribute(lang):
  """Translate an XML "lang" attribute value into a Chromium locale name.

  Args:
    lang: Value of an XML "lang" attribute.
  Returns:
    The corresponding Chromium locale name, or |lang| itself when no
    substitution applies.
  """
  lang_to_chromium_locale = dict([
      # For now, Chromium doesn't have an 'en' locale.
      ('en', 'en-US'),
      # 'iw' is the obsolete form of ISO 639-1 for Hebrew
      ('iw', 'he'),
      # 'no' is used by the Translation Console for Norwegian (nb).
      ('no', 'nb'),
  ])
  try:
    return lang_to_chromium_locale[lang]
  except KeyError:
    return lang


def _FixTranslationConsoleLocaleName(locale):
  """Map a locale name through a small table of Translation Console names.

  Args:
    locale: Locale name to convert.
  Returns:
    The substituted name for 'nb' and 'he', or |locale| itself otherwise.
  """
  console_names = dict([
      ('nb', 'no'),  # Norwegian.
      ('he', 'iw'),  # Hebrew
  ])
  try:
    return console_names[locale]
  except KeyError:
    return locale


def _CompareLocaleLists(list_a, list_expected, list_name):
  """Compare two lists of locale names. Print errors if they differ.

  Only membership is compared; ordering and duplicates are ignored.

  Args:
    list_a: First list of locales.
    list_expected: Second list of locales, as expected.
    list_name: Name of list printed in error messages.
  Returns:
    On success, return False. On error, print error messages and return True.
  """
  actual = set(list_a)
  expected = set(list_expected)

  errors = []
  # Locales that were expected but are absent from |list_a|.
  missing_locales = sorted(expected - actual)
  if missing_locales:
    errors.append('Missing locales: %s' % missing_locales)

  # Locales present in |list_a| that were not expected.
  extra_locales = sorted(actual - expected)
  if extra_locales:
    errors.append('Unexpected locales: %s' % extra_locales)

  if not errors:
    return False

  print('Errors in %s definition:' % list_name)
  for error in errors:
    print('  %s\n' % error)
  return True


def _BuildIntervalList(input_list, predicate):
  """Locate maximal runs of consecutive items accepted by |predicate|.

  Args:
    input_list: An input list of items of any type.
    predicate: A function called once per item, in order, that returns True
      when the item passes the test.
  Returns:
    A list of (start_pos, end_pos) tuples, in increasing order, such that
    every item in [start_pos, end_pos) passes the predicate.
  """
  intervals = []
  run_start = None  # Index of the first item of the current run, if any.
  for index, item in enumerate(input_list):
    if predicate(item):
      if run_start is None:
        run_start = index
    elif run_start is not None:
      # The current run stops right before this rejected item.
      intervals.append((run_start, index))
      run_start = None

  # A run that reaches the end of the list is closed by the list size.
  if run_start is not None:
    intervals.append((run_start, len(input_list)))
  return intervals


def _SortListSubRange(input_list, start, end, key_func):
  """Sort an input list's sub-range according to a specific key function.

  Items are ordered by (key, item), so items with equal keys are ordered by
  their own value. |input_list| itself is not modified.

  Args:
    input_list: An input list.
    start: Sub-range starting position in list.
    end: Sub-range limit position in list.
    key_func: A function that extracts a sort key from a line.
  Returns:
    A copy of |input_list|, with all items in [|start|, |end|) sorted
    according to |key_func|.
  """
  # NOTE: range() replaces the Python 2 only xrange(), which raises a
  # NameError under Python 3.
  keyed_lines = [(key_func(input_list[pos]), input_list[pos])
                 for pos in range(start, end)]

  result = input_list[:start]
  result.extend(line for _, line in sorted(keyed_lines))
  result += input_list[end:]
  return result


def _SortElementsRanges(lines, element_predicate, element_key):
  """Sort every contiguous run of selected lines by a given key.

  Args:
    lines: input lines.
    element_predicate: predicate function to select elements to sort.
    element_key: function returning a comparison key for each element that
      passes the predicate.
  Returns:
    A list of lines in which each contiguous run of lines accepted by
    |element_predicate| has been sorted; other lines keep their position.
  """
  sorted_lines = lines
  # Sorting a run never changes its length, so the intervals computed from
  # the initial lines remain valid for every intermediate result.
  for first, limit in _BuildIntervalList(lines, element_predicate):
    sorted_lines = _SortListSubRange(sorted_lines, first, limit, element_key)
  return sorted_lines


def _ProcessFile(input_file, locales, check_func, fix_func):
  """Check one input file and, when requested, rewrite it with fixes.

  Args:
    input_file: Input file path.
    locales: List of Chrome locales to consider / expect.
    check_func: A callable invoked as
      check_func(input_file, input_lines, locales). A non-empty list of error
      messages means the check failed; an empty list or None means success.
    fix_func: None, or a callable invoked as
      fix_func(input_file, input_lines, locales) when the check failed. It
      must return the new list of lines for the input file. Any Exception it
      raises is reported and the file is skipped.
  Returns:
    True at the moment.
  """
  progress = '%sProcessing %s...' % (_CONSOLE_START_LINE, input_file)
  print(progress, end=' ')
  sys.stdout.flush()

  with open(input_file) as src:
    current_lines = src.readlines()

  errors = check_func(input_file, current_lines, locales)
  if not errors:
    return True

  print('\n%s%s' % (_CONSOLE_START_LINE, '\n'.join(errors)))
  if not fix_func:
    return True

  try:
    # Build the whole new content before opening the file for writing.
    fixed_content = ''.join(fix_func(input_file, current_lines, locales))
    with open(input_file, 'wt') as dst:
      dst.write(fixed_content)
    print('Fixed %s.' % input_file)
  except Exception as error:  # pylint: disable=broad-except
    print('Skipped %s: %s' % (input_file, error))

  return True


def _ScanDirectoriesForFiles(scan_dirs, file_predicate):
  """Recursively collect the files accepted by a predicate.

  Args:
    scan_dirs: A list of top-level directories to start the scan in.
    file_predicate: function which is passed a file's base name and returns
      True if that file should be part of the result.
  Returns:
    A list of file paths, each one built by joining the directory reported
    by os.walk() with the file's base name.
  """
  matches = []
  for top_dir in scan_dirs:
    for dir_path, _, file_names in os.walk(top_dir):
      for file_name in file_names:
        if file_predicate(file_name):
          matches.append(os.path.join(dir_path, file_name))
  return matches


def _WriteFile(file_path, file_data):
  """Create or overwrite |file_path| with the text in |file_data|.

  Args:
    file_path: Path of the file to write.
    file_data: String written to the file.
  """
  with open(file_path, 'w') as output:
    output.write(file_data)


def _FindGnExecutable():
  """Find the GN executable shipped under this checkout's buildtools/.

  This is needed because the depot_tools 'gn' wrapper script will look
  for .gclient and other things we really don't need here.

  Returns:
    Path of the first buildtools/<subdir>/gn entry found, or None when no
    sub-directory of buildtools/ contains one.
  """
  # Probe every buildtools/*/gn instead of guessing the platform-specific
  # sub-directory name (e.g. 'linux64' for 64-bit Linux machines).
  buildtools_dir = os.path.join(_TOP_SRC_DIR, 'buildtools')
  for entry in os.listdir(buildtools_dir):
    candidate_dir = os.path.join(buildtools_dir, entry)
    if os.path.isdir(candidate_dir):
      candidate = os.path.join(candidate_dir, 'gn')
      if os.path.exists(candidate):
        return candidate
  return None


def _PrettyPrintListAsLines(input_list, available_width, trailing_comma=False):
  """Join strings with ', ' and wrap the result after commas.

  Args:
    input_list: List of strings to print.
    available_width: Maximum length of each output line. A single item that
      is longer than this is emitted on its own, over-long, line.
    trailing_comma: If True, append a ',' to the last non-empty line.
  Returns:
    A list of lines, without line terminators. An empty |input_list| gives
    [''].
  """
  result = []
  input_str = ', '.join(input_list)
  while len(input_str) > available_width:
    pos = input_str.rfind(',', 0, available_width)
    if pos < 0:
      # No separator fits in the available width: break at the first
      # separator instead. Without this, an empty line would be appended
      # forever since |input_str| would never shrink.
      pos = input_str.find(',')
      if pos < 0:
        # A single over-long item remains; emit it as is below.
        break
    result.append(input_str[:pos + 1])
    input_str = input_str[pos + 1:].lstrip()
  if trailing_comma and input_str:
    input_str += ','
  result.append(input_str)
  return result


class _PrettyPrintListAsLinesTest(unittest.TestCase):
  """Unit tests for _PrettyPrintListAsLines()."""

  # Input shared by the wrapping tests.
  _WORDS = ['foo', 'bar', 'zoo', 'tool']

  def test_empty_list(self):
    lines = _PrettyPrintListAsLines([], 10)
    self.assertListEqual([''], lines)

  def test_wrapping(self):
    # (available width, expected lines), checked in this order.
    cases = (
        (8, ['foo,', 'bar,', 'zoo,', 'tool']),
        (12, ['foo, bar,', 'zoo, tool']),
        (79, ['foo, bar, zoo, tool']),
    )
    for width, expected in cases:
      self.assertListEqual(
          _PrettyPrintListAsLines(list(self._WORDS), width), expected)

  def test_trailing_comma(self):
    # (available width, expected lines), checked in this order.
    cases = (
        (8, ['foo,', 'bar,', 'zoo,', 'tool,']),
        (12, ['foo, bar,', 'zoo, tool,']),
        (79, ['foo, bar, zoo, tool,']),
    )
    for width, expected in cases:
      self.assertListEqual(
          _PrettyPrintListAsLines(
              list(self._WORDS), width, trailing_comma=True), expected)


##########################################################################
##########################################################################
#####
#####    L O C A L E S   L I S T S
#####
##########################################################################
##########################################################################

# Lists of locales that will be extracted from build/config/locales.gni.
# Both start empty and are replaced by _ExtractAllChromeLocalesLists().
# Do not use these directly, use ChromeLocales(), and IosUnsupportedLocales()
# instead to access these lists.
_INTERNAL_CHROME_LOCALES = []
_INTERNAL_IOS_UNSUPPORTED_LOCALES = []


def ChromeLocales():
  """Return the list of all locales supported by Chrome.

  The list is extracted on demand: as long as the cached list is empty,
  each call runs _ExtractAllChromeLocalesLists() first.
  """
  if _INTERNAL_CHROME_LOCALES:
    return _INTERNAL_CHROME_LOCALES

  _ExtractAllChromeLocalesLists()
  return _INTERNAL_CHROME_LOCALES


def IosUnsupportedLocales():
  """Return the list of locales that are unsupported on iOS.

  The list is extracted on demand: as long as the cached list is empty,
  each call runs _ExtractAllChromeLocalesLists() first.
  """
  if _INTERNAL_IOS_UNSUPPORTED_LOCALES:
    return _INTERNAL_IOS_UNSUPPORTED_LOCALES

  _ExtractAllChromeLocalesLists()
  return _INTERNAL_IOS_UNSUPPORTED_LOCALES


def _PrepareTinyGnWorkspace(work_dir, out_subdir_name='out'):
  """Populate an empty directory with a tiny set of working GN config files.

  This allows us to run 'gn gen <out> --root <work_dir>' as fast as possible
  to generate files containing the locales list. This takes about 300ms on
  a decent machine, instead of more than 5 seconds when running the equivalent
  commands from a real Chromium workspace, which requires regenerating more
  than 23k targets.

  The following entries are created inside |work_dir|: '.gn',
  'BUILDCONFIG.gn', 'toolchain/BUILD.gn', 'BUILD.gn', 'locales.gni' (a copy
  of build/config/locales.gni) and the output sub-directory.

  NOTE: Sub-directories are created with os.mkdir(), so |work_dir| must not
  already contain 'toolchain' or |out_subdir_name|.

  Args:
    work_dir: target working directory.
    out_subdir_name: Name of output sub-directory.
  Returns:
    Full path of output directory created inside |work_dir|.
  """
  # Create top-level .gn file that must point to the BUILDCONFIG.gn.
  _WriteFile(os.path.join(work_dir, '.gn'),
             'buildconfig = "//BUILDCONFIG.gn"\n')
  # Create BUILDCONFIG.gn which must set a default toolchain. Also add
  # all variables that may be used in locales.gni in a declare_args() block.
  _WriteFile(
      os.path.join(work_dir, 'BUILDCONFIG.gn'),
      r'''set_default_toolchain("toolchain")
declare_args () {
  is_ios = false
  is_android = true
}
''')

  # Create fake toolchain required by BUILDCONFIG.gn.
  os.mkdir(os.path.join(work_dir, 'toolchain'))
  _WriteFile(os.path.join(work_dir, 'toolchain', 'BUILD.gn'),
             r'''toolchain("toolchain") {
  tool("stamp") {
    command = "touch {{output}}"  # Required by action()
  }
}
''')

  # Create top-level BUILD.gn, GN requires at least one target to build so do
  # that with a fake action which will never be invoked. Also write the locales
  # to misc files in the output directory. The 'foo.locales' and
  # 'foo.ios_unsupported_locales' file names written here are the ones read
  # back by _ExtractAllChromeLocalesLists().
  _WriteFile(
      os.path.join(work_dir, 'BUILD.gn'), r'''import("//locales.gni")

action("create_foo") {   # fake action to avoid GN complaints.
  script = "//build/create_foo.py"
  inputs = []
  outputs = [ "$target_out_dir/$target_name" ]
}

# Write the locales lists to files in the output directory.
_filename = root_build_dir + "/foo"
write_file(_filename + ".locales", locales, "json")
write_file(_filename + ".ios_unsupported_locales",
            ios_unsupported_locales,
            "json")
''')

  # Copy build/config/locales.gni to the workspace, as required by BUILD.gn.
  shutil.copyfile(os.path.join(_TOP_SRC_DIR, 'build', 'config', 'locales.gni'),
                  os.path.join(work_dir, 'locales.gni'))

  # Create output directory.
  out_path = os.path.join(work_dir, out_subdir_name)
  os.mkdir(out_path)

  # And ... we're good.
  return out_path


# Set this global variable to the path of a given temporary directory
# before calling _ExtractAllChromeLocalesLists() if you want to debug
# the locales list extraction process. When set, that directory is deleted
# and re-created, then used as the GN workspace instead of the temporary one.
_DEBUG_LOCALES_WORK_DIR = None


def _ReadJsonList(file_path):
  """Read a JSON file that must contain a list, and return it.

  Args:
    file_path: Path of the JSON file to read.
  Returns:
    A new list holding the items of the JSON list. JSON strings are returned
    as 'str' values.
  Raises:
    AssertionError: if the top-level JSON value is not a list.
  """
  with open(file_path) as f:
    data = json.load(f)
  assert isinstance(data, list), "JSON file %s is not a list!" % file_path
  # NOTE: Items are deliberately not encoded to UTF-8. Under Python 3 that
  # would produce 'bytes' values, which never compare equal to the 'str'
  # locale names they are meant to be compared with.
  return list(data)


def _ExtractAllChromeLocalesLists():
  """Run 'gn gen' in a tiny workspace to extract the locale lists.

  On success, the _INTERNAL_CHROME_LOCALES and
  _INTERNAL_IOS_UNSUPPORTED_LOCALES globals are replaced with the lists
  written by GN.

  Raises:
    RuntimeError: if no GN executable could be found.
    subprocess.CalledProcessError: if 'gn gen' fails; its output is printed
      before the exception is re-raised.
  """
  global _INTERNAL_CHROME_LOCALES
  global _INTERNAL_IOS_UNSUPPORTED_LOCALES

  # Fail early with a clear message rather than passing None to subprocess.
  gn_executable = _FindGnExecutable()
  if not gn_executable:
    raise RuntimeError('Cannot find GN executable under %s' %
                       os.path.join(_TOP_SRC_DIR, 'buildtools'))

  with build_utils.TempDir() as tmp_path:
    if _DEBUG_LOCALES_WORK_DIR:
      # Debugging: use a fresh copy of the user-provided directory instead.
      tmp_path = _DEBUG_LOCALES_WORK_DIR
      build_utils.DeleteDirectory(tmp_path)
      build_utils.MakeDirectory(tmp_path)

    out_path = _PrepareTinyGnWorkspace(tmp_path, 'out')

    # NOTE: The file suffixes used here should be kept in sync with
    # build/config/locales.gni
    try:
      subprocess.check_output(
          [gn_executable, 'gen', out_path, '--root=' + tmp_path])
    except subprocess.CalledProcessError as e:
      output = e.output
      if isinstance(output, bytes):
        # Print readable text rather than a bytes literal.
        output = output.decode('utf-8', 'replace')
      print(output)
      raise

    _INTERNAL_CHROME_LOCALES = _ReadJsonList(
        os.path.join(out_path, 'foo.locales'))

    _INTERNAL_IOS_UNSUPPORTED_LOCALES = _ReadJsonList(
        os.path.join(out_path, 'foo.ios_unsupported_locales'))


##########################################################################
##########################################################################
#####
#####    G R D   H E L P E R   F U N C T I O N S
#####
##########################################################################
##########################################################################

# Technical note:
#
# Even though .grd files are XML, an xml parser library is not used in order
# to preserve the original file's structure after modification. ElementTree
# tends to re-order attributes in each element when re-writing an XML
# document tree, which is undesirable here.
#
# Thus simple line-based regular expression matching is used instead.
#

# Misc regular expressions used to match elements and their attributes.
# Self-closing <output ... /> element; group 1 captures its attributes text.
_RE_OUTPUT_ELEMENT = re.compile(r'<output (.*)\s*/>')
# Self-closing <file ... /> element with a path attribute ending in '.xtb';
# group 2 captures that path.
_RE_TRANSLATION_ELEMENT = re.compile(r'<file( | .* )path="(.*\.xtb)".*/>')
# Attribute matchers; group 1 captures the double-quoted attribute value.
_RE_FILENAME_ATTRIBUTE = re.compile(r'filename="([^"]*)"')
_RE_LANG_ATTRIBUTE = re.compile(r'lang="([^"]*)"')
_RE_PATH_ATTRIBUTE = re.compile(r'path="([^"]*)"')
# Matches the literal type="android" attribute (no capture group).
_RE_TYPE_ANDROID_ATTRIBUTE = re.compile(r'type="android"')



def _IsGritInputFile(input_file):
  """Tell whether a path names a GRIT input file, i.e. ends with '.grd'."""
  grd_suffix = '.grd'
  return input_file.endswith(grd_suffix)


def _GetXmlLangAttribute(xml_line):
  """Return the value of the first lang="..." attribute found in a line.

  Args:
    xml_line: An XML input line.
  Returns:
    The text between the double quotes, or None when the line has no
    lang attribute.
  """
  match = _RE_LANG_ATTRIBUTE.search(xml_line)
  return match.group(1) if match else None


class _GetXmlLangAttributeTest(unittest.TestCase):
  """Unit tests for _GetXmlLangAttribute()."""

  # Maps an input line to the expected lang attribute value.
  TEST_DATA = {
      '': None,
      'foo': None,
      'lang=foo': None,
      'lang="foo"': 'foo',
      '<something lang="foo bar" />': 'foo bar',
      '<file lang="fr-CA" path="path/to/strings_fr-CA.xtb" />': 'fr-CA',
  }

  def test_GetXmlLangAttribute(self):
    for test_line, expected in self.TEST_DATA.items():
      # NOTE: assertEqual() is used since the assertEquals() alias is
      # deprecated and no longer exists in recent Python versions.
      self.assertEqual(_GetXmlLangAttribute(test_line), expected)


def _SortGrdElementsRanges(grd_lines, element_predicate):
  """Sort each run of selected .grd lines by their lang attribute value.

  Args:
    grd_lines: Input .grd lines.
    element_predicate: predicate function selecting the lines to sort.
  Returns:
    The list of lines returned by _SortElementsRanges().
  """
  sort_key = _GetXmlLangAttribute
  return _SortElementsRanges(grd_lines, element_predicate, sort_key)


def _CheckGrdElementRangeLang(grd_lines, start, end, wanted_locales):
  """Check the element 'lang' attributes in specific .grd lines range.

  This really checks the following:
    - Each item has a correct 'lang' attribute.
    - There are no duplicated lines for the same 'lang' attribute.
    - That there are no extra locales that Chromium doesn't want.
    - That no wanted locale is missing.

  Args:
    grd_lines: Input .grd lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    List of error message strings for this input. Empty on success.
  """
  errors = []
  locales = set()
  # NOTE: range() replaces the Python 2 only xrange().
  for pos in range(start, end):
    line = grd_lines[pos]
    lang = _GetXmlLangAttribute(line)
    if not lang:
      # The parentheses matter: '%' binds tighter than '+', so without them
      # this would try to add an int to the formatted string.
      errors.append(
          '%d: Missing "lang" attribute in <output> element' % (pos + 1))
      continue
    cr_locale = _FixChromiumLangAttribute(lang)
    if cr_locale in locales:
      errors.append(
          '%d: Redefinition of <output> for "%s" locale' % (pos + 1, lang))
    locales.add(cr_locale)

  extra_locales = locales.difference(wanted_locales)
  if extra_locales:
    errors.append('%d-%d: Extra locales found: %s' % (start + 1, end + 1,
                                                      sorted(extra_locales)))

  missing_locales = wanted_locales.difference(locales)
  if missing_locales:
    errors.append('%d-%d: Missing locales: %s' % (start + 1, end + 1,
                                                  sorted(missing_locales)))

  return errors


##########################################################################
##########################################################################
#####
#####    G R D   A N D R O I D   O U T P U T S
#####
##########################################################################
##########################################################################

def _IsGrdAndroidOutputLine(line):
  """Tell whether a line holds an <output> element with type="android".

  Args:
    line: An input .grd line.
  Returns:
    True iff the line matches _RE_OUTPUT_ELEMENT and the matched attributes
    text contains 'type="android"'.
  """
  match = _RE_OUTPUT_ELEMENT.search(line)
  return bool(match) and 'type="android"' in match.group(1)

# Import-time sanity check of the function above.
assert _IsGrdAndroidOutputLine('  <output type="android"/>')

# Many of the functions below have unused arguments due to genericity.
# pylint: disable=unused-argument

def _CheckGrdElementRangeAndroidOutputFilename(grd_lines, start, end,
                                               wanted_locales):
  """Check all <output> elements in specific input .grd lines range.

  This really checks the following:
    - Filenames exist for each listed locale.
    - Filenames are well-formed.

  Lines without a 'lang' attribute are ignored here.

  Args:
    grd_lines: Input .grd lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    List of error message strings for this input. Empty on success.
  """
  errors = []
  # NOTE: range() replaces the Python 2 only xrange().
  for pos in range(start, end):
    line = grd_lines[pos]
    lang = _GetXmlLangAttribute(line)
    if not lang:
      continue
    cr_locale = _FixChromiumLangAttribute(lang)

    m = _RE_FILENAME_ATTRIBUTE.search(line)
    if not m:
      # The parentheses matter: '%' binds tighter than '+'.
      errors.append(
          '%d: Missing filename attribute in <output> element' % (pos + 1))
    else:
      filename = m.group(1)
      if not filename.endswith('.xml'):
        errors.append(
            '%d: Filename should end with ".xml": %s' % (pos + 1, filename))

      # The file's parent directory must be 'values' for the default locale,
      # and 'values-<android locale>' for any other one.
      dirname = os.path.basename(os.path.dirname(filename))
      prefix = ('values-%s' % resource_utils.ToAndroidLocaleName(cr_locale)
                if cr_locale != _DEFAULT_LOCALE else 'values')
      if dirname != prefix:
        errors.append(
            '%s: Directory name should be %s: %s' % (pos + 1, prefix, filename))

  return errors


def _CheckGrdAndroidOutputElements(grd_file, grd_lines, wanted_locales):
  """Run the lang and filename checks on every run of Android <output> lines.

  Args:
    grd_file: Input .grd file path.
    grd_lines: List of input .grd lines.
    wanted_locales: set of wanted Chromium locale names.
  Returns:
    List of error message strings. Empty on success.
  """
  errors = []
  for first, limit in _BuildIntervalList(grd_lines, _IsGrdAndroidOutputLine):
    errors.extend(
        _CheckGrdElementRangeLang(grd_lines, first, limit, wanted_locales))
    errors.extend(
        _CheckGrdElementRangeAndroidOutputFilename(grd_lines, first, limit,
                                                   wanted_locales))
  return errors


def _AddMissingLocalesInGrdAndroidOutputs(grd_file, grd_lines, wanted_locales):
  """Fix an input .grd line list by adding missing Android outputs.

  New lines are created by copying the <output> line of the 'bg' locale and
  substituting its lang attribute and 'values-bg/' directory.

  NOTE: New lines are inserted into |grd_lines| itself before the sorted
  result is computed.

  Args:
    grd_file: Input .grd file path.
    grd_lines: Input .grd line list.
    wanted_locales: set of Chromium locale names.
  Returns:
    A list of .grd lines, containing new <output> elements when needed
    for locales from |wanted_locales| that were not part of the input.
  Raises:
    Exception: if a range with missing locales has no 'bg' <output> line.
  """
  intervals = _BuildIntervalList(grd_lines, _IsGrdAndroidOutputLine)
  # Process ranges last to first so that insertions do not shift the
  # positions of the ranges that remain to be processed.
  for start, end in reversed(intervals):
    locales = set()
    # NOTE: range() replaces the Python 2 only xrange().
    for pos in range(start, end):
      lang = _GetXmlLangAttribute(grd_lines[pos])
      locale = _FixChromiumLangAttribute(lang)
      locales.add(locale)

    missing_locales = wanted_locales.difference(locales)
    if not missing_locales:
      continue

    src_locale = 'bg'
    src_lang_attribute = 'lang="%s"' % src_locale
    src_line = None
    for pos in range(start, end):
      if src_lang_attribute in grd_lines[pos]:
        src_line = grd_lines[pos]
        break

    if not src_line:
      raise Exception(
          'Cannot find <output> element with "%s" lang attribute' % src_locale)

    # Insert inside the range; final positions are fixed by the sort below.
    line_count = end - 1
    for locale in sorted(missing_locales):
      android_locale = resource_utils.ToAndroidLocaleName(locale)
      dst_line = src_line.replace(
          'lang="%s"' % src_locale, 'lang="%s"' % locale).replace(
              'values-%s/' % src_locale, 'values-%s/' % android_locale)
      grd_lines.insert(line_count, dst_line)
      line_count += 1

  # Sort the new <output> elements.
  return _SortGrdElementsRanges(grd_lines, _IsGrdAndroidOutputLine)


##########################################################################
##########################################################################
#####
#####    G R D   T R A N S L A T I O N S
#####
##########################################################################
##########################################################################


def _IsTranslationGrdOutputLine(line):
  """Tell whether a line holds a <file> element with an .xtb path.

  Returns:
    True iff _RE_TRANSLATION_ELEMENT matches somewhere in |line|.
  """
  return _RE_TRANSLATION_ELEMENT.search(line) is not None


class _IsTranslationGrdOutputLineTest(unittest.TestCase):
  """Unit tests for _IsTranslationGrdOutputLine()."""

  def test_GrdTranslationOutputLines(self):
    # Lines that must be recognized as .xtb <file> elements.
    accepted_lines = (
        '<file path="foo/bar.xtb" />',
        '<file path="foo/bar.xtb"/>',
        '<file lang="fr-CA" path="translations/aw_strings_fr-CA.xtb"/>',
        '<file lang="fr-CA" path="translations/aw_strings_fr-CA.xtb" />',
        '  <file path="translations/aw_strings_ar.xtb" lang="ar" />',
    )
    # Lines that must be rejected.
    rejected_lines = ('<file path="foo/bar.xml" />',)

    for candidate in accepted_lines:
      failure = (
          '_IsTranslationGrdOutputLine() returned False for [%s]' % candidate)
      self.assertTrue(_IsTranslationGrdOutputLine(candidate), failure)

    for candidate in rejected_lines:
      failure = (
          '_IsTranslationGrdOutputLine() returned True for [%s]' % candidate)
      self.assertFalse(_IsTranslationGrdOutputLine(candidate), failure)


def _CheckGrdTranslationElementRange(grd_lines, start, end,
                                     wanted_locales):
  """Check all <translations> sub-elements in specific input .grd lines range.

  This really checks the following:
    - Each item has a 'path' attribute.
    - Each such path value ends up with '.xtb'.

  Lines without a 'lang' attribute are ignored here.

  Args:
    grd_lines: Input .grd lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    List of error message strings for this input. Empty on success.
  """
  errors = []
  # NOTE: range() replaces the Python 2 only xrange().
  for pos in range(start, end):
    line = grd_lines[pos]
    lang = _GetXmlLangAttribute(line)
    if not lang:
      continue
    m = _RE_PATH_ATTRIBUTE.search(line)
    if not m:
      # The parentheses matter: '%' binds tighter than '+'.
      errors.append(
          '%d: Missing path attribute in <file> element' % (pos + 1))
    else:
      filename = m.group(1)
      if not filename.endswith('.xtb'):
        errors.append(
            '%d: Path should end with ".xtb": %s' % (pos + 1, filename))

  return errors


def _CheckGrdTranslations(grd_file, grd_lines, wanted_locales):
  """Run the lang and path checks on every run of .xtb <file> lines.

  The default locale is removed from the wanted set before checking.

  Args:
    grd_file: Input .grd file path.
    grd_lines: List of input .grd lines.
    wanted_locales: set of wanted Chromium locale names.
  Returns:
    List of error message strings. Empty on success.
  """
  translated_locales = wanted_locales - {_DEFAULT_LOCALE}
  errors = []
  for first, limit in _BuildIntervalList(grd_lines,
                                         _IsTranslationGrdOutputLine):
    errors.extend(
        _CheckGrdElementRangeLang(grd_lines, first, limit, translated_locales))
    errors.extend(
        _CheckGrdTranslationElementRange(grd_lines, first, limit,
                                         translated_locales))
  return errors


# Regular expression used to replace the lang attribute inside .xtb files.
# Group 1 captures the lang value of the <translationbundle> element.
_RE_TRANSLATIONBUNDLE = re.compile('<translationbundle lang="(.*)">')


def _CreateFakeXtbFileFrom(src_xtb_path, dst_xtb_path, dst_locale):
  """Create a fake .xtb file by copying another one under a new locale.

  Args:
    src_xtb_path: Path to source .xtb file to copy from.
    dst_xtb_path: Path to destination .xtb file to write to.
    dst_locale: Destination locale. On every source line that matches
      _RE_TRANSLATIONBUNDLE, the lang attribute value is replaced with this
      value before the lines are written to the destination file.
  """
  with open(src_xtb_path) as src:
    src_lines = src.readlines()

  dst_lines = []
  for line in src_lines:
    match = _RE_TRANSLATIONBUNDLE.search(line)
    if match:
      # Splice the new locale in place of the captured lang value.
      line = line[:match.start(1)] + dst_locale + line[match.end(1):]
    dst_lines.append(line)

  with build_utils.AtomicOutput(dst_xtb_path) as tmp:
    tmp.writelines(dst_lines)


def _AddMissingLocalesInGrdTranslations(grd_file, grd_lines, wanted_locales):
  """Fix an input .grd line list by adding missing .xtb <file> elements.

  This also creates fake .xtb files from the one provided for 'en-GB'.

  NOTE: New lines are inserted into |grd_lines| itself before the sorted
  result is computed.

  Args:
    grd_file: Input .grd file path.
    grd_lines: Input .grd line list.
    wanted_locales: set of Chromium locale names.
  Returns:
    A list of .grd lines, containing new <file> elements when needed
    for locales from |wanted_locales| that were not part of the input.
  Raises:
    Exception: if a range with missing locales has no 'en-GB' <file> line.
  """
  wanted_locales = wanted_locales - set([_DEFAULT_LOCALE])
  intervals = _BuildIntervalList(grd_lines, _IsTranslationGrdOutputLine)
  # Process ranges last to first so that insertions do not shift the
  # positions of the ranges that remain to be processed.
  for start, end in reversed(intervals):
    locales = set()
    # NOTE: range() replaces the Python 2 only xrange().
    for pos in range(start, end):
      lang = _GetXmlLangAttribute(grd_lines[pos])
      locale = _FixChromiumLangAttribute(lang)
      locales.add(locale)

    missing_locales = wanted_locales.difference(locales)
    if not missing_locales:
      continue

    src_locale = 'en-GB'
    src_lang_attribute = 'lang="%s"' % src_locale
    src_line = None
    for pos in range(start, end):
      if src_lang_attribute in grd_lines[pos]:
        src_line = grd_lines[pos]
        break

    if not src_line:
      raise Exception(
          'Cannot find <file> element with "%s" lang attribute' % src_locale)

    # The path attribute is taken relative to the .grd file's directory.
    src_path = os.path.join(
        os.path.dirname(grd_file),
        _RE_PATH_ATTRIBUTE.search(src_line).group(1))

    # Insert inside the range; final positions are fixed by the sort below.
    line_count = end - 1
    for locale in sorted(missing_locales):
      dst_line = src_line.replace(
          'lang="%s"' % src_locale, 'lang="%s"' % locale).replace(
              '_%s.xtb' % src_locale, '_%s.xtb' % locale)
      grd_lines.insert(line_count, dst_line)
      line_count += 1

      dst_path = src_path.replace('_%s.xtb' % src_locale, '_%s.xtb' % locale)
      _CreateFakeXtbFileFrom(src_path, dst_path, locale)


  # Sort the new <file> elements.
  return _SortGrdElementsRanges(grd_lines, _IsTranslationGrdOutputLine)


##########################################################################
##########################################################################
#####
#####    G N   A N D R O I D   O U T P U T S
#####
##########################################################################
##########################################################################

# Matches a whole GN list line holding a quoted .xml path, followed by a
# comma, that goes through a 'values' or 'values-<suffix>' directory.
# Group 1 is the optional '-<suffix>' part and group 2 the suffix alone.
_RE_GN_VALUES_LIST_LINE = re.compile(
    r'^\s*".*values(\-([A-Za-z0-9-]+))?/.*\.xml",\s*$')

def _IsBuildGnInputFile(input_file):
  """Tell whether a path names a BUILD.gn file (by its base name only)."""
  _, file_name = os.path.split(input_file)
  return file_name == 'BUILD.gn'


def _GetAndroidGnOutputLocale(line):
  """Return the Android locale of a GN list line naming an output .xml.

  Args:
    line: A BUILD.gn input line.
  Returns:
    None if the line does not match _RE_GN_VALUES_LIST_LINE. Otherwise the
    suffix of the 'values-<suffix>' directory, or the Android name of the
    default locale when the directory is plain 'values'.
  """
  match = _RE_GN_VALUES_LIST_LINE.match(line)
  if match is None:
    return None

  # Group 1 is optional and contains group 2.
  if not match.group(1):
    return resource_utils.ToAndroidLocaleName(_DEFAULT_LOCALE)

  return match.group(2)


def _IsAndroidGnOutputLine(line):
  """Returns True iff this is an Android-specific localized .xml output.

  This is the case when _GetAndroidGnOutputLocale() finds a locale for
  |line|.
  """
  # Identity comparison is the idiomatic way to test against None.
  return _GetAndroidGnOutputLocale(line) is not None


def _CheckGnOutputsRangeForLocalizedStrings(gn_lines, start, end):
  """Check that a range of GN lines corresponds to localized strings.

  Special case: Some BUILD.gn files list several non-localized .xml files
  that should be ignored by this function, e.g. in
  components/cronet/android/BUILD.gn, the following appears:

    inputs = [
      ...
      "sample/res/layout/activity_main.xml",
      "sample/res/layout/dialog_url.xml",
      "sample/res/values/dimens.xml",
      "sample/res/values/strings.xml",
      ...
    ]

  These are non-localized strings, and should be ignored. This function is
  used to detect them quickly.

  Args:
    gn_lines: Input BUILD.gn lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
  Returns:
    True if at least one line in [start, end) does not contain 'values/',
    False if every line contains it (or if the range is empty).
  """
  # NOTE: range() replaces the Python 2 only xrange().
  return any('values/' not in gn_lines[pos] for pos in range(start, end))


def _CheckGnOutputsRange(gn_lines, start, end, wanted_locales):
  """Check one range of Android .xml list items against the wanted locales.

  Args:
    gn_lines: List of lines of the BUILD.gn file.
    start: Index of the first line of the range.
    end: Index one past the last line of the range.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    A list of error message strings. It is empty when the range is correct,
    or when it does not list localized strings at all.
  """
  if not _CheckGnOutputsRangeForLocalizedStrings(gn_lines, start, end):
    return []

  errors = []
  locales = set()
  # NOTE: range() is used because xrange() does not exist in Python 3.
  for pos in range(start, end):
    android_locale = _GetAndroidGnOutputLocale(gn_lines[pos])
    # Expected to hold because callers build the ranges with
    # _IsAndroidGnOutputLine() as the line predicate.
    assert android_locale is not None
    cr_locale = resource_utils.ToChromiumLocaleName(android_locale)
    if cr_locale in locales:
      errors.append('%s: Redefinition of output for "%s" locale' %
                    (pos + 1, android_locale))
    locales.add(cr_locale)

  # NOTE(review): |end| is exclusive in the loop above, so 'end + 1' in the
  # two messages below may overstate the last line number by one. Confirm
  # against _BuildIntervalList() before changing the message format.
  extra_locales = locales.difference(wanted_locales)
  if extra_locales:
    errors.append('%d-%d: Extra locales: %s' % (start + 1, end + 1,
                                                sorted(extra_locales)))

  missing_locales = wanted_locales.difference(locales)
  if missing_locales:
    errors.append('%d-%d: Missing locales: %s' % (start + 1, end + 1,
                                                  sorted(missing_locales)))

  return errors


def _CheckGnAndroidOutputs(gn_file, gn_lines, wanted_locales):
  """Check every list of Android localized .xml files in a BUILD.gn file.

  Args:
    gn_file: Path of the BUILD.gn file (not used by this check).
    gn_lines: List of lines of the BUILD.gn file.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    A list of error message strings, empty if nothing is wrong.
  """
  all_errors = []
  for first, last in _BuildIntervalList(gn_lines, _IsAndroidGnOutputLine):
    all_errors.extend(
        _CheckGnOutputsRange(gn_lines, first, last, wanted_locales))
  return all_errors


def _AddMissingLocalesInGnAndroidOutputs(gn_file, gn_lines, wanted_locales):
  """Add list items for the locales missing from Android .xml lists.

  For each range of localized "values[-xx]/....xml" list items, one new item
  is generated for every wanted locale that is not listed yet, using the 'bg'
  item of the same range as a template. The range is then re-sorted.

  Args:
    gn_file: Path of the BUILD.gn file (not used by this function).
    gn_lines: List of lines of the BUILD.gn file. New items are inserted
      into this list in place.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    The updated list of lines.
  Raises:
    Exception: if a range needs new items but has no 'bg' item to copy.
  """

  def _SortKey(line):
    # group(1) is None for the plain "values/" item. None cannot be ordered
    # against str in Python 3, so map it to '' which sorts before any
    # "-<qualifier>" suffix.
    return _RE_GN_VALUES_LIST_LINE.match(line).group(1) or ''

  intervals = _BuildIntervalList(gn_lines, _IsAndroidGnOutputLine)
  # NOTE: Since this may insert new lines to each interval, process the
  # list in reverse order to maintain valid (start,end) positions during
  # the iteration.
  for start, end in reversed(intervals):
    if not _CheckGnOutputsRangeForLocalizedStrings(gn_lines, start, end):
      continue

    # NOTE: range() is used because xrange() does not exist in Python 3.
    locales = set()
    for pos in range(start, end):
      lang = _GetAndroidGnOutputLocale(gn_lines[pos])
      locales.add(resource_utils.ToChromiumLocaleName(lang))

    missing_locales = wanted_locales.difference(locales)
    if not missing_locales:
      continue

    # Find the template item that the new items are derived from.
    src_locale = 'bg'
    src_values = 'values-%s/' % resource_utils.ToAndroidLocaleName(src_locale)
    src_line = next(
        (gn_lines[pos] for pos in range(start, end)
         if src_values in gn_lines[pos]), None)

    if not src_line:
      raise Exception(
          'Cannot find output list item with "%s" locale' % src_locale)

    line_count = end - 1
    for locale in missing_locales:
      if locale == _DEFAULT_LOCALE:
        dst_line = src_line.replace('values-%s/' % src_locale, 'values/')
      else:
        dst_line = src_line.replace(
            'values-%s/' % src_locale,
            'values-%s/' % resource_utils.ToAndroidLocaleName(locale))
      gn_lines.insert(line_count, dst_line)
      line_count += 1

    gn_lines = _SortListSubRange(gn_lines, start, line_count, _SortKey)

  return gn_lines


##########################################################################
##########################################################################
#####
#####    T R A N S L A T I O N   E X P E C T A T I O N S
#####
##########################################################################
##########################################################################

# Base name of the translation expectations files, shown in the help text of
# the 'update-expectations' command.
_EXPECTATIONS_FILENAME = 'translation_expectations.pyl'

# Technical note: the format of translation_expectations.pyl
# is a 'Python literal', which defines a python dictionary, so should
# be easy to parse. However, when modifying it, care should be taken
# to respect the line comments and the order of keys within the text
# file.


def _ReadPythonLiteralFile(pyl_path):
  """Load the Python data structure stored in a .pyl file.

  Args:
    pyl_path: Path to the .pyl file.
  Returns:
    The value that the file content evaluates to.
  """
  with open(pyl_path) as pyl_file:
    source_text = pyl_file.read()
  # NOTE(review): eval() runs arbitrary expressions, so this must only be
  # used on trusted files. Empty global and local dictionaries are supplied
  # so that the expression cannot see the names of this module.
  no_globals, no_locals = {}, {}
  return eval(source_text, no_globals, no_locals)


def _UpdateLocalesInExpectationLines(pyl_lines,
                                     wanted_locales,
                                     available_width=79):
  """Rewrite every "languages" list found in an expectations file.

  Each line containing '"languages": [' is kept, the following lines up to
  and including the first one ending with '],' are dropped, and a freshly
  formatted list of |wanted_locales| is written in their place, indented
  relative to the column where '"languages"' starts.

  Args:
    pyl_lines: List of input lines from the file.
    wanted_locales: Set or list of new locale names.
    available_width: Optional, number of character columns used
      to word-wrap the new list items.
  Returns:
    New list of updated lines.
  Raises:
    Exception: if a list has no terminating '],' line.
  """
  list_marker = '"languages": ['
  quoted_locales = ['"%s"' % name for name in sorted(wanted_locales)]
  total = len(pyl_lines)
  output = []
  index = 0
  while index < total:
    current = pyl_lines[index]
    index += 1
    output.append(current)
    margin = current.find(list_marker)
    if margin < 0:
      # Not the start of a "languages" list: the line is copied verbatim.
      continue

    # 1-based number of the line holding the list start, for error messages.
    marker_line_number = index
    # Drop the old list body by advancing to the line that closes the list.
    while index < total and not pyl_lines[index].rstrip().endswith('],'):
      index += 1
    if index == total:
      raise Exception('%d: Missing list termination!' % marker_line_number)

    # Items are indented two columns deeper than the "languages" key.
    item_indent = ' ' * (margin + 2)
    wrapped = _PrettyPrintListAsLines(
        quoted_locales, available_width - (margin + 2), trailing_comma=True)
    output.extend(item_indent + text for text in wrapped)
    output.append(' ' * margin + '],')
    # Step over the old closing line, which has just been replaced.
    index += 1

  return output


class _UpdateLocalesInExpectationLinesTest(unittest.TestCase):
  """Unit tests for _UpdateLocalesInExpectationLines()."""

  def test_simple(self):
    # Both lists must be replaced by the sorted new locales, wrapped for a
    # width of 40 columns and re-indented relative to their "languages" key.
    self.maxDiff = 1000
    original_text = r'''
# This comment should be preserved
# 23456789012345678901234567890123456789
{
  "android_grd": {
    "languages": [
      "aa", "bb", "cc", "dd", "ee",
      "ff", "gg", "hh", "ii", "jj",
      "kk"],
  },
  # Example with bad indentation in input.
  "another_grd": {
         "languages": [
  "aa", "bb", "cc", "dd", "ee", "ff", "gg", "hh", "ii", "jj", "kk",
      ],
  },
}
'''
    rewritten_text = r'''
# This comment should be preserved
# 23456789012345678901234567890123456789
{
  "android_grd": {
    "languages": [
      "A2", "AA", "BB", "CC", "DD",
      "E2", "EE", "FF", "GG", "HH",
      "I2", "II", "JJ", "KK",
    ],
  },
  # Example with bad indentation in input.
  "another_grd": {
         "languages": [
           "A2", "AA", "BB", "CC", "DD",
           "E2", "EE", "FF", "GG", "HH",
           "I2", "II", "JJ", "KK",
         ],
  },
}
'''
    # Deliberately unsorted, to check that the output is sorted.
    new_locales = [
        'AA', 'BB', 'CC', 'DD', 'EE', 'FF', 'GG', 'HH', 'II', 'JJ', 'KK',
        'A2', 'E2', 'I2',
    ]
    actual_lines = _UpdateLocalesInExpectationLines(
        original_text.splitlines(), new_locales, 40)
    self.assertListEqual(actual_lines, rewritten_text.splitlines())

  def test_missing_list_termination(self):
    # The list below is never closed by a '],' line, which must be reported
    # with the line number of the list start.
    unterminated_text = r'''
  "languages": ['
    "aa", "bb", "cc", "dd"
'''
    unterminated_lines = unterminated_text.splitlines()
    with self.assertRaises(Exception) as raised:
      _UpdateLocalesInExpectationLines(unterminated_lines, ['a', 'b'], 40)

    self.assertEqual(str(raised.exception), '2: Missing list termination!')


def _UpdateLocalesInExpectationFile(pyl_path, wanted_locales):
  """Update all locales listed in a given expectations file.

  The default locale is never written to the file, and the remaining locale
  names are converted with _FixTranslationConsoleLocaleName() first.

  Args:
    pyl_path: Path to .pyl file to update.
    wanted_locales: List of locales that need to be written to
      the file.
  """
  tc_locales = {
      _FixTranslationConsoleLocaleName(locale)
      for locale in set(wanted_locales) - {_DEFAULT_LOCALE}
  }

  with open(pyl_path) as f:
    # Trailing whitespace, including the line terminator, is dropped here;
    # the lines are joined with '\n' again when written back.
    input_lines = [line.rstrip() for line in f]

  updated_lines = _UpdateLocalesInExpectationLines(input_lines, tc_locales)
  # NOTE(review): a str is written below; confirm that
  # build_utils.AtomicOutput() opens the file in text mode by default.
  with build_utils.AtomicOutput(pyl_path) as f:
    # write(), not writelines(): the payload is one string, which
    # writelines() would iterate over one character at a time.
    f.write('\n'.join(updated_lines) + '\n')


##########################################################################
##########################################################################
#####
#####    C H E C K   E V E R Y T H I N G
#####
##########################################################################
##########################################################################

# pylint: enable=unused-argument


def _IsAllInputFile(input_file):
  """Tell whether |input_file| is either a GRIT input or a BUILD.gn file."""
  is_grit = _IsGritInputFile(input_file)
  return is_grit if is_grit else _IsBuildGnInputFile(input_file)


def _CheckAllFiles(input_file, input_lines, wanted_locales):
  """Run every check that applies to the type of |input_file|.

  Args:
    input_file: Path of the file being checked.
    input_lines: List of lines of that file.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    A list of errors, empty if nothing is wrong or if the file is neither a
    GRIT input file nor a BUILD.gn file.
  """
  found = []
  if _IsGritInputFile(input_file):
    # GRIT files get both the translations and the Android outputs checks.
    for check in (_CheckGrdTranslations, _CheckGrdAndroidOutputElements):
      found.extend(check(input_file, input_lines, wanted_locales))
  elif _IsBuildGnInputFile(input_file):
    found.extend(
        _CheckGnAndroidOutputs(input_file, input_lines, wanted_locales))
  return found


def _AddMissingLocalesInAllFiles(input_file, input_lines, wanted_locales):
  """Apply every fix that applies to the type of |input_file|.

  Args:
    input_file: Path of the file being fixed.
    input_lines: List of lines of that file.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    The updated list of lines. For a file that is neither a GRIT input file
    nor a BUILD.gn file, |input_lines| is returned unchanged.
  """
  if _IsGritInputFile(input_file):
    lines = _AddMissingLocalesInGrdTranslations(
        input_file, input_lines, wanted_locales)
    return _AddMissingLocalesInGrdAndroidOutputs(
        input_file, lines, wanted_locales)
  if _IsBuildGnInputFile(input_file):
    return _AddMissingLocalesInGnAndroidOutputs(
        input_file, input_lines, wanted_locales)
  # Nothing to fix for other file types. Falling through without a value
  # used to raise UnboundLocalError here.
  return input_lines


##########################################################################
##########################################################################
#####
#####    C O M M A N D   H A N D L I N G
#####
##########################################################################
##########################################################################

class _Command(object):
  """Common base of every command understood by this script.

  How to write a command:
    1) Re-define the following class-level fields in the derived class:
       - name: Command name (e.g. 'list-locales')
       - description: Command short description.
       - long_description: Optional. Command long description.
         NOTE: As a convenience, if the first character is a newline,
         it will be omitted in the help output.

    2) If the command takes arguments, override RegisterExtraArgs(), which
       receives the argument group of the command's argparse sub-parser.

    3) Implement a Run() method. It can read the parsed arguments from
       self.args.
  """
  name = None
  description = None
  long_description = None

  def __init__(self):
    self.args = None
    self._parser = None

  def RegisterExtraArgs(self, subparser):
    """Hook for derived classes to declare their own arguments."""

  def RegisterArgs(self, parser):
    """Create the sub-parser of this command and register its arguments.

    Args:
      parser: The object returned by ArgumentParser.add_subparsers().
    """
    # The long description, when there is one, is what the command's own
    # --help shows; the short one is used in the list of commands.
    help_body = self.long_description or self.description
    command_parser = parser.add_parser(
        self.name,
        help=self.description,
        description=help_body,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Lets the caller find this instance in the parsed arguments.
    command_parser.set_defaults(command=self)
    self._parser = command_parser
    self.RegisterExtraArgs(
        command_parser.add_argument_group('%s arguments' % self.name))

  def ProcessArgs(self, args):
    """Record the parsed arguments for later use by Run()."""
    self.args = args


class _ListLocalesCommand(_Command):
  """Implement the 'list-locales' command to list locale lists of interest."""
  name = 'list-locales'
  description = 'List supported Chrome locales'
  long_description = r'''
List locales of interest, by default this prints all locales supported by
Chrome, but `--type=ios_unsupported` can be used for the list of locales
unsupported on iOS.

These values are extracted directly from build/config/locales.gni.

Additionally, use the --as-json argument to print the list as a JSON list,
instead of the default format (which is a space-separated list of locale names).
'''

  # Maps type argument to a function returning the corresponding locales list.
  TYPE_MAP = {
      'all': ChromeLocales,
      'ios_unsupported': IosUnsupportedLocales,
  }

  def RegisterExtraArgs(self, group):
    """Declare the --as-json and --type arguments."""
    group.add_argument(
        '--as-json',
        action='store_true',
        help='Output as JSON list.')
    group.add_argument(
        '--type',
        # Iterating a dict yields its keys. dict.viewkeys() only exists in
        # Python 2 and raises AttributeError under Python 3.
        choices=tuple(self.TYPE_MAP),
        default='all',
        help='Select type of locale list to print.')

  def Run(self):
    """Print the selected list of locales to stdout."""
    locale_list = self.TYPE_MAP[self.args.type]()
    if self.args.as_json:
      # json.dumps() emits double-quoted strings. The previous hand-built
      # output used single quotes, which is not valid JSON.
      print(json.dumps(list(locale_list)))
    else:
      print(' '.join(locale_list))


class _CheckInputFileBaseCommand(_Command):
  """Used as a base for other _Command subclasses that check input files.

  Subclasses should also define the following class-level variables:

  - select_file_func:
      A predicate that receives a file name (not path) and return True if it
      should be selected for inspection. Used when scanning directories with
      '--scan-dir <dir>'.

  - check_func:
  - fix_func:
      Two functions passed as parameters to _ProcessFile(), see relevant
      documentation in this function's definition.
  """
  select_file_func = None
  check_func = None
  fix_func = None

  def RegisterExtraArgs(self, group):
    """Declare the arguments shared by all the check-xxx commands."""
    group.add_argument(
        '--scan-dir',
        action='append',
        help='Optional directory to scan for input files recursively.')
    group.add_argument(
        'input',
        nargs='*',
        help='Input file(s) to check.')
    group.add_argument(
        '--fix-inplace',
        action='store_true',
        help='Try to fix the files in-place too.')
    group.add_argument(
        '--add-locales',
        help='Space-separated list of additional locales to use')

  def Run(self):
    """Check, and optionally fix, every selected input file."""
    args = self.args
    # Work on a copy so that extending the list below does not alter the
    # parsed arguments.
    input_files = list(args.input or [])
    # NOTE: The xxx_func class attributes hold plain functions. Accessed
    # through self they are bound methods, so __func__ is used to get the
    # original function back and call it without self.
    if args.scan_dir:
      input_files.extend(_ScanDirectoriesForFiles(
          args.scan_dir, self.select_file_func.__func__))

    locales = set(ChromeLocales())
    if args.add_locales:
      # split() without argument ignores leading, trailing and repeated
      # whitespace, which split(' ') would turn into empty locale names.
      locales.update(args.add_locales.split())

    for input_file in input_files:
      _ProcessFile(input_file,
                   locales,
                   self.check_func.__func__,
                   self.fix_func.__func__ if args.fix_inplace else None)
    print('%sDone.' % (_CONSOLE_START_LINE))


class _CheckGrdAndroidOutputsCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-grd-android-outputs' command."""
  name = 'check-grd-android-outputs'
  description = ('Check the Android resource (.xml) files outputs '
                 'in GRIT input files.')
  long_description = r'''
Check the Android .xml files outputs in one or more input GRIT (.grd) files
for the following conditions:

    - Each item has a correct 'lang' attribute.
    - There are no duplicated lines for the same 'lang' attribute.
    - That there are no extra locales that Chromium doesn't want.
    - That no wanted locale is missing.
    - Filenames exist for each listed locale.
    - Filenames are well-formed.
'''
  # Functions used by the base class to select, check and fix the files.
  select_file_func = _IsGritInputFile
  check_func = _CheckGrdAndroidOutputElements
  fix_func = _AddMissingLocalesInGrdAndroidOutputs


class _CheckGrdTranslationsCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-grd-translations' command."""
  name = 'check-grd-translations'
  description = ('Check the translation (.xtb) files outputted '
                 'by .grd input files.')
  long_description = r'''
Check the translation (.xtb) file outputs in one or more input GRIT (.grd) files
for the following conditions:

    - Each item has a correct 'lang' attribute.
    - There are no duplicated lines for the same 'lang' attribute.
    - That there are no extra locales that Chromium doesn't want.
    - That no wanted locale is missing.
    - Each item has a 'path' attribute.
    - Each such path value ends up with '.xtb'.
'''
  # Functions used by the base class to select, check and fix the files.
  select_file_func = _IsGritInputFile
  check_func = _CheckGrdTranslations
  fix_func = _AddMissingLocalesInGrdTranslations


class _CheckGnAndroidOutputsCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-gn-android-outputs' command."""
  # Functions used by the base class to select, check and fix the files.
  select_file_func = _IsBuildGnInputFile
  check_func = _CheckGnAndroidOutputs
  fix_func = _AddMissingLocalesInGnAndroidOutputs

  name = 'check-gn-android-outputs'
  description = 'Check the Android .xml file lists in GN build files.'
  long_description = r'''
Check one or more BUILD.gn file, looking for lists of Android resource .xml
files, and checking that:

  - There are no duplicated output files in the list.
  - Each output file belongs to a wanted Chromium locale.
  - There are no output files for unwanted Chromium locales.
'''


class _CheckAllCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-all' command."""
  # Functions used by the base class to select, check and fix the files.
  select_file_func = _IsAllInputFile
  check_func = _CheckAllFiles
  fix_func = _AddMissingLocalesInAllFiles

  name = 'check-all'
  description = 'Check everything.'
  long_description = 'Equivalent to calling all other check-xxx commands.'


class _UpdateExpectationsCommand(_Command):
  """Implement the 'update-expectations' command."""
  name = 'update-expectations'
  description = 'Update translation expectations file.'
  long_description = r'''
Update %s files to match the current list of locales supported by Chromium.
This is especially useful to add new locales before updating any GRIT or GN
input file with the --add-locales option.
''' % _EXPECTATIONS_FILENAME

  def RegisterExtraArgs(self, group):
    """Declare the --add-locales argument."""
    group.add_argument(
        '--add-locales',
        help='Space-separated list of additional locales to use.')

  def Run(self):
    """Rewrite the locale lists of every expectations file that exists.

    Files that do not exist are skipped and reported in a warning on stderr.
    """
    locales = list(ChromeLocales())
    add_locales = self.args.add_locales
    if add_locales:
      # split() without argument ignores leading, trailing and repeated
      # whitespace, which split(' ') would turn into empty locale names.
      locales.extend(add_locales.split())

    # Paths are relative to the top-level source directory.
    expectation_paths = [
        'tools/gritsettings/translation_expectations.pyl',
        'clank/tools/translation_expectations.pyl',
    ]
    missing_expectation_files = []
    # NOTE: Iterate the paths directly. Wrapping the list in enumerate()
    # yields (index, path) tuples, which os.path.join() rejects.
    for path in expectation_paths:
      file_path = os.path.join(_TOP_SRC_DIR, path)
      if not os.path.exists(file_path):
        missing_expectation_files.append(file_path)
        continue
      _UpdateLocalesInExpectationFile(file_path, locales)

    if missing_expectation_files:
      sys.stderr.write('WARNING: Missing file(s): %s\n' %
                       (', '.join(missing_expectation_files)))


class _UnitTestsCommand(_Command):
  """Implement the 'unit-tests' command, running this script's own tests."""
  name = 'unit-tests'
  description = 'Run internal unit-tests for this script'

  def RegisterExtraArgs(self, group):
    """Declare the verbosity flag and the pass-through unittest arguments."""
    group.add_argument(
        '-v',
        '--verbose',
        action='count',
        # Without an explicit default, argparse stores None when the flag is
        # absent, and the unittest text runner cannot compare None with an
        # integer under Python 3.
        default=0,
        help='Increase test verbosity.')
    group.add_argument('args', nargs=argparse.REMAINDER)

  def Run(self):
    """Run the unit tests, forwarding the remaining arguments to unittest."""
    # unittest.main() expects the program name as first item.
    argv = [_SCRIPT_NAME] + self.args.args
    unittest.main(argv=argv, verbosity=self.args.verbose)


# List of all commands supported by this script. main() creates one instance
# of each class and registers it as an argparse sub-command.
_COMMANDS = [
    _ListLocalesCommand,
    _CheckGrdAndroidOutputsCommand,
    _CheckGrdTranslationsCommand,
    _CheckGnAndroidOutputsCommand,
    _CheckAllCommand,
    _UpdateExpectationsCommand,
    _UnitTestsCommand,
]


def main(argv):
  """Parse |argv| and run the selected command.

  Args:
    argv: List of command-line arguments, without the program name. When it
      is empty, the top-level help is requested instead.
  """
  if not argv:
    argv = ['--help']

  top_parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=__doc__)
  command_parsers = top_parser.add_subparsers()
  for command_class in _COMMANDS:
    command_class().RegisterArgs(command_parsers)

  parsed = top_parser.parse_args(argv)
  # Each command stores itself as the 'command' default of its sub-parser.
  selected = parsed.command
  selected.ProcessArgs(parsed)
  selected.Run()


# Script entry point: forward the command-line arguments, minus the program
# name, to main().
if __name__ == "__main__":
  main(sys.argv[1:])