# Copyright 2012 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''Collections of messages and their translations, called cliques. Also
collections of cliques (uber-cliques).
'''
import re
from grit import constants
from grit import exception
from grit import lazy_re
from grit import pseudo
from grit import pseudolocales
from grit import tclib
class UberClique:
'''A factory (NOT a singleton factory) for making cliques. It has several
methods for working with the cliques created using the factory.
'''
def __init__(self):
# A map from message ID to list of cliques whose source messages have
# that ID. This will contain all cliques created using this factory.
# Different messages can have the same ID because they have the
# same translateable portion and placeholder names, but occur in different
# places in the resource tree.
#
# Each list of cliques is kept sorted by description, to achieve
# stable results from the BestClique method, see below.
self.cliques_ = {}
# A map of clique IDs to list of languages to indicate translations where we
# fell back to English.
self.fallback_translations_ = {}
# A map of clique IDs to list of languages to indicate missing translations.
self.missing_translations_ = {}
def _AddMissingTranslation(self, lang, clique, is_error):
tl = self.fallback_translations_
if is_error:
tl = self.missing_translations_
id = clique.GetId()
if id not in tl:
tl[id] = {}
if lang not in tl[id]:
tl[id][lang] = 1
def HasMissingTranslations(self):
return len(self.missing_translations_) > 0
def MissingTranslationsReport(self):
'''Returns a string suitable for printing to report missing
and fallback translations to the user.
'''
def ReportTranslation(clique, langs):
text = clique.GetMessage().GetPresentableContent()
# The text 'error' (usually 'Error:' but we are conservative)
# can trigger some build environments (Visual Studio, we're
# looking at you) to consider invocation of grit to have failed,
# so we make sure never to output that word.
extract = re.sub(r'(?i)error', 'REDACTED', text[0:40])[0:40]
ellipsis = ''
if len(text) > 40:
ellipsis = '...'
langs_extract = langs[0:6]
describe_langs = ','.join(langs_extract)
if len(langs) > 6:
describe_langs += " and %d more" % (len(langs) - 6)
return " %s \"%s%s\" %s" % (clique.GetId(), extract, ellipsis,
describe_langs)
lines = []
if len(self.fallback_translations_):
lines.append(
"WARNING: Fell back to English for the following translations:")
for (id, langs) in self.fallback_translations_.items():
lines.append(
ReportTranslation(self.cliques_[id][0], list(langs.keys())))
if len(self.missing_translations_):
lines.append("ERROR: The following translations are MISSING:")
for (id, langs) in self.missing_translations_.items():
lines.append(
ReportTranslation(self.cliques_[id][0], list(langs.keys())))
return '\n'.join(lines)
def MakeClique(self, message, translateable=True):
'''Create a new clique initialized with a message.
Args:
message: tclib.Message()
translateable: True | False
'''
clique = MessageClique(self, message, translateable)
# Enable others to find this clique by its message ID
if message.GetId() in self.cliques_:
presentable_text = clique.GetMessage().GetPresentableContent()
if not message.HasAssignedId():
for c in self.cliques_[message.GetId()]:
assert c.GetMessage().GetPresentableContent() == presentable_text
self.cliques_[message.GetId()].append(clique)
# We need to keep each list of cliques sorted by description, to
# achieve stable results from the BestClique method, see below.
self.cliques_[message.GetId()].sort(
key=lambda c:c.GetMessage().GetDescription())
else:
self.cliques_[message.GetId()] = [clique]
return clique
def FindCliqueAndAddTranslation(self, translation, language):
'''Adds the specified translation to the clique with the source message
it is a translation of.
Args:
translation: tclib.Translation()
language: 'en' | 'fr' ...
Return:
True if the source message was found, otherwise false.
'''
if translation.GetId() in self.cliques_:
for clique in self.cliques_[translation.GetId()]:
clique.AddTranslation(translation, language)
return True
else:
return False
def BestClique(self, id):
'''Returns the "best" clique from a list of cliques. All the cliques
must have the same ID. The "best" clique is chosen in the following
order of preference:
- The first clique that has a non-ID-based description.
- If no such clique found, the first clique with an ID-based description.
- Otherwise the first clique.
This method is stable in terms of always returning a clique with
an identical description (on different runs of GRIT on the same
data) because self.cliques_ is sorted by description.
'''
clique_list = self.cliques_[id]
clique_with_id = None
clique_default = None
for clique in clique_list:
if not clique_default:
clique_default = clique
description = clique.GetMessage().GetDescription()
if description and len(description) > 0:
if not description.startswith('ID:'):
# this is the preferred case so we exit right away
return clique
elif not clique_with_id:
clique_with_id = clique
if clique_with_id:
return clique_with_id
else:
return clique_default
def BestCliquePerId(self):
'''Iterates over the list of all cliques and returns the best clique for
each ID. This will be the first clique with a source message that has a
non-empty description, or an arbitrary clique if none of them has a
description.
'''
for id in self.cliques_:
yield self.BestClique(id)
def BestCliqueByOriginalText(self, text, meaning):
'''Finds the "best" (as in BestClique()) clique that has original text
'text' and meaning 'meaning'. Returns None if there is no such clique.
'''
# If needed, this can be optimized by maintaining a map of
# fingerprints of original text+meaning to cliques.
for c in self.BestCliquePerId():
msg = c.GetMessage()
if msg.GetRealContent() == text and msg.GetMeaning() == meaning:
return msg
return None
def AllMessageIds(self):
'''Returns a list of all defined message IDs.
'''
return list(self.cliques_.keys())
def AllCliques(self):
'''Iterates over all cliques. Note that this can return multiple cliques
with the same ID.
'''
for cliques in self.cliques_.values():
yield from cliques
def GenerateXtbParserCallback(self, lang, debug=False):
'''Creates a callback function as required by grit.xtb_reader.Parse().
This callback will create Translation objects for each message from
the XTB that exists in this uberclique, and add them as translations for
the relevant cliques. The callback will add translations to the language
specified by 'lang'
Args:
lang: 'fr'
debug: True | False
'''
def Callback(id, structure):
if id not in self.cliques_:
if debug:
print("Ignoring translation #%s" % id)
return
if debug:
print("Adding translation #%s" % id)
# We fetch placeholder information from the original message (the XTB file
# only contains placeholder names).
original_msg = self.BestClique(id).GetMessage()
translation = tclib.Translation(id=id)
for is_ph,text in structure:
if not is_ph:
translation.AppendText(text)
else:
found_placeholder = False
for ph in original_msg.GetPlaceholders():
if ph.GetPresentation() == text:
translation.AppendPlaceholder(tclib.Placeholder(
ph.GetPresentation(), ph.GetOriginal(), ph.GetExample()))
found_placeholder = True
break
if not found_placeholder:
raise exception.MismatchingPlaceholders(
'Translation for message ID %s had <ph name="%s"/>, no match\n'
'in original message' % (id, text))
self.FindCliqueAndAddTranslation(translation, lang)
return Callback
class CustomType:
'''A base class you should implement if you wish to specify a custom type
for a message clique (i.e. custom validation and optional modification of
translations).'''
def Validate(self, message):
'''Returns true if the message (a tclib.Message object) is valid,
otherwise false.
'''
raise NotImplementedError()
def ValidateAndModify(self, lang, translation):
'''Returns true if the translation (a tclib.Translation object) is valid,
otherwise false. The language is also passed in. This method may modify
the translation that is passed in, if it so wishes.
'''
raise NotImplementedError()
def ModifyTextPart(self, lang, text):
'''If you call ModifyEachTextPart, it will turn around and call this method
for each text part of the translation. You should return the modified
version of the text, or just the original text to not change anything.
'''
raise NotImplementedError()
def ModifyEachTextPart(self, lang, translation):
'''Call this to easily modify one or more of the textual parts of a
translation. It will call ModifyTextPart for each part of the
translation.
'''
contents = translation.GetContent()
for ix in range(len(contents)):
if (isinstance(contents[ix], str)):
contents[ix] = self.ModifyTextPart(lang, contents[ix])
class OneOffCustomType(CustomType):
'''A very simple custom type that performs the validation expressed by
the input expression on all languages including the source language.
The expression can access the variables 'lang', 'msg' and 'text()' where
'lang' is the language of 'msg', 'msg' is the message or translation being
validated and 'text()' returns the real contents of 'msg' (for shorthand).
'''
def __init__(self, expression):
self.expr = expression
def Validate(self, message):
return self.ValidateAndModify(MessageClique.source_language, message)
def ValidateAndModify(self, lang, msg):
def text():
return msg.GetRealContent()
return eval(self.expr, {},
{'lang' : lang,
'text' : text,
'msg' : msg,
})
class MessageClique:
'''A message along with all of its translations. Also code to bring
translations together with their original message.'''
# change this to the language code of Messages you add to cliques_.
# TODO(joi) Actually change this based on the <grit> node's source language
source_language = 'en'
# A constant translation we use when asked for a translation into the
# special language constants.CONSTANT_LANGUAGE.
CONSTANT_TRANSLATION = tclib.Translation(text='TTTTTT')
# A pattern to match messages that are empty or whitespace only.
WHITESPACE_MESSAGE = lazy_re.compile(r'^\s*$')
def __init__(self, uber_clique, message, translateable=True,
custom_type=None):
'''Create a new clique initialized with just a message.
Note that messages with a body comprised only of whitespace will implicitly
be marked non-translatable.
Args:
uber_clique: Our uber-clique (collection of cliques)
message: tclib.Message()
translateable: True | False
custom_type: instance of clique.CustomType interface
'''
# Our parent
self.uber_clique = uber_clique
# If not translateable, we only store the original message.
self.translateable = translateable
# We implicitly mark messages that have a whitespace-only body as
# non-translateable.
if MessageClique.WHITESPACE_MESSAGE.match(message.GetRealContent()):
self.translateable = False
# A mapping of language identifiers to tclib.BaseMessage and its
# subclasses (i.e. tclib.Message and tclib.Translation).
self.clique = { MessageClique.source_language : message }
# A list of the "shortcut groups" this clique is
# part of. Within any given shortcut group, no shortcut key (e.g. &J)
# must appear more than once in each language for all cliques that
# belong to the group.
self.shortcut_groups = []
# An instance of the CustomType interface, or None. If this is set, it will
# be used to validate the original message and translations thereof, and
# will also get a chance to modify translations of the message.
self.SetCustomType(custom_type)
def GetMessage(self):
'''Retrieves the tclib.Message that is the source for this clique.'''
return self.clique[MessageClique.source_language]
def GetId(self):
'''Retrieves the message ID of the messages in this clique.'''
return self.GetMessage().GetId()
def IsTranslateable(self):
return self.translateable
def AddToShortcutGroup(self, group):
self.shortcut_groups.append(group)
def SetCustomType(self, custom_type):
'''Makes this clique use custom_type for validating messages and
translations, and optionally modifying translations.
'''
self.custom_type = custom_type
if custom_type and not custom_type.Validate(self.GetMessage()):
raise exception.InvalidMessage(self.GetMessage().GetRealContent())
def MessageForLanguage(self, lang, pseudo_if_no_match=True,
fallback_to_english=False):
'''Returns the message/translation for the specified language, providing
a pseudotranslation if there is no available translation and a pseudo-
translation is requested.
The translation of any message whatsoever in the special language
'x_constant' is the message "TTTTTT".
Args:
lang: 'en'
pseudo_if_no_match: True
fallback_to_english: False
Return:
tclib.BaseMessage
'''
if not self.translateable:
return self.GetMessage()
if lang == constants.CONSTANT_LANGUAGE:
return self.CONSTANT_TRANSLATION
for msglang in self.clique:
if lang == msglang:
return self.clique[msglang]
if pseudo_if_no_match:
if lang == constants.PSEUDOLOCALE_LONG_STRINGS:
return pseudolocales.PseudoLongStringMessage(self.GetMessage())
elif lang == constants.PSEUDOLOCALE_RTL:
return pseudolocales.PseudoRTLMessage(self.GetMessage())
if fallback_to_english:
self.uber_clique._AddMissingTranslation(lang, self, is_error=False)
return self.GetMessage()
# If we're not supposed to generate pseudotranslations, we add an error
# report to a list of errors, then fail at a higher level, so that we
# get a list of all messages that are missing translations.
if not pseudo_if_no_match:
self.uber_clique._AddMissingTranslation(lang, self, is_error=True)
return pseudo.PseudoMessage(self.GetMessage())
def AllMessagesThatMatch(self, lang_re, include_pseudo = True):
'''Returns a map of all messages that match 'lang', including the pseudo
translation if requested.
Args:
lang_re: re.compile(r'fr|en')
include_pseudo: True
Return:
{ 'en' : tclib.Message,
'fr' : tclib.Translation,
pseudo.PSEUDO_LANG : tclib.Translation }
'''
if not self.translateable:
return [self.GetMessage()]
matches = {}
for msglang in self.clique:
if lang_re.match(msglang):
matches[msglang] = self.clique[msglang]
if include_pseudo:
matches[pseudo.PSEUDO_LANG] = pseudo.PseudoMessage(self.GetMessage())
return matches
def AddTranslation(self, translation, language):
'''Add a translation to this clique. The translation must have the same
ID as the message that is the source for this clique.
If this clique is not translateable, the function just returns.
Args:
translation: tclib.Translation()
language: 'en'
Throws:
grit.exception.InvalidTranslation if the translation you're trying to add
doesn't have the same message ID as the source message of this clique.
'''
if not self.translateable:
return
if translation.GetId() != self.GetId():
raise exception.InvalidTranslation(
'Msg ID %s, transl ID %s' % (self.GetId(), translation.GetId()))
assert not language in self.clique
# Because two messages can differ in the original content of their
# placeholders yet share the same ID (because they are otherwise the
# same), the translation we are getting may have different original
# content for placeholders than our message, yet it is still the right
# translation for our message (because it is for the same ID). We must
# therefore fetch the original content of placeholders from our original
# English message.
#
# See grit.clique_unittest.MessageCliqueUnittest.testSemiIdenticalCliques
# for a concrete explanation of why this is necessary.
original = self.MessageForLanguage(self.source_language, False)
if len(original.GetPlaceholders()) != len(translation.GetPlaceholders()):
print("ERROR: '%s' translation of message id %s does not match" %
(language, translation.GetId()))
assert False
transl_msg = tclib.Translation(id=self.GetId(),
text=translation.GetPresentableContent(),
placeholders=original.GetPlaceholders())
if (self.custom_type and
not self.custom_type.ValidateAndModify(language, transl_msg)):
print("WARNING: %s translation failed validation: %s" %
(language, transl_msg.GetId()))
self.clique[language] = transl_msg