# -*- coding: utf-8 -*- # i18n.py --- Utility functions and classes for FlightGear's # internationalization # Copyright (C) 2017 Florent Rougon # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # ***************************************************************************** # Terminology: # # category: this corresponds to a “resource” in FlightGear's C++ code # (e.g., flightgear/src/Main/locale.cxx). # Examples: menu, options, sys, tips. # # master string: # a translatable string before it is translated. In FlightGear, # this is in English---I believe U.S. English (en_US) to be # accurate. In Qt Linguist's source code (C++), this is called # “source text” (cf. TranslatorMessage::sourceText() in # qt5.git/qttools/src/linguist/shared/translatormessage.h). # # master translation: # also called the “default translation”. It is made of the English # strings (in $FG_ROOT/Translations/default) that are to be # translated into other languages. # # tid: variable name I use for an instance of a subclass of # AbstractTranslationUnitId # # ***************************************************************************** import abc import collections import enum import functools import os import pprint import re import sys try: import xml.etree.ElementTree as et except ImportError: import elementtree.ElementTree as et from textwrap import indent, dedent from . import misc from .logging import DummyLogger from .exceptions import FGPyException dummyLogger = DummyLogger() # Not including "atc", because it has no translation. Please keep this sorted. CATEGORIES = ("menu", "options", "sys", "tips") # Directory name for the default (master) translation DEFAULT_LANG_DIR = "default" # Root of the base name for the default output files (XLIFF...) L10N_FILENAME_BASE = "FlightGear-nonQt" # Every subclass of AbstractFormatHandler should register itself here # using registerFormatHandler(). This allows automatic selection of the # proper format handler based on user input (e.g., a command-line option # such as --format=xliff). FORMAT_HANDLERS_MAP = {} FORMAT_HANDLERS_NAMES = [] # The plural forms for each language should be listed in the same order # as in Qt Linguist (either look in the Linguist GUI or in # qttools/src/linguist/shared/numerus.cpp). PLURAL_FORMS = { None: [""], # for the default (= master) translation "de": ["singular", "plural"], "en": ["singular", "plural"], "es": ["singular", "plural"], "fr": ["singular", "plural"], "it": ["singular", "plural"], "nl": ["singular", "plural"], "pl": ["singular", "paucal", "plural"], "pt": ["singular", "plural"], "ru": ["singular", "plural"], "zh": ["universal"] # universal form } # Regexps for parsing language codes FGLocale_cre = re.compile( "(?P[a-zA-Z]+)(_(?P[a-zA-Z0-9]+))?") # This is a simplified version compared to what the RFC allows RFC4646Locale_cre = re.compile( "(?P[a-zA-Z]+)(-(?P[a-zA-Z0-9]+))?") def pluralFormsForLanguage(langCode): try: pluralForms = PLURAL_FORMS[langCode] except KeyError: mo = FGLocale_cre.match(langCode) assert mo is not None try: pluralForms = PLURAL_FORMS[mo.group("language")] except KeyError: raise MissingLocaleMetadata( "PLURAL_FORMS data is missing for locale {!r}".format(langCode)) return pluralForms # Trivial, but this is what we'll need most of the times here. def nbPluralFormsForLanguage(langCode): return len(pluralFormsForLanguage(langCode)) def registerFormatHandler(fmtName, fmtHandlerClass): global FORMAT_HANDLERS_NAMES FORMAT_HANDLERS_MAP[fmtName] = fmtHandlerClass FORMAT_HANDLERS_NAMES = sorted(FORMAT_HANDLERS_MAP.keys()) # ***************************************************************************** # * Custom exceptions * # ***************************************************************************** class error(FGPyException): """Base class for exceptions raised in this module.""" ExceptionShortDescription = "Generic exception" class BadAPIUse(error): """Exception raised when this module's API is used incorrectly.""" ExceptionShortDescription = "Bad API use" class TranslationFileParseError(error): """Exception raised when parsing a translation file fails.""" ExceptionShortDescription = "Error parsing a translation file" class XliffParseError(TranslationFileParseError): """Exception raised when parsing an XLIFF file fails.""" ExceptionShortDescription = "Error parsing an XLIFF file" class XliffLogicalWriteError(error): """ Exception raised when writing an XLIFF file fails for some logical reason.""" ExceptionShortDescription = "Error writing an XLIFF file" class MissingLocaleMetadata(error): """ Exception raised when locale-specific metadata is needed but unavailable.""" ExceptionShortDescription = "Missing locale metadata" # ***************************************************************************** # * TranslationUnit & friends * # ***************************************************************************** # Abstract base class class AbstractTranslationUnitId(metaclass=abc.ABCMeta): """Abstract base class for the ID of a TranslationUnit (“tid“). This key is used to access a given TranslationUnit from a Translation instance. If 't' is a Translation instance and 'cat' a category, then t[cat] is a mapping whose keys are instances of a subclass of AbstractTranslationUnitId, and values are TranslationUnit instances: t[cat][tid] is a TranslationUnit instance for appropriate tid objects. Each subclass must define (as instance or class member) a 'cat' attribute that must contain a non-empty string. """ @abc.abstractmethod def id(self): raise NotImplementedError @abc.abstractmethod def __str__(self): raise NotImplementedError @abc.abstractmethod def __eq__(self, other): raise NotImplementedError @abc.abstractmethod def __lt__(self, other): raise NotImplementedError @abc.abstractmethod def __hash__(self): raise NotImplementedError @functools.total_ordering class BasicTranslationUnitId(AbstractTranslationUnitId): # Helper regexp for parsing the result of str() applied to an instance of # this class. regexp = re.compile(r"""^(?P [^/:]+) / (?P [^/:]+) : (?P \d+)$""", re.VERBOSE) # Same as above with one more field (group), 'pluralIndex'. It is used for # in XLIFF files generated by Qt Linguist. In # this library, we pack all plural forms belonging together into *one* # TranslationUnit instance, which only has one associated id that we call # “tid”, not containing any pluralIndex. So, what this regexp parses is a # “tid” followed by an optional plural form index inside brackets. xliffRegexp = re.compile(r"""^(?P [^/:]+) / (?P [^/:]+) : (?P \d+) (\[ (?P\d+) \])?$""", re.VERBOSE) def __init__(self, cat, basicId, index): self.cat = cat # category ("menu", "tips", options"...) self.basicId = basicId # string (an XML tag name) self.index = index # integer (a PropertyList node index) def id(self): return "{}:{}".format(self.basicId, self.index) def __str__(self): return "{}/{}".format(self.cat, self.id()) def __repr__(self): return "{}.{}({!r}, {!r}, {!r})".format( __name__, type(self).__name__, self.cat, self.basicId, self.index) def _key(self): return (self.cat, (self.basicId, self.index)) # The other comparisons are deduced from these by the # functools.total_ordering decorator. def __lt__(self, other): if type(self) is type(other): return self._key() < other._key() else: return NotImplemented def __eq__(self, other): return type(self) is type(other) and self._key() == other._key() def __hash__(self): return hash((type(self), self._key())) class ContextDevComment: """Class representing a context developer comment. Such a comment is crafted from the XLIFF output of Qt Linguist, it may have subcomments. """ def __init__(self, mainComment, *, translatorComments=None, developerComments=None): """Initialize a ContextDevComment instance.""" self.mainComment = mainComment self.translatorComments = ( list(translatorComments) if translatorComments is not None else []) self.developerComments = ( list(developerComments) if developerComments is not None else []) def customRepr(self, className): """Represent an instance of this class. Multiline representation with indentation before all args but the first. The 'className' parameter simply lets the caller decide whether he wants a qualified or unqualified name, because the qualified one is likely to shift the second and subsequent lines a lot to the right. """ joint = ",\n" + (" "*(len(className) + 1)) args = [repr(self.mainComment), "translatorComments={!r}".format(self.translatorComments), "developerComments={!r}".format(self.developerComments)] return "{}({})".format(className, joint.join(args)) def __repr__(self): # Qualified class name return self.customRepr("{}.{}".format(__name__, type(self).__name__)) def __str__(self): # Just the class name: much shorter than what we use in __repr__() return self.customRepr(type(self).__name__) def copy(self): """Return a new TranslationUnit instance that is a copy of 'self'.""" return type(self)(self.mainComment, translatorComments=self.translatorComments, developerComments=self.developerComments) def strings(self): return dedent("""\ mainComment = {self.mainComment!r} translatorComments = {self.translatorComments!r} developerComments = {self.developerComments!r}""").format(self=self) @functools.total_ordering class TranslationUnit: """Class containing a source string and its translations for a given locale. Roughly corresponds to XLIFF's element or Qt Linguist's TranslatorMessage class. """ def __init__(self, targetLanguage, sourceText, targetTexts, *, approved=False, translate=True, translatorComments=None, developerComments=None, isPlural=False): """Initialize a TranslationUnit instance. The default values for 'approved' and 'translate' correspond to the defaults in the XLIFF 1.2 specification when the identically-named attributes aren't specified. In Qt Linguist's TranslatorMessage class, the combination of 'approved' and 'translate' corresponds to an enum value: enum Type { Unfinished, Finished, Vanished, Obsolete }: \ | \ translate | True False approved \ | ---------------------+------------------------------------ True | Finished Vanished | False | Unfinished Obsolete 'targetLanguage' is the target language code (e.g., 'de' or 'fr_BE'). It is used to determine the number of plural forms, and thus the number of elements 'targetTexts' must evaluate to (see below). 'targetLanguage' should be None for all TranslationUnit instances of the default translation. 'targetTexts' must be an iterable of strings with at least one element. If it has several, they denote plural forms. """ self.sourceText = sourceText self.targetLanguage = targetLanguage for attr in ("approved", "translate", "isPlural"): setattr(self, attr, bool(locals()[attr])) self.setTargetTexts(targetTexts) # *after* setting isPlural # Note: Linguist 5.7.1 only keeps the last comment of each type when # reading an XLIFF file containing several consecutive elements. self.translatorComments = ( list(translatorComments) if translatorComments is not None else []) self.developerComments = ( list(developerComments) if developerComments is not None else []) def setTargetTexts(self, targetTexts): if isinstance(targetTexts, str): # prevent an easy error raise TypeError( "'targetTexts' should not be a string: {!r}" .format(targetTexts)) l = list(targetTexts) # enforce the type and copy nbPluralForms = nbPluralFormsForLanguage(self.targetLanguage) if self.isPlural and len(l) != nbPluralForms: raise BadAPIUse( "trying to set the targetTexts list for a plural " "TranslationUnit, however len(targetTexts) doesn't match the " "number of plural forms for the target language:\n" " targetTexts = {targetTexts!r}\n" " nb plural forms = {nbPluralForms}".format( targetTexts=l, nbPluralForms=nbPluralForms)) elif not self.isPlural and len(l) != 1: raise BadAPIUse( "a non-plural TranslationUnit instance must have " "len(targetTexts) == 1, however we have targetTexts = {!r}" .format(l)) # This check is most likely redundant with the previous ones, but # doesn't hurt. if not l: raise BadAPIUse("the 'targetTexts' iterable should not be empty") self.targetTexts = l def customRepr(self, className): """Represent an instance of this class. Multiline representation with indentation before all args but the first. The 'className' parameter simply lets the caller decide whether he wants a qualified or unqualified name, because the qualified one is likely to shift the second and subsequent lines a lot to the right. """ joint = ",\n" + (" "*(len(className) + 1)) args = [repr(self.targetLanguage), repr(self.sourceText), repr(self.targetTexts), "approved={!r}".format(self.approved), "translate={!r}".format(self.translate), "translatorComments={!r}".format(self.translatorComments), "developerComments={!r}".format(self.developerComments), "isPlural={!r}".format(self.isPlural)] return "{}({})".format(className, joint.join(args)) def __repr__(self): # Qualified class name return self.customRepr("{}.{}".format(__name__, type(self).__name__)) def __str__(self): # Just the class name: much shorter than what we use in __repr__() return self.customRepr(type(self).__name__) def copy(self): """Return a new TranslationUnit instance that is a copy of 'self'.""" return type(self)(self.targetLanguage, self.sourceText, self.targetTexts, approved=self.approved, translate=self.translate, translatorComments=self.translatorComments, developerComments=self.developerComments, isPlural=self.isPlural) def _key(self): """Key used to compare two TranslationUnit instances.""" return (self.targetLanguage, self.sourceText, self.targetTexts, self.isPlural, self.developerComments, self.translatorComments, self.approved, self.translate) # The other comparisons are deduced from these by the # functools.total_ordering decorator. def __lt__(self, other): if type(self) is type(other): return self._key() < other._key() else: return NotImplemented def __eq__(self, other): return type(self) is type(other) and self._key() == other._key() def __hash__(self): return hash(self._key()) def _stringsKey(self): """Key used to compare the strings of two TranslationUnit instances.""" return (self.self.sourceText, self.targetTexts, self.developerComments, self.translatorComments) def sameStrings(self, other): return self._stringsKey() == other._stringsKey() def strings(self): # Note that this omits the 'translate' and 'approved' attributes (which # are not strings). return dedent("""\ sourceText = {self.sourceText!r} targetTexts = {self.targetTexts!r} translatorComments = {self.translatorComments!r} developerComments = {self.developerComments!r}""").format(self=self) def mayNeedReview(self, other): return ((self.sourceText, self.isPlural, self.developerComments) != (other.sourceText, other.isPlural, other.developerComments)) def fixSizeOfTargetTexts(self): if self.isPlural: nbPluralForms = nbPluralFormsForLanguage(self.targetLanguage) else: nbPluralForms = 1 if len(self.targetTexts) > nbPluralForms: # Too long -> trim self.targetTexts del self.targetTexts[nbPluralForms:] elif len(self.targetTexts) < nbPluralForms: # Too short -> add empty translations self.targetTexts.extend( [""] * (nbPluralForms - len(self.targetTexts))) def mergeMasterTranslationUnit(self, masterTu, *, approved=False): """Merge a master translation unit into self. self.targetLanguage and self.translatorComments are not touched; self.targetTexts is only trimmed or extended as needed if isPlural is changed; self.approved is set according to the corresponding argument, other attributes are copied. """ self.sourceText = masterTu.sourceText self.developerComments = list(masterTu.developerComments) self.approved = approved self.translate = masterTu.translate self.isPlural = masterTu.isPlural self.fixSizeOfTargetTexts() # needed because of the change to 'isPlural' class Translation: def __init__(self, sourceLanguage, targetLanguage): """Initialize a Translation instance. 'sourceLanguage' and' targetLanguage' must be of the form ll or ll_TT (e.g., en, en_GB, fr, fr_FR, fr_CA...), except for the default translation (see below). The default translation (master) is characterized by the fact that its 'targetLanguage' attribute is None. For each TranslationUnit instance it contains (cf. the 'translations' attribute and __iter__()), the 'sourceText' is an en_US string and the 'targetTexts' is a list containing one element: the empty string. """ for attr in ("sourceLanguage", "targetLanguage"): setattr(self, attr, locals()[attr]) # Allows straightforward iteration over sorted categories self.translations = collections.OrderedDict() # Qt Linguist uses empty-source-text comments as “context comments”, # which are developer comments about a context. Each of these is # written as a in XLIFF. Two such comments compare equal # in Linguist as soon as they are in the same # x-trolltech-linguist-context. cf. bool # operator==(TranslatorMessageContentPtr tmp1, # TranslatorMessageContentPtr tmp2) in # qt5.git/qttools/src/linguist/shared/translator.cpp. self.contextDevComments = collections.OrderedDict() for cat in CATEGORIES: # Keys: instances of a subclass of AbstractTranslationUnitId # (“tid”) # Values: TranslationUnit instances self.translations[cat] = {} # List of ContextDevComment instances self.contextDevComments[cat] = [] def __str__(self): l = [dedent("""\ Translation: sourceLanguage = {!r} targetLanguage = {!r}""").format(self.sourceLanguage, self.targetLanguage)] for cat, d in self.translations.items(): if self.contextDevComments[cat]: s = "\n\n".join(( indent(c.strings(), " ") for c in self.contextDevComments[cat] )) ctxDevComments = "Context developer comments:\n\n{}".format(s) else: ctxDevComments = "Context developer comments: none" tUnits = ["{}\n{}".format(tid, tu) for tid, tu in sorted(d.items())] translUnits = "Translation units:\n\n{}".format( "\n\n".join(tUnits)) categoryHeading = "Category: {cat!r}".format(cat=cat) l.append("\n\n{categoryHeading}\n{underline}\n\n" "{ctxDevComments}\n\n" "{translUnits}".format( categoryHeading=categoryHeading, underline="-"*len(categoryHeading), ctxDevComments=ctxDevComments, translUnits=translUnits)) return ''.join(l) def __getitem__(self, cat): """Get all translations for a given category. Return a mapping where each key is a tid (instance of a subclass of AbstractTranslationUnitId) and each value a TranslationUnit instance. """ return self.translations[cat] def __setitem__(self, cat, mapping): """Replace all translations for a given category. cat: a category name (string such as 'sys', 'options', etc.) mapping: a mapping where each key is a tid (instance of a subclass of AbstractTranslationUnitId) and each value a TranslationUnit instance. """ self.translations[cat] = dict(mapping) def __iter__(self): return iter(self.translations) def __contains__(self, cat): return (cat in self.translations) def resetCategory(self, cat): self.translations[cat] = {} # tid: an instance of a subclass of AbstractTranslationUnitId. def addMasterString(self, tid, sourceText, isPlural=False): # - target language -> None # - only the master string (source text) # - one empty target text # - carry the plural status self.translations[tid.cat][tid] = TranslationUnit( None, sourceText, [""], isPlural=isPlural) def addTranslation(self, masterTransl, tid, sourceText, targetTexts, *, translatorComments=None, developerComments=None, isPlural=False, logger=dummyLogger): """Add a TranslationUnit to a Translation instance, with some checks. sourceText: string targetTexts: iterable of strings """ category = tid.cat if tid not in masterTransl[category]: # Is it the “best” behavior? logger.warning( "{lang}/{cat}: translated string not in master file: {id!r}" .format(lang=self.targetLanguage, cat=category, id=tid.id())) return t = TranslationUnit(self.targetLanguage, sourceText, targetTexts, isPlural=isPlural, translatorComments=translatorComments, developerComments=developerComments) thisCatTranslations = self.translations[category] if tid in thisCatTranslations: if thisCatTranslations[tid].sameStrings(t): if thisCatTranslations[tid].isPlural != t.isPlural: complement = " one has plural forms, the other not" else: complement = " identical strings" else: complement = "\nold:\n{old}\n\nnew:\n{new}".format( old=indent(thisCatTranslations[tid].strings(), " "), new=indent(t.strings(), " ")) logger.warning("{lang}/{cat}: duplicate translated string: {id!r}:" "{complement}" .format(lang=self.targetLanguage, cat=category, id=tid.id(), complement=complement)) thisCatTranslations[tid] = t def markObsoleteOrVanishedInCategory(self, masterTransl, cat, logger=dummyLogger): thisCatTranslations = self.translations[cat] masterIdsList = frozenset( ( str(tid) for tid in masterTransl[cat].keys() )) for tid, translUnit in thisCatTranslations.items(): if (str(tid) not in masterIdsList and thisCatTranslations[tid].translate): # Obsolete or vanished (depending on whether it is approved) logger.info( "{lang}: translatable string '{id}' not found in the " "default translation -> setting translate='no'" .format(lang=self.targetLanguage, id=tid)) thisCatTranslations[tid].translate = False def markObsoleteOrVanished(self, masterTransl, *, logger=dummyLogger): for cat in self.translations: self.markObsoleteOrVanishedInCategory(masterTransl, cat, logger=logger) def removeObsoleteOrVanishedInCategory(self, cat, *, logger=dummyLogger): thisCatTranslations = self.translations[cat] # Find all tid's from self.translations[cat] whose corresponding # translation unit 'tu' has tu.translate == False. tidsToRemove = [ tid for tid, translUnit in thisCatTranslations.items() if not translUnit.translate ] # Remove the corresponding elements from self.translations[cat] for tid in tidsToRemove: translUnit = thisCatTranslations[tid] qualifier = "vanished" if translUnit.approved else "obsolete" logger.info( "{lang}: removing {qualifier} translated string '{id}'" .format(lang=self.targetLanguage, qualifier=qualifier, id=tid)) del thisCatTranslations[tid] def removeObsoleteOrVanished(self, *, logger=dummyLogger): for cat in self.translations: self.removeObsoleteOrVanishedInCategory(cat, logger=logger) def mergeMasterForCategory(self, masterTransl, cat, logger=dummyLogger): if cat not in masterTransl: raise BadAPIUse("category {!r} not in 'masterTransl'".format(cat)) elif cat not in self: # Category appeared in 'masterTransl' that wasn't in 'self' self.resetCategory(cat) self.contextDevComments[cat] = \ [ comment.copy() for comment in masterTransl.contextDevComments[cat] ] thisCatTranslations = self.translations[cat] idsSet = { str(tid) for tid in thisCatTranslations.keys() } for masterTid, masterTu in masterTransl.translations[cat].items(): if str(masterTid) not in idsSet: logger.info( "{lang}: adding new translatable string '{id}'" .format(lang=self.targetLanguage, id=masterTid)) self.addTranslation( masterTransl, masterTid, masterTu.sourceText, [""], developerComments=masterTu.developerComments, isPlural=masterTu.isPlural, logger=logger) idsSet.add(masterTid) elif thisCatTranslations[masterTid].mayNeedReview(masterTu): thisCatTranslations[masterTid].mergeMasterTranslationUnit( masterTu, approved=False) logger.info( "{lang}: '{id}': source text, developer comments or " "plural/non plural status changed -> needs translator " "review".format(lang=self.targetLanguage, id=masterTid)) # At this point, thisCatTranslations has a translation unit with id # masterTid. At the time of this writing, all translation units in # the default translation have translate=True, but just in case, # let's copy this attribute from the master translation unit if # they are different. current = thisCatTranslations[masterTid].translate new = masterTu.translate if current != new: logger.info( "{lang}: setting translate='{translateVal}' for " "translatable string '{id}'" .format(lang=self.targetLanguage, id=masterTid, translateVal="yes" if new else "no")) thisCatTranslations[masterTid].translate = new self.markObsoleteOrVanishedInCategory(masterTransl, cat, logger=logger) def mergeMasterTranslation(self, masterTransl, logger=dummyLogger): """Update all categories in 'self' based on 'masterTransl'.""" for cat in masterTransl: self.mergeMasterForCategory(masterTransl, cat, logger=logger) # Find all empty categories in 'self' that are not in 'masterTransl' categoriesToRemove = [ cat for cat in self if not self[cat] and cat not in masterTransl ] # Now, remove them from 'self' for cat in categoriesToRemove: logger.info( "{lang}: removing empty category '{cat}' not found in master" .format(lang=self.targetLanguage, cat=cat)) del self[cat] # Helper method for mergeNonMasterTranslForCategory() def _mergeNonMasterTranslForCategory_CheckMatchingParams( self, cat, tid, srcTu, logger): translUnit = self.translations[cat][tid] if srcTu.targetLanguage != translUnit.targetLanguage: logger.warning( "ignoring translatable string '{id}', because the target " "languages don't match between the two translations" .format(id=tid)) return False if srcTu.sourceText != translUnit.sourceText: logger.warning( "ignoring translatable string '{id}', because the source " "texts differ between the two translations" .format(id=tid)) return False if len(srcTu.targetTexts) != len(translUnit.targetTexts): logger.warning( "ignoring translatable string '{id}', because the lists " "of target texts (= number of singular + plural forms) differ " "between the two translations".format(id=tid)) return False if srcTu.isPlural != translUnit.isPlural: logger.warning( "ignoring translatable string '{id}', because the plural " "statuses don't match".format(id=tid)) return False return True def mergeNonMasterTranslForCategory(self, srcTransl, cat, logger=dummyLogger): """Merge a non-master Translation into 'self' for category 'cat'. See mergeNonMasterTransl()'s docstring for more info. """ if cat not in srcTransl: return # nothing to merge in this category elif cat not in self: raise BadAPIUse( "cowardly refusing to create category {!r} in the destination " "translation for an XLIFF-to-XLIFF merge operation " "(new categories should be first added to the master " "translation, then merged into each XLIFF translation file)" .format(cat)) if srcTransl.targetLanguage != self.targetLanguage: raise BadAPIUse( "cowardly refusing to merge two XLIFF files with different " "target languages") thisCatTranslations = self.translations[cat] idsSet = { str(tid) for tid in thisCatTranslations.keys() } for tid, srcTu in srcTransl.translations[cat].items(): if str(tid) not in idsSet: logger.warning( "translatable string '{id}' not found in the " "destination translation during an XLIFF-to-XLIFF merge " "operation. The string will be ignored, because new " "translatable strings must be brought by the default " "translation.".format(id=tid)) continue # If some parameters don't match (sourceText, isPlural...), the # translation in 'srcTu' is probably outdated, so don't use it. elif not self._mergeNonMasterTranslForCategory_CheckMatchingParams( cat, tid, srcTu, logger): continue else: translUnit = thisCatTranslations[tid] translUnit.targetTexts = srcTu.targetTexts[:] # copy translUnit.approved = srcTu.approved translUnit.translatorComments = srcTu.translatorComments[:] def mergeNonMasterTransl(self, srcTransl, logger=dummyLogger): """Merge the non-master Translation 'srcTransl' into 'self'. Contrary to mergeMasterTranslation(), this method doesn't add new translatable strings to 'self', doesn't mark strings as obsolete or vanished, nor does it add or remove categories in 'self'. It only updates strings in 'self' from 'srcTransl' when they: - already exist in 'self'; - have the same target language, source text, plural status and number of plural forms in 'self' and in 'srcTransl'. Expected use case: suppose that a translator is working on a translation file, and meanwhile the official XLIFF file (for instance) for this translation is updated in the project repository (new translatable strings added, obsolete strings marked or removed, etc.). This method can then be used to merge the translator work into the project file for all strings for which it makes sense (source text unchanged, same plural status, etc.). """ for cat in srcTransl: self.mergeNonMasterTranslForCategory(srcTransl, cat, logger=logger) def nbPluralForms(self): return nbPluralFormsForLanguage(self.targetLanguage) def langCodeForXliff(langCode): """Convert a string from ll_TT format to ll-TT (RFC 4646). It's okay if only the 'll' part is given, with no underscore. """ mo = FGLocale_cre.match(langCode) if not mo: assert False, "Unexpected FG locale: '{}'".format(langCode) lang, territory = mo.group("language", "territory") assert lang, repr(lang) # neither None nor the empty string if territory is None: return lang.lower() else: # Complies with RFC 4646, as specified in the XLIFF 1.2 spec. return "{}-{}".format(lang.lower(), territory.upper()) def langCodeInll_TTformat(langCode): """Convert a string from ll-TT format (RFC 4646) to ll_TT. It's okay if only the 'll' part is given, with no hyphen. """ mo = RFC4646Locale_cre.match(langCode) if not mo: assert False, "Unexpected RFC 4646-style locale: '{}'".format(langCode) lang, territory = mo.group("language", "territory") assert lang, repr(lang) # neither None nor the empty string if territory is None: return lang.lower() else: return "{}_{}".format(lang.lower(), territory.upper()) class XliffVariables(enum.Enum): QtContext, gettextContext, gettextPreviousContext, translate, \ lineNumber, sourceFile = range(6) class NestedScopes: """Simple implementation of nested scopes for XLIFF “variables”.""" def __init__(self): self.scopes = collections.deque() def enterScope(self): self.scopes.append({}) def exitScope(self): self.scopes.pop() def __setitem__(self, variable, value): """Set a variable at the innermost scope.""" self.scopes[-1][variable] = value def __getitem__(self, variable): """Get a variable value. Traverse scopes as needed.""" for scope in reversed(self.scopes): if variable in scope: return scope[variable] raise KeyError(variable) def __iter__(self): return iter(frozenset(( var for scope in self.scopes for var in scope.keys() ))) def __contains__(self, variable): try: self[variable] except KeyError: return False return True def hasAtInnerMostScope(self, variable): """Tell if a variable is set in the innermost scope.""" return variable in self.scopes[-1] def insideScope(method): """Decorator: create a scope upon method entry and leave it upon exit.""" @functools.wraps(method) def wrapper(self, *args, **kwargs): self.scopedVars.enterScope() try: res = method(self, *args, **kwargs) finally: self.scopedVars.exitScope() return res return wrapper # Abstract base class class AbstractFormatHandler(metaclass=abc.ABCMeta): """Abstract base class for format handlers such as XLIFF.""" # Subclasses should generally override this (file extension, with no dot) standardExtension = None @classmethod def defaultFileStem(cls, targetLanguage): """Expected file stem (for FlightGear) for a given language code.""" # Currently: no use of the language code here, because the directories # we put these files in are named after the language code. return L10N_FILENAME_BASE @classmethod def defaultFileBaseName(cls, targetLanguage): """Expected file basename (for FlightGear) for a given language code.""" return "{}.{}".format(cls.defaultFileStem(targetLanguage), cls.standardExtension) @classmethod def defaultFilePath(cls, translationsDir, targetLanguage): """ Expected file path for a given translations directory and language.""" baseName = cls.defaultFileBaseName(targetLanguage) return os.path.join(translationsDir, targetLanguage, baseName) @abc.abstractmethod def writeTranslation(self, transl, filePath): """Write a Translation instance to a file.""" raise NotImplementedError class XliffFormatReader: """Read from XLIFF files.""" xliffNamespaceURI = "urn:oasis:names:tc:xliff:document:1.2" # URI reserved for the 'xml' prefix, cf. # xmlNamespaceURI = "http://www.w3.org/XML/1998/namespace" # Mapping from each prefix to the associated namespace nsMap = {"xliff": xliffNamespaceURI, "xml": xmlNamespaceURI} def __init__(self, file_): self.file = file_ # Used to implement “XLIFF variables” such as the current 'translate' # value: they have scoping properties that generally match elements # nesting in the XML markup, except, e.g., for contexts defined in a # itself inside a , which affect *subsequent* # elements inside the : # # “All , , , and # non-XLIFF elements pertain to the subsequent elements in the tree but # can be overridden within a child element.” # # () self.scopedVars = NestedScopes() # Filling this object is the main purpose of this class self.transl = Translation(None, None) self.insidePluralGroup = False # List of (tid, pluralIndex, transl) tuples where each 'transl' is a # temporary TranslationUnit instance. They will be merged into one when # the relevant ends (several plural forms of the same string). self.pluralGroupContents = [] def _readXliffBool(self, string_): if string_ not in ("yes", "no"): raise XliffParseError( "{file}: not a valid XLIFF boolean: {val!r}" .format(file=self.file, val=string_)) return (string_ == "yes") @classmethod def qualTagName(cls, unqualified): """Return a tag name in the XLIFF namespace (using XPath syntax).""" return "{" + cls.xliffNamespaceURI + "}" + unqualified @classmethod def xmlQualName(cls, unqualified): """Return a qualified tag or attribute name for the 'xml' prefix. This prefix is special and reserved (): The prefix xml is by definition bound to the namespace name http://www.w3.org/XML/1998/namespace. """ return "{" + cls.xmlNamespaceURI + "}" + unqualified def parse(self): tree = et.parse(self.file) rootNode = tree.getroot() if (rootNode.tag != self.qualTagName("xliff") or rootNode.get("version") != "1.2"): raise XliffParseError( "{file}: this parser only supports (parts of) the XLIFF 1.2 " "standard, and the root node doesn't seem to conform to this " "(tag name = {tag!r}, 'version' attribute = {version!r})" .format(file=self.file, tag=rootNode.tag, version=rootNode.get("version"))) self.scopedVars.enterScope() # so that we can define scoped variables try: # Set default value according to the XLIFF specification self.scopedVars[XliffVariables.translate] = True for fileNode in rootNode.iterfind("./xliff:file", self.nsMap): self._handleFileNode(fileNode) finally: self.scopedVars.exitScope() return self.transl @insideScope def _handleFileNode(self, fileNode): if "source-language" in fileNode.attrib: self.transl.sourceLanguage = langCodeInll_TTformat( fileNode.get("source-language")) if "target-language" in fileNode.attrib: self.transl.targetLanguage = langCodeInll_TTformat( fileNode.get("target-language")) headerSeen = False bodySeen = False for node in fileNode: if node.tag == self.qualTagName("header"): if bodySeen: raise XliffParseError( "{file}: 'header' element found after a 'body' element " "inside a 'file' element".format(file=self.file)) elif headerSeen: raise XliffParseError( "{file}: found more than one 'header' element inside a " "'file' element, this doesn't conform to the XLIFF 1.2 " "specification".format(file=self.file)) else: headerSeen = True elif node.tag == self.qualTagName("body"): if bodySeen: raise XliffParseError( "{file}: found more than one 'body' element inside a " "'file' element, this doesn't conform to the XLIFF 1.2 " "specification".format(file=self.file)) else: bodySeen = True self._handleBodyNode(node) @insideScope def _handleBodyNode(self, bodyNode): for node in bodyNode: if node.tag == self.qualTagName("group"): self._handleGroupNode(node) elif node.tag == self.qualTagName("trans-unit"): self._handleTransUnitNode(node) elif node.tag == self.qualTagName("bin-unit"): pass # not implemented else: raise XliffParseError( "{file}: illegal element inside a 'body' element: {tag!r}" .format(file=self.file, tag=node.tag)) def _handlePluralGroup(self, notesDict): """Handle a group containing related plural forms.""" sourceTexts = set() tids = set() pluralIdxMap = {} # to put the plural indices back in order # May only be set in and elements approved = True # May come from an enclosing translate = self.scopedVars[XliffVariables.translate] tmpTargetTexts = [] if len(self.pluralGroupContents) != self.transl.nbPluralForms(): raise XliffParseError( "{file}: found a plural group with {found} 'transl-unit' " "elements, however the expected number of plural forms for " "language {lang!r} is {expected}. Plural group contents: " "{pluralGroup!r}".format( file=self.file, lang=self.transl.targetLanguage, found=len(self.pluralGroupContents), expected=self.transl.nbPluralForms(), pluralGroup=self.pluralGroupContents)) for i, (tid, pluralIndex, transl) in \ enumerate(self.pluralGroupContents): assert isinstance(pluralIndex, int), pluralIndex pluralIdxMap[pluralIndex] = i sourceTexts.add(transl.sourceText) tids.add(tid) approved = approved and transl.approved translate = translate or transl.translate # 'transl' has exactly one target text (temporary, non-plural # TranslationUnit) tmpTargetTexts.append(transl.targetTexts[0]) obtainedIndices = frozenset(pluralIdxMap.keys()) if (frozenset(range(len(self.pluralGroupContents))) != obtainedIndices): raise XliffParseError( '{file}: incorrect set of indices for plural forms ' 'inside a group: ' "{indices!r}".format(file=self.file, indices=sorted(obtainedIndices))) elif len(tids) > 1: raise XliffParseError( "{file}: all plural forms for the same master string " "should have the same tid. 'tid's found: {tids!r}" .format(file=self.file, tids=sorted(tids))) elif len(sourceTexts) > 1: raise XliffParseError( "{file}: all plural forms inside a given " ' group ' "should have the same sourceText. 'sourceText's found: " "{sourceTexts!r}" .format(file=self.file, sourceTexts=sorted(sourceTexts))) elif not tids: pass # empty plural group... else: assert len(sourceTexts) == 1, sourceTexts assert len(tids) == 1, tids tid = tids.pop() # get the only value # Reorder the target texts (= plural forms) in proper order in # case they weren't (which would be surprising...) targetTexts = [ tmpTargetTexts[pluralIdxMap[i]] for i in range(len(self.pluralGroupContents)) ] translUnit = TranslationUnit( self.transl.targetLanguage, sourceTexts.pop(), targetTexts, translatorComments=notesDict["translator"], developerComments=notesDict["developer"], approved=approved, translate=translate, isPlural=True) # Add the TranslationUnit containing all related plural forms self.transl[tid.cat][tid] = translUnit self.pluralGroupContents.clear() @insideScope def _handleGroupNode(self, node): pluralGroup = False if node.get("restype") == "x-trolltech-linguist-context": QtContext = node.get("resname") if QtContext is None: raise XliffParseError( "{file}: 'restype' attribute in a group without any " "corresponding 'resname'".format(file=self.file)) else: self.scopedVars[XliffVariables.QtContext] = QtContext elif node.get("restype") == "x-gettext-plurals": # Qt Linguist's way pluralGroup = self.insidePluralGroup = True translate = node.get("translate") if translate is not None: self.scopedVars[XliffVariables.translate] = \ self._readXliffBool(translate) notesDict = {"developer": [], "translator": []} for subnode in node: if subnode.tag == self.qualTagName("group"): self._handleGroupNode(subnode) elif subnode.tag == self.qualTagName("context-group"): self._handleContextGroupNode(subnode) elif subnode.tag == self.qualTagName("note"): self._handleNoteNode(subnode, notesDict) elif subnode.tag == self.qualTagName("trans-unit"): self._handleTransUnitNode(subnode) if pluralGroup: self.insidePluralGroup = False # for other methods of this class self._handlePluralGroup(notesDict) # Intentionally no @insideScope here! This way, the innermost scope is the # one created by the parent element of the . def _handleContextGroupNode(self, node): for subnode in node: if subnode.tag == self.qualTagName("context"): self._handleContextNode(subnode) else: raise XliffParseError( "{file}: illegal element inside a 'context-group' " "element: {tag!r}".format(file=self.file, tag=subnode.tag)) # Intentionally no @insideScope here! def _handleContextNode(self, node): # ctxName = node.get("context-name") # optional, unused so far here ctxType = node.get("context-type") if ctxType is None: raise XliffParseError( "{file}: invalid 'context' element found with no " "'context-type' attribute".format(file=self.file)) # See # # for other context types if ctxType == "linenumber": self.scopedVars[XliffVariables.lineNumber] = int(node.text) if ctxType == "sourcefile": self.scopedVars[XliffVariables.sourceFile] = node.text or "" elif ctxType == "x-gettext-msgctxt": # Trolltech invention self.scopedVars[XliffVariables.gettextContext] = node.text or "" elif ctxType == "x-gettext-previous-msgctxt": # Trolltech invention self.scopedVars[XliffVariables.gettextPreviousContext] = \ node.text or "" # Intentionally no @insideScope here! def _handleNoteNode(self, node, notesDict): """Add a translator or developer note to 'noteDict'.""" origin = node.get("from") if origin in ("developer", "translator"): notesDict[origin].append(node.text or "") elif origin is not None: # Maybe a bit harsh to raise for this... raise XliffParseError( "{file}: unknown 'origin' value for a 'note' element: " "'{origin}'".format(file=self.file, origin=origin)) # There can also be annotates="source" (output by Qt Linguist for # developer comments in addition to the 'origin' attribute), we don't # use this attribute. @insideScope def _handleTransUnitNode(self, node): tuId = node.get("id") if tuId is None: raise XliffParseError( "{file}: the 'id' attribute is required for 'trans-unit' " "elements".format(file=self.file)) approved = self._readXliffBool(node.get("approved", "no")) # This one is trickier, because it may be set either in an enclosing # group or here. translate = node.get("translate") if translate is not None: # This overrides any value from higher levels in the XLIFF input self.scopedVars[XliffVariables.translate] = \ self._readXliffBool(translate) mo = BasicTranslationUnitId.xliffRegexp.match(tuId) if mo is None: raise XliffParseError( "{file}: this 'id' attribute found on a 'trans-unit' element " "doesn't have the expected format: '{val}'".format( file=self.file, val=tuId)) tid = BasicTranslationUnitId(mo.group("cat"), mo.group("basicId"), int(mo.group("index"))) pluralIndex = mo.group("pluralIndex") if pluralIndex is not None: pluralIndex = int(pluralIndex) sourceText = targetText = None notesDict = {"developer": [], "translator": []} for subnode in node: if subnode.tag == self.qualTagName("source"): if sourceText is not None: raise XliffParseError( "{file}: several 'source' elements inside the same " "'trans-unit' element".format(file=self.file)) sourceText = self._handleSourceOrTargetNode(subnode, node.tag) elif subnode.tag == self.qualTagName("target"): if targetText is not None: raise XliffParseError( "{file}: several 'target' elements inside the same " "'trans-unit' element".format(file=self.file)) targetText = self._handleSourceOrTargetNode(subnode, node.tag) elif subnode.tag == self.qualTagName("note"): self._handleNoteNode(subnode, notesDict) elif subnode.tag == self.qualTagName("context-group"): # This holds context dev comments, for one, and sets # XliffVariables.gettextContext in our scope self._handleContextGroupNode(subnode) if sourceText is None: raise XliffParseError( "{file}: invalid 'trans-unit' element: doesn't contain any " "'source' element".format(file=self.file)) # The 'else' clause handles two cases: no element, or an empty # one. targetTexts = [targetText] if targetText else [""] translUnit = TranslationUnit( self.transl.targetLanguage, sourceText, targetTexts, approved=approved, translate=self.scopedVars[XliffVariables.translate]) if self.insidePluralGroup: if pluralIndex is None: raise XliffParseError( "{file}: invalid plural group: the id attribute value for " "each form must end with the form's plural index inside " "brackets (an integer)".format(file=self.file)) # Related plural forms will be merged into one TranslationUnit when # the containing ends. self.pluralGroupContents.append((tid, pluralIndex, translUnit)) elif tid.cat not in self.transl: raise XliffParseError( "{file}: unknown category: '{cat}'" .format(file=self.file, cat=tid.cat)) # Source text empty + inside an x-gettext-msgctxt -> context dev comment # (this is how Qt Linguist works) elif (not sourceText and XliffVariables.gettextContext in self.scopedVars): comment = ContextDevComment( self.scopedVars[XliffVariables.gettextContext], translatorComments=notesDict["translator"], developerComments=notesDict["developer"]) self.transl.contextDevComments[tid.cat].append(comment) elif tid in self.transl[tid.cat]: raise XliffParseError( "{file}: the same TranslationUnit id (tid) appeared several " "times, this is fishy: '{tid}'".format(file=self.file, tid=tid)) else: translUnit.translatorComments = notesDict["translator"] translUnit.developerComments = notesDict["developer"] # Add a simple TranslationUnit (no plural forms) self.transl[tid.cat][tid] = translUnit def _handleSourceOrTargetNode(self, node, containingTag): xmlLang = node.get(self.xmlQualName("lang")) if node.tag == self.qualTagName("source"): outerLanguage = langCodeForXliff(self.transl.sourceLanguage) else: assert node.tag == self.qualTagName("target"), node.tag outerLanguage = langCodeForXliff(self.transl.targetLanguage) # Error for , but not for if (containingTag == self.qualTagName("trans-unit") and xmlLang is not None and xmlLang != outerLanguage): raise XliffParseError( "{file}: the 'xml:lang' attribute of a '{thisTag}' element " "inside a 'trans-unit' element ({xmlLang}) disagrees with the " "'{thisTag}-language' attribute found on the enclosing 'file' " "element' ({outerLang})".format( file=self.file, thisTag=node.tag, xmlLang=xmlLang, outerLang=outerLanguage)) return node.text or "" # is Qt Linguist's way of # storing the _context_ allowing to distinguish between several # translations that have the same source string. The way described in # the XLIFF standard, using inside , is only # usable in Qt Linguist with context-type="x-gettext-msgctxt" for the # 'context' element. It is also a Trolltech invention, and is stored as # TranslatorMessage::m_comment instead of TranslatorMessage::m_context. # The comparison rules in # bool operator==(TranslatorMessageContentPtr tmp1, # TranslatorMessageContentPtr tmp2) # (qt5.git/qttools/src/linguist/shared/translator.cpp) wouldn't suit our # needs, because two TranslatorMessage instances with the same context() # and an empty sourceText() (= master) are considered duplicates even if # they have different values for the comment(). IOW, Qt Linguist's # notion of TranslatorMessage::comment() can't be used to distinguish # between two empty master strings that might have different # translations in different categories. class XliffFormatWriter: """Write to XLIFF files.""" def _insertComments(self, element, container): """Insert translator and developer comments into 'element'.""" for transComment in container.translatorComments: noteElt = et.SubElement(element, "note", attrib={"from": "translator"}) noteElt.text = transComment for devComment in container.developerComments: # Linguist doesn't seem to show developer comments unless # annotates="source" is given. noteElt = et.SubElement(element, "note", attrib={"from": "developer", "annotates": "source"}) noteElt.text = devComment def _appendSimpleTranslationUnit(self, groupElement, idsUsed, tid, translUnit): """Append a TranslationUnit that has no plural forms.""" # The XLIFF 1.2 standard wouldn't require the leading tid.cat here if # we were using one per category, because the XLIFF id only has # to be unique within each element. However: # # 1) Qt Linguist doesn't support multiple elements per XLIFF # file well (they are collapsed upon export). # # 2) It would consider for instance elements with the # same id 'rendering-options:0' from the 'options' and 'menu' # categories as identical, which is undesirable (e.g., the current # Spanish translation capitalizes them differently). # # Therefore, we prepend the category to make sure all XLIFF # ids are unique within the whole XLIFF file (this is done # by AbstractTranslationUnitId.__str__(), called here with str(tid)). idInXliff = str(tid) if idInXliff in idsUsed: raise XliffLogicalWriteError( "{file}: id '{id}' would be used for several 'trans-unit' " "elements. Either the input or the algorithm is buggy." .format(file=self.file, id=idInXliff)) # If you change things here, don't forget # _appendTranslationUnitWithPlural() attrs = {"id": idInXliff, "translate": "yes" if translUnit.translate else "no", "approved": "yes" if translUnit.approved else "no" } transUnitElt = et.SubElement(groupElement, "trans-unit", attrib=attrs) sourceElt = et.SubElement(transUnitElt, "source") sourceElt.text = translUnit.sourceText # This list should never be empty (i.e., one or more translations) assert translUnit.targetTexts, translUnit.targetTexts targetElt = et.SubElement(transUnitElt, "target") targetElt.text = translUnit.targetTexts[0] self._insertComments(transUnitElt, translUnit) return idInXliff # value used for the 'id' attr of the elt def _appendTranslationUnitWithPlural(self, groupElement, idsUsed, tid, translUnit): subgroupElt = et.SubElement(groupElement, "group", id=str(tid), restype="x-gettext-plurals") self._insertComments(subgroupElt, translUnit) idsInXliff = [] for i, pluralForm in enumerate(translUnit.targetTexts): # This is the way Qt Linguist 5.7.1 handles plural forms idInXliff = "{idStr}[{pluralFormIndex}]".format(idStr=tid, pluralFormIndex=i) # If you change things here, don't forget # _appendSimpleTranslationUnit() attrs = {"id": idInXliff, "translate": "yes" if translUnit.translate else "no", "approved": "yes" if translUnit.approved else "no" } transUnitElt = et.SubElement(subgroupElt, "trans-unit", attrib=attrs) sourceElt = et.SubElement(transUnitElt, "source") sourceElt.text = translUnit.sourceText targetElt = et.SubElement(transUnitElt, "target") targetElt.text = pluralForm idsInXliff.append(idInXliff) idsAlreadyUsed = idsUsed.intersection(idsInXliff) if idsAlreadyUsed: raise XliffLogicalWriteError( "{file}: several ids would be reused for different 'trans-unit' " "elements (problematic ids: {ids}). Either the input or the " "algorithm is buggy." .format(file=self.file, ids=idsAlreadyUsed)) # Values used for the 'id' attributes of elements return frozenset(idsInXliff) def _appendContextDevCommentsTranslUnits(self, groupElement, idsUsed, cat, comments): idsInXliff = [] for i, ctxDevComment in enumerate(comments): idInXliff = "{cat}/_contextDevComment-{num}:0".format(cat=cat, num=i) if idInXliff in idsUsed: raise XliffLogicalWriteError( "{file}: id '{id}' would be used for several 'trans-unit' " "elements. This looks like a bug in the algorithm (or an " "extreme coincidence!)." .format(file=self.file, id=idInXliff)) transUnitElt = et.SubElement(groupElement, "trans-unit", id=idInXliff) sourceElt = et.SubElement(transUnitElt, "source") sourceElt.text = "" targetElt = et.SubElement(transUnitElt, "target") targetElt.text = "" ctxGroupElt = et.SubElement(transUnitElt, "context-group") ctxElt = et.SubElement(ctxGroupElt, "context", attrib={"context-type": "x-gettext-msgctxt"}) ctxElt.text = ctxDevComment.mainComment self._insertComments(transUnitElt, ctxDevComment) idsInXliff.append(idInXliff) return idsInXliff # values used for 'id' attrs of elts def writeTranslation(self, transl, filePath): """Write a translation to an XLIFF file or to the standard output. transl: a Translation instance filePath: path to a file, or '-' to designate the standard output """ xliffAttrs = { "version": "1.2", "xmlns": "urn:oasis:names:tc:xliff:document:1.2", "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", "xsi:schemaLocation": "urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2.xsd" } xliffElt = et.Element("xliff", attrib=xliffAttrs) attrs = { # Since Qt Linguist (at least 5.7.1) will collapse all # elements into one upon export, with this attribute empty, # let's do the same here to minimize the size of diffs. "original": "", "source-language": langCodeForXliff(transl.sourceLanguage), "target-language": langCodeForXliff(transl.targetLanguage), # If we could use according to the XLIFF 1.2 standard, # the correct datatype would be 'xml'. Since we can't, let's # use the same type as Linguist upon export, to minimize diff # size again. "datatype": "plaintext", "xml:space": "preserve" } fileElt = et.SubElement(xliffElt, "file", attrib=attrs) bodyElt = et.SubElement(fileElt, "body") idsUsed = set() # values used for the 'id' attrs of elements for cat, t in transl.translations.items(): # already sorted (OrderedDict) # See the comment above the class definition groupElt = et.SubElement(bodyElt, "group", restype="x-trolltech-linguist-context", resname=cat) contextDevComments = transl.contextDevComments[cat] if contextDevComments: idsUsed.update(self._appendContextDevCommentsTranslUnits( groupElt, idsUsed, cat, contextDevComments)) for tid, translUnit in sorted(t.items()): if translUnit.isPlural: idsUsed.update( self._appendTranslationUnitWithPlural( groupElt, idsUsed, tid, translUnit)) else: idsUsed.add(self._appendSimpleTranslationUnit( groupElt, idsUsed, tid, translUnit)) misc.indentXmlTree(xliffElt) if filePath == "-": enc = "unicode" # ElementTree.write() will output str objects filePathOrObj = sys.stdout else: enc = "UTF-8" filePathOrObj = filePath et.ElementTree(xliffElt).write(filePathOrObj, encoding=enc, xml_declaration=True) class XliffFormatHandler(AbstractFormatHandler): """Read from, and write to XLIFF files.""" standardExtension = "xlf" # used by some base class methods def readTranslation(self, filePath): reader = XliffFormatReader(filePath) return reader.parse() def writeTranslation(self, transl, filePath): """Write a translation to an XLIFF file or to the standard output. transl: a Translation instance filePath: path to a file, or '-' to designate the standard output """ writer = XliffFormatWriter() return writer.writeTranslation(transl, filePath) registerFormatHandler("xliff", XliffFormatHandler) # ***************************************************************************** # * Classes for reading FlightGear's XML localization files * # ***************************************************************************** # Could also be a dict def L10nResMgrForCat(category): """Map from category/resource name to L10NResourceManager class.""" if category in ("menu", "options", "sys", "tips"): return BasicL10NResourceManager else: assert False, "unexpected category: {!r}".format(category) # Convenience class for holding the result returned by some high-level # methods reading FlightGear's XML localization files. # # transl: a Translation instance # nbWhitespacePbs: number of whitespace “problems” encountered in this # translation (leading or trailing whitespace in # strings...). Note that for a non-default Translation, # only the problems in translations (targetTexts) are # counted: those strings contained in the particular # non-default FlightGear XML localization file. TranslationData = collections.namedtuple("TranslationData", ["transl", "nbWhitespacePbs"]) class L10NResourcePoolManager: def __init__(self, translationsDir, logger=dummyLogger): """Initialize a L10NResourcePoolManager instance. translationsDir should contain subdirs such as 'en_GB', 'fr_FR', 'de', 'it'... and the value of DEFAULT_LANG_DIR. """ self.translationsDir = translationsDir self.logger = logger self.masterTranslDir = os.path.join(translationsDir, DEFAULT_LANG_DIR) def readFgMasterTranslationFile(self, xmlFilePath, targetTransl, cat): """Read the FlightGear default translation for a given category. This is an XML PropertyList file, $FG_ROOT/Translations/default/.xml at the time of this writing. Return the number of whitespace (potential) problems found. """ resMgr = L10nResMgrForCat(cat) return resMgr._readFgResourceFile(xmlFilePath, None, targetTransl, cat, None, logger=self.logger) def readFgTranslationFile(self, xmlFilePath, masterTransl, targetTransl, cat, langCode): """Read a FlightGear translation file for a given category. This is an XML PropertyList file, $FG_ROOT/Translations//.xml directory at the time of this writing. Return the number of whitespace (potential) problems found. """ resMgr = L10nResMgrForCat(cat) return resMgr._readFgResourceFile(xmlFilePath, masterTransl, targetTransl, cat, langCode, logger=self.logger) def readFgMasterTranslation(self): """Read the FlightGear default translation. This is built from XML PropertyList files in directory 'masterTranslDir' (normally $FG_ROOT/Translations/default, at the time of this writing). """ transl = Translation("en_US", None) # master translation nbWhitespaceProblems = 0 for cat in CATEGORIES: xmlFilePath = os.path.join(self.masterTranslDir, cat + ".xml") resMgr = L10nResMgrForCat(cat) nbWhitespaceProblems += self.readFgMasterTranslationFile( xmlFilePath, transl, cat) # I don't put the number of whitespace problems in an attribute # of the Translation, otherwise there could be expectations that # it is updated when the Translation is modified... return TranslationData(transl, nbWhitespaceProblems) def readFgTranslation(self, masterTransl, langCode): """Read a FlightGear non-default translation. This is built from XML PropertyList files in directory 'languageDir' (normally $FG_ROOT/Translations/, at the time of this writing). """ languageDir = os.path.join(self.translationsDir, langCode) self.logger.info("processing language dir {!r}".format(languageDir)) # I assume (and believe) the default translation in FlightGear # corresponds to U.S. English. translation = Translation("en_US", langCode) nbWhitespaceProblems = 0 for cat in CATEGORIES: xmlFilePath = os.path.join(languageDir, cat + ".xml") if os.path.isfile(xmlFilePath): nbWhitespaceProblems += self.readFgTranslationFile( xmlFilePath, masterTransl, translation, cat, langCode) # See comment in readFgMasterTranslation() return TranslationData(translation, nbWhitespaceProblems) def writeTranslation(self, formatHandler, transl, filePath=None): """Generic writing of a Translation instance. formatHandler: instance of a subclass of AbstractFormatHandler transl: Translation object """ if filePath is None: filePath = formatHandler.defaultFilePath(self.translationsDir, transl.targetLanguage) if filePath != "-": d = os.path.dirname(filePath) if not os.path.exists(d): self.logger.notice("creating directory '{}'".format(d)) os.makedirs(os.path.dirname(filePath), exist_ok=True) return formatHandler.writeTranslation(transl, filePath) def genSkeletonTranslation(self, langCode): """Generate a skeleton Translation instance for a particular language. The Translation object will have the 'targetTexts' attribute of each TranslationUnit set to denote only one empty translation. This method is useful when adding a translation for a new language. """ # Create a new master translation translation = self.readFgMasterTranslation().transl # This is not a master translation anymore translation.targetLanguage = langCode return translation def writeSkeletonTranslation(self, formatHandler, langCode, filePath=None): transl = self.genSkeletonTranslation(langCode) return self.writeTranslation(formatHandler, transl, filePath) class L10NResourceManagerBase: """Base class for *L10NResourceManager classes.""" @classmethod def checkForLeadingOrTrailingWhitespace(cls, langCode, tid, string_, logger=dummyLogger): whitespacePb = None nbWhitespaceProblems = 0 if string_.lstrip() != string_: whitespacePb = "leading" if string_.rstrip() != string_: if whitespacePb is not None: whitespacePb = "leading and trailing" else: whitespacePb = "trailing" if whitespacePb is not None: nbWhitespaceProblems += 1 if langCode is None: place = "default translation" langDir = DEFAULT_LANG_DIR else: place = "translation" langDir = langCode logger.warning("{langDir}/{cat}: {kind} whitespace in {place} for " "string {id!r}: {string!r}" .format(langDir=langDir, cat=tid.cat, id=tid.id(), place=place, string=string_, kind=whitespacePb)) return nbWhitespaceProblems class BasicL10NResourceManager(L10NResourceManagerBase): """Resource manager for FG XML i18n files with the simplest structure. This is suitable for resources (menu, options, tips, etc.) where translations are in direct children of the element, with no more structure. """ @classmethod def _findMainNode(cls, rootNode): """ Return the node directly containing the translations in an FG XML file. This method was added when sys.xml had all its useful contents inside a 'splash' top-level node, instead of having a flat structure like the other FG XML i18n files (options.xml, menu.xml, etc.). At that time, we thus had a SysL10NResourceManager class derived from this class only to override this method. """ assert rootNode.tag == "PropertyList", rootNode.tag return rootNode @classmethod def _readFgResourceFile(cls, xmlFilePath, masterTransl, targetTransl, cat, langCode, logger=dummyLogger): """Read a FlightGear XML localization file. If 'masterTransl' and 'langCode' are None, read the default (i.e., master) translation, normally en_US. The method updates 'targetTransl', without clearing it first (it should probably be empty when the method is called). This method has to know how data is laid out inside the FlightGear XML localization file to be read ('xmlFilePath'). For this reason, it is typically overridden in subclasses of L10NResourceManagerBase. """ if masterTransl is None: assert langCode is None, langCode nbWhitespaceProblems = 0 tree = et.parse(xmlFilePath) rootNode = tree.getroot() mainNode = cls._findMainNode(rootNode) for childNode in mainNode: n = int(childNode.get("n", default=0)) tid = BasicTranslationUnitId(cat, childNode.tag, n) # childNode.text could be None for an empty translation text = childNode.text or "" nbWhitespaceProblems += cls.checkForLeadingOrTrailingWhitespace( langCode, tid, text, logger) pluralAttr = childNode.get("with-plural", default="false") if pluralAttr in ("true", "false"): isPlural = (pluralAttr == "true") else: logger.warning( "{file}: invalid value for the 'with-plural' attribute of " "{tid} (expected 'true' or 'false'): {val!r}".format( file=xmlFilePath, tid=tid, val=pluralAttr)) continue if masterTransl is None: targetTransl.addMasterString(tid, text, isPlural=isPlural) elif tid not in masterTransl[cat]: logger.warning( "{file}: translated string not in the default " "translation: {tid}".format(file=xmlFilePath, tid=tid)) else: targetTransl.addTranslation( masterTransl, tid, masterTransl[cat][tid].sourceText, [text], isPlural=isPlural, logger=logger) return nbWhitespaceProblems