diff --git a/tools/mod_translation_updater.py b/tools/mod_translation_updater.py new file mode 100644 index 000000000..0134996c9 --- /dev/null +++ b/tools/mod_translation_updater.py @@ -0,0 +1,526 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Script to generate Luanti translation template files and update +# translation files. +# +# Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer, +# 2023 Wuzzy. +# License: LGPLv2.1 or later (see LICENSE file for details) + +import os, fnmatch, re, shutil, errno +from sys import argv as _argv +from sys import stderr as _stderr +from collections import defaultdict + +# Running params +params = {"recursive": False, + "help": False, + "verbose": False, + "folders": [], + "old-file": False, + "break-long-lines": False, + "print-source": False, + "truncate-unused": False, + "dofile-order": False, +} +# Available CLI options +options = {"recursive": ['--recursive', '-r'], + "help": ['--help', '-h'], + "verbose": ['--verbose', '-v'], + "old-file": ['--old-file', '-o'], + "break-long-lines": ['--break-long-lines', '-b'], + "print-source": ['--print-source', '-p'], + "truncate-unused": ['--truncate-unused', '-t'], + "dofile-order": ['--dofile-order', '-d'] +} + +# Strings longer than this will have extra space added between +# them in the translation files to make it easier to distinguish their +# beginnings and endings at a glance +doublespace_threshold = 80 + +# These symbols mark comment lines showing the source file name. +# A comment may look like "##[ init.lua ]##". +symbol_source_prefix = "##[ " +symbol_source_suffix = " ]##" + +# comment to mark the section of old/unused strings +comment_unused = "##### not used anymore #####" + +def set_params_folders(tab: list): + '''Initialize params["folders"] from CLI arguments.''' + # Discarding argument 0 (tool name) + for param in tab[1:]: + stop_param = False + for option in options: + if param in options[option]: + stop_param = True + break + if not stop_param: + params["folders"].append(os.path.abspath(param)) + +def set_params(tab: list): + '''Initialize params from CLI arguments.''' + for option in options: + for option_name in options[option]: + if option_name in tab: + params[option] = True + break + +def print_help(name): + '''Prints some help message.''' + print(f'''SYNOPSIS + {name} [OPTIONS] [PATHS...] +DESCRIPTION + {', '.join(options["help"])} + prints this help message + {', '.join(options["recursive"])} + run on all subfolders of paths given + {', '.join(options["old-file"])} + create *.old files + {', '.join(options["break-long-lines"])} + add extra line breaks before and after long strings + {', '.join(options["print-source"])} + add comments denoting the source file + {', '.join(options["verbose"])} + add output information + {', '.join(options["truncate-unused"])} + delete unused strings from files + {', '.join(options["dofile-order"])} + try to order files by their order from init.lua (not recursive) +''') + +def main(): + '''Main function''' + set_params(_argv) + set_params_folders(_argv) + if params["help"]: + print_help(_argv[0]) + else: + # Add recursivity message + print("Running ", end='') + if params["recursive"]: + print("recursively ", end='') + # Running + if len(params["folders"]) >= 2: + print("on folder list:", params["folders"]) + for f in params["folders"]: + if params["recursive"]: + run_all_subfolders(f) + else: + update_folder(f) + elif len(params["folders"]) == 1: + print("on folder", params["folders"][0]) + if params["recursive"]: + run_all_subfolders(params["folders"][0]) + else: + update_folder(params["folders"][0]) + else: + print("on folder", os.path.abspath("./")) + if params["recursive"]: + run_all_subfolders(os.path.abspath("./")) + else: + update_folder(os.path.abspath("./")) + +# Compile pattern for matching lua function call +def compile_func_call_pattern(argument_pattern): + return re.compile( + # Look for beginning of file or anything that isn't a function identifier + r'(?:^|[\.=,{\(\s])' + + # Matches S, FS, NS, or NFS function call + r'N?F?S\s*' + + # The pattern to match argument + argument_pattern, + re.DOTALL) + +# Add parentheses around a pattern +def parenthesize_pattern(pattern): + return ( + # Start of argument: open parentheses and space (optional) + r'\(\s*' + + # The pattern to be parenthesized + pattern + + # End of argument or function call: space, comma, or close parentheses + r'[\s,\)]') + +# Quoted string +# Group 2 will be the string, group 1 and group 3 will be the delimiters (" or ') +# See https://stackoverflow.com/questions/46967465/regex-match-text-in-either-single-or-double-quote +pattern_lua_quoted_string = r'(["\'])((?:\\\1|(?:(?!\1)).)*)(\1)' + +# Double square bracket string (multiline) +pattern_lua_square_bracket_string = r'\[\[(.*?)\]\]' + +# Handles the " ... " or ' ... ' string delimiters +pattern_lua_quoted = compile_func_call_pattern(parenthesize_pattern(pattern_lua_quoted_string)) + +# Handles the [[ ... ]] string delimiters +pattern_lua_bracketed = compile_func_call_pattern(parenthesize_pattern(pattern_lua_square_bracket_string)) + +# Handles like pattern_lua_quoted, but for single parameter (without parentheses) +# See https://www.lua.org/pil/5.html for informations about single argument call +pattern_lua_quoted_single = compile_func_call_pattern(pattern_lua_quoted_string) + +# Same as pattern_lua_quoted_single, but for [[ ... ]] string delimiters +pattern_lua_bracketed_single = compile_func_call_pattern(pattern_lua_square_bracket_string) + +# Handles "concatenation" .. " of strings" +pattern_concat = re.compile(r'["\'][\s]*\.\.[\s]*["\']', re.DOTALL) + +# Handles a translation line in *.tr file. +# Group 1 is the source string left of the equals sign. +# Group 2 is the translated string, right of the equals sign. +pattern_tr = re.compile( + r'(.*)' # Source string + # the separating equals sign, if NOT preceded by @, unless + # that @ is preceded by another @ + r'(?:(?[^\"\']+\.lua)' # filename + r'\1' # matching closing quote + r'\)\s*(?:--.*)?$' # end of line, optional comment + , re.M # multiline flag: ^ and $ are line ends not string ends +) + +# Attempt to read the mod's name from the mod.conf file or folder name. Returns None on failure +def get_modname(folder): + try: + with open(os.path.join(folder, "mod.conf"), "r", encoding='utf-8') as mod_conf: + for line in mod_conf: + match = pattern_name.match(line) + if match: + return match.group(1) + except FileNotFoundError: + folder_name = os.path.basename(folder) + # Special case when run in Luanti's builtin directory + return "__builtin" if folder_name == "builtin" else folder_name + +# If there are already .tr files in /locale, returns a list of their names +def get_existing_tr_files(folder): + out = [] + for root, dirs, files in os.walk(os.path.join(folder, 'locale/')): + for name in files: + if pattern_tr_filename.search(name): + out.append(name) + return out + +# Converts the template dictionary to a text to be written as a file +# dGroupedKeyStrings is a dictionary of source file sets to localized strings +# dOld is a dictionary of existing translations and comments from +# the previous version of this text +def strings_to_text(dGroupedKeyStrings, dOld, mod_name, header_comments, textdomain, templ = None): + # if textdomain is specified, insert it at the top + if textdomain != None: + lOut = [textdomain] # argument is full textdomain line + # otherwise, use mod name as textdomain automatically + else: + lOut = [f"# textdomain: {mod_name}"] + if templ is not None and templ[2] and (header_comments is None or not header_comments.startswith(templ[2])): + # header comments in the template file + lOut.append(templ[2]) + if header_comments is not None: + lOut.append(header_comments) + + for source, localizedStrings in dGroupedKeyStrings.items(): + if params["print-source"] and len(source) > 0: + lOut.append(symbol_source_prefix + " ".join(x.replace("r\\","/") for x in source) + symbol_source_suffix) + for localizedString in localizedStrings: + val = dOld.get(localizedString, {}) + translation = val.get("translation", "") + comment = val.get("comment") + templ_comment = None + if templ: + templ_val = templ[0].get(localizedString, {}) + templ_comment = templ_val.get("comment") + if params["break-long-lines"] and len(localizedString) > doublespace_threshold and not lOut[-1] == "": + lOut.append("") + if templ_comment != None and templ_comment != "" and (comment is None or comment == "" or not comment.startswith(templ_comment)): + lOut.append(templ_comment) + if comment != None and comment != "" and not comment.startswith("# textdomain:"): + lOut.append(comment) + lOut.append(f"{localizedString}={translation}") + if params["break-long-lines"] and len(localizedString) > doublespace_threshold: + lOut.append("") + + # all strings, to report unused strings + dkeyStrings = set(x for y in dGroupedKeyStrings.values() for x in y) + + unusedExist = False + if not params["truncate-unused"]: + for key in dOld: + if key not in dkeyStrings: + val = dOld[key] + translation = val.get("translation") + comment = val.get("comment") + # only keep an unused translation if there was translated + # text or a comment associated with it + if translation != None and (translation != "" or comment): + if not unusedExist: + unusedExist = True + lOut.append("\n\n" + comment_unused + "\n") + if params["break-long-lines"] and len(key) > doublespace_threshold and not lOut[-1] == "": + lOut.append("") + if comment != None: + lOut.append(comment) + lOut.append(f"{key}={translation}") + if params["break-long-lines"] and len(key) > doublespace_threshold: + lOut.append("") + return "\n".join(lOut) + '\n' + +# Gets all translatable strings from a lua file +def read_lua_file_strings(lua_file): + lOut = [] + with open(lua_file, encoding='utf-8') as text_file: + text = text_file.read() + + strings = [] + + for s in pattern_lua_quoted_single.findall(text): + strings.append(s[1]) + for s in pattern_lua_bracketed_single.findall(text): + strings.append(s) + + # Only concatenate strings after matching + # single parameter call (without parantheses) + text = re.sub(pattern_concat, "", text) + + for s in pattern_lua_quoted.findall(text): + strings.append(s[1]) + for s in pattern_lua_bracketed.findall(text): + strings.append(s) + + for s in strings: + found_bad = pattern_bad_luastring.search(s) + if found_bad: + print("SYNTAX ERROR: Unescaped '@' in Lua string: " + s) + continue + s = s.replace('\\"', '"') + s = s.replace("\\'", "'") + s = s.replace("\n", "@n") + s = s.replace("\\n", "@n") + s = s.replace("=", "@=") + lOut.append(s) + return lOut + +# Gets strings from an existing translation file +# returns both a dictionary of translations +# and the full original source text so that the new text +# can be compared to it for changes. +# Returns also header comments in the third return value. +def import_tr_file(tr_file): + dOut = {} + text = None + in_header = True + header_comments = None + textdomain = None + if os.path.exists(tr_file): + with open(tr_file, "r", encoding='utf-8') as existing_file : + # save the full text to allow for comparison + # of the old version with the new output + text = existing_file.read() + existing_file.seek(0) + # a running record of the current comment block + # we're inside, to allow preceeding multi-line comments + # to be retained for a translation line + latest_comment_block = None + for line in existing_file.readlines(): + line = line.rstrip('\n') + # "##### not used anymore #####" comment + if line == comment_unused: + # Always delete the 'not used anymore' comment. + # It will be re-added to the file if neccessary. + latest_comment_block = None + if header_comments != None: + in_header = False + continue + # Comment lines + elif line.startswith("#"): + # Source file comments: ##[ file.lua ]## + if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix): + # This line marks the end of header comments. + if params["print-source"]: + in_header = False + # Remove those comments; they may be added back automatically. + continue + + # Store first occurance of textdomain + # discard all subsequent textdomain lines + if line.startswith("# textdomain:"): + if textdomain == None: + textdomain = line + continue + elif in_header: + # Save header comments (normal comments at top of file) + if not header_comments: + header_comments = line + else: + header_comments = header_comments + "\n" + line + else: + # Save normal comments + if line.startswith("# textdomain:") and textdomain == None: + textdomain = line + elif not latest_comment_block: + latest_comment_block = line + else: + latest_comment_block = latest_comment_block + "\n" + line + + continue + + match = pattern_tr.match(line) + if match: + # this line is a translated line + outval = {} + outval["translation"] = match.group(2) + if latest_comment_block: + # if there was a comment, record that. + outval["comment"] = latest_comment_block + latest_comment_block = None + in_header = False + + dOut[match.group(1)] = outval + return (dOut, text, header_comments, textdomain) + +# Get the order of filenames included as "dofile" +# This is very rough, but "good enough" for now +def read_lua_dofile_order(lua_file): + with open(lua_file, encoding='utf-8') as text_file: + for s in pattern_dofile.finditer(text_file.read()): + yield s.group("filename") + +# Walks all lua files in the mod folder, collects translatable strings, +# and writes it to a template.txt file +# Returns a dictionary of localized strings to source file lists +# that can be used with the strings_to_text function. +def generate_template(folder, mod_name): + dOut = defaultdict(set) + dofile_order = [] + for root, _, files in sorted(list(os.walk(folder))): + for filename in sorted(files, key=str.lower): + if not fnmatch.fnmatch(filename, "*.lua"): continue + fullpath_filename = os.path.join(root, filename) + + found = read_lua_file_strings(fullpath_filename) + if params["verbose"]: + print(f"{fullpath_filename}: {str(len(found))} translatable strings") + + for s in found: + dOut[s].add(os.path.relpath(fullpath_filename, start=folder)) + + # Note the import sequence in init.lua only: + if params["dofile-order"] and root == folder and filename == "init.lua": + if len(dofile_order) == 0: dofile_order.append("init.lua") + dofile_order.extend(read_lua_dofile_order(fullpath_filename)) + + if len(dOut) == 0: + return (None, None) + + # group strings by source occurence + groupedStrings = defaultdict(list) + for d, sources in dOut.items(): + sources = sorted(list(sources), key=str.lower) + if len(dofile_order) > 0 and len(sources) > 1: + # first use the known sources in the given order; then the remainder sorted + sources = [x for x in dofile_order if x in sources] + [x for x in sources if not x in dofile_order] + sources = (sources[0],) # we only care about the first occurence now + if len(sources) == 1 and sources[0] == "init.lua": sources = tuple() # omit init.lua + groupedStrings[sources].append(d) + + # honor "dofile" order (we use that python 3.6+ dicts are insertion-ordered) + if params["dofile-order"] and len(groupedStrings) > 1: + # this is not very efficient, but it really does not matter + # first init.lua, then in dofile order, then rest alphabetically + temp = [(tuple(), groupedStrings[()])] + temp.extend((s,v) for x in dofile_order for s,v in groupedStrings.items() if len(s) > 0 and s[0] == x) + temp.extend(sorted((s,v) for s,v in groupedStrings.items() if len(s) > 0 and not s[0] in dofile_order)) + groupedStrings = dict(temp) + + templ_file = os.path.join(folder, "locale/template.txt") + if not os.path.exists(os.path.dirname(templ_file)): os.makedirs(os.path.dirname(templ_file)) + + # read existing template file to preserve comments + existing_template = import_tr_file(templ_file) + text = strings_to_text(groupedStrings, existing_template[0], mod_name, existing_template[2], existing_template[3]) + with open(templ_file, "wt", encoding='utf-8') as template_file: + template_file.write(text) + + new_template = import_tr_file(templ_file) # re-import to get all new data + return (groupedStrings, new_template) + +# Updates an existing .tr file, copying the old one to a ".old" file +# if any changes have happened +# dNew is the data used to generate the template, it has all the +# currently-existing localized strings +def update_tr_file(dNew, templ, mod_name, tr_file): + if params["verbose"]: + print(f"updating {tr_file}") + + tr_import = import_tr_file(tr_file) + dOld = tr_import[0] + textOld = tr_import[1] + + textNew = strings_to_text(dNew, dOld, mod_name, tr_import[2], tr_import[3], templ) + + if textOld and textOld != textNew: + print(f"{tr_file} has changed.") + if params["old-file"]: + shutil.copyfile(tr_file, f"{tr_file}.old") + + with open(tr_file, "w", encoding='utf-8') as new_tr_file: + new_tr_file.write(textNew) + +# Updates translation files for the mod in the given folder +def update_mod(folder): + if not os.path.exists(os.path.join(folder, "init.lua")): + print(f"Mod folder {folder} is missing init.lua, aborting.") + exit(1) + assert not is_modpack(folder) + modname = get_modname(folder) + print(f"Updating translations for {modname}") + (data, templ) = generate_template(folder, modname) + if data == None: + print(f"No translatable strings found in {modname}") + else: + for tr_file in get_existing_tr_files(folder): + update_tr_file(data, templ, modname, os.path.join(folder, "locale/", tr_file)) + +def is_modpack(folder): + return os.path.exists(os.path.join(folder, "modpack.txt")) or os.path.exists(os.path.join(folder, "modpack.conf")) + +def is_game(folder): + return os.path.exists(os.path.join(folder, "game.conf")) and os.path.exists(os.path.join(folder, "mods")) + +# Determines if the folder being pointed to is a game, mod or a mod pack +# and then runs update_mod accordingly +def update_folder(folder): + if is_game(folder): + run_all_subfolders(os.path.join(folder, "mods")) + elif is_modpack(folder): + run_all_subfolders(folder) + else: + update_mod(folder) + print("Done.") + +def run_all_subfolders(folder): + for modfolder in [f.path for f in os.scandir(folder) if f.is_dir() and not f.name.startswith('.')]: + update_folder(modfolder) + +main()