Get rid of Lua string processing, use UTF-8 strings

This commit is contained in:
Mikita Wiśniewski 2025-01-31 15:33:11 +07:00
parent ee3e358d8f
commit 666f2ebacb
3 changed files with 82 additions and 34 deletions
mods/ITEMS/mcl_signs

View file

@ -1,5 +1,25 @@
# `mcl_signs` API Reference
## Specifics
The signs code internally uses Lua lists (array tables) of UTF-8 codepoints to
process text, because Lua 5.1 treats strings as plain sequences of bytes.
From [Lua 5.1 Reference Manual, §2.2](https://www.lua.org/manual/5.1/manual.html#2.2):
> _String_ represents arrays of characters. Lua is 8-bit clean: strings can
> contain any 8-bit character, including embedded zeros (`'\0'`).
This is OK when all you have is ASCII, where each character really does take up
just 8 bits, or 1 byte. And the code prior to the rework even made some
workarounds to support 2 byte values for the Latin-1 character set. But a UTF-8
character can take up from 1 to 4 bytes! And when you try to treat a 4 byte
character as a 2 byte one, you'll get 2 invalid characters! Unthinkable!
Luckily, modlib's `utf8.lua` comes to the rescue with its codepoint handlers. We
use `utf8.codes` to iterate over user input strings and convert them to those
Lua codepoint lists, which we call _UTF-8 strings_, or _u-strings_ for short.
## Functions
* `mcl_signs.register_sign(name, color, [definition])`
@ -13,9 +33,14 @@
* `mcl_signs.get_text_entity(pos, [force_remove])`
* Finds and returns ObjectRef for text entity for the sign at `pos`
* `force_remove` automatically removes the found entity if truthy
* `mcl_signs.create_lines(str)`
* Converts a string to a line-broken (with hyphens) sequence table of UTF-8
codepoints
* `mcl_signs.string_to_ustring(str, [max_characters])`
* `str` is the string to convert to u-string
* `max_characters` is optional, defines the codepoint index to stop reading
at. 256 by default
* `mcl_signs.ustring_to_string(ustr)`
* Converts a u-string to string. Used for displaying text in sign formspec
* `mcl_signs.ustring_to_line_array(ustr)`
* Converts a u-string to a line-broken list of u-strings, a.k.a. a _line array_
* `mcl_signs.generate_line(codepoints, ypos)`
* Generates a texture string from a codepoints sequence table (for a single
line) using the character map

View file

@ -16,9 +16,12 @@ local CHAR_WIDTH = 5
local SIGN_GLOW_INTENSITY = 14
local LF_CODEPOINT = utf8.codepoint("\n")
local CR_CODEPOINT = utf8.codepoint("\r")
local SP_CODEPOINT = utf8.codepoint(" ")
local DS_CODEPOINT = utf8.codepoint("-") -- used as the wrapping character
--local INVALID_UTF8_STR = {"<", "I", "n", "v", "a", "l", "i", "d", " ", "U", "T", "F", "-", "8", ">"}
local DEFAULT_COLOR = "#000000"
local DYE_TO_COLOR = {
["white"] = "#d0d6d7",
@ -76,7 +79,7 @@ local function get_signdata(pos)
if not def or core.get_item_group(node.name, "sign") < 1 then return end
local meta = core.get_meta(pos)
local text = meta:get_string("text")
local text = core.deserialize(meta:get_string("utext"), true) or {}
local color = meta:get_string("color")
if color == "" then
color = DEFAULT_COLOR
@ -107,20 +110,51 @@ local function get_signdata(pos)
}
end
local function set_signmeta(pos, def)
local function set_signmeta(pos, tbl)
local meta = core.get_meta(pos)
if def.text then meta:set_string("text", def.text) end
if def.color then meta:set_string("color", def.color) end
if def.glow then meta:set_string("glow", def.glow) end
if tbl.text then meta:set_string("utext", core.serialize(tbl.text)) end
if tbl.color then meta:set_string("color", tbl.color) end
if tbl.glow then meta:set_string("glow", tbl.glow) end
end
-- Text processing
local function string_to_line_array(str)
--- Convert a Lua string into a u-string (a list of UTF-8 codepoints).
-- Line endings are normalized while converting: both "\r\n" and a lone "\r"
-- become a single LF codepoint, so text that uses CR-based line endings is
-- kept instead of being cut off at the first carriage return.
-- Reading stops silently when the iterator fails (the pcall guards against
-- invalid UTF-8); everything decoded up to that point is returned.
-- @param str string to convert
-- @param max_characters optional maximum number of codepoints to store
--   (256 by default)
-- @return list of codepoints, at most `max_characters` entries long
local function string_to_ustring(str, max_characters)
	-- limit saved text to 256 characters by default
	-- (4 lines x 15 chars = 60 so this should be more than is ever needed)
	max_characters = max_characters or 256

	local ustr = {}
	local after_cr = false -- true when the previous codepoint was a CR
	local iter = utf8.codes(str)

	-- Count the codepoints actually stored rather than comparing against the
	-- iterator's index, so the limit is exactly `max_characters` characters.
	while #ustr < max_characters do
		local success, i, code = pcall(iter)
		if not success or not i then
			break
		end

		if code == CR_CODEPOINT then
			-- CR (alone or as the first half of CR LF) counts as a line break
			ustr[#ustr + 1] = LF_CODEPOINT
			after_cr = true
		else
			-- the LF of a CR LF pair was already emitted for the CR
			if not (after_cr and code == LF_CODEPOINT) then
				ustr[#ustr + 1] = code
			end
			after_cr = false
		end
	end

	return ustr
end
mcl_signs.string_to_ustring = string_to_ustring
--- Convert a u-string (list of codepoints) back into a Lua string.
-- Each codepoint is encoded with `utf8.char` and the pieces are joined in
-- order.
-- @param ustr list of codepoints
-- @return the resulting string ("" for an empty list)
local function ustring_to_string(ustr)
	-- collect the encoded characters and join them once instead of growing
	-- a string with repeated concatenation inside the loop
	local chars = {}
	for i, code in ipairs(ustr) do
		chars[i] = utf8.char(code)
	end
	return table.concat(chars)
end
mcl_signs.ustring_to_string = ustring_to_string
local function ustring_to_line_array(ustr)
local lines = {}
local line = {}
str = string.gsub(str, "\r\n?", "\n")
for _, code in utf8.codes(str) do
--str = string.gsub(str, "\r\n?", "\n")
for _, code in pairs(ustr) do
if #lines >= NUMBER_OF_LINES then break end
if code == LF_CODEPOINT
@ -139,7 +173,7 @@ local function string_to_line_array(str)
return lines
end
mcl_signs.create_lines = string_to_line_array
mcl_signs.ustring_to_line_array = ustring_to_line_array
local function generate_line(codepoints, ypos)
local parsed = {}
@ -167,7 +201,7 @@ end
mcl_signs.generate_line = generate_line
local function generate_texture(data)
local lines = string_to_line_array(data.text or "")
local lines = ustring_to_line_array(data.text)
local texture = "[combine:" .. SIGN_WIDTH .. "x" .. SIGN_WIDTH
local ypos = 0
local letter_color = data.color or DEFAULT_COLOR
@ -260,7 +294,7 @@ core.register_entity("mcl_signs:text", {
local function show_formspec(player, pos)
if not pos then return end
local meta = core.get_meta(pos)
local old_text = meta:get_string("text")
local old_text = ustring_to_string(core.deserialize(meta:get_string("utext"), true) or {})
core.show_formspec(player:get_player_name(), "mcl_signs:set_text_"..pos.x.."_"..pos.y.."_"..pos.z, table.concat({
"size[6,3]textarea[0.25,0.25;6,1.5;text;",
F(S("Enter sign text:")), ";", F(old_text), "]",
@ -279,25 +313,8 @@ core.register_on_player_receive_fields(function(player, formname, fields)
local pos = vector.new(tonumber(x), tonumber(y), tonumber(z))
if not fields or not fields.text then return end
if not mcl_util.check_position_protection(pos, player) then
-- limit saved text to 256 characters
-- (4 lines x 15 chars = 60 so this should be more than is ever needed)
local text = tostring(fields.text):sub(1, 256)
do -- guard against invalid UTF-8 and crashes down the line
local iter = utf8.codes(text)
while true do
local success, idx_or_error, _ = pcall(iter)
if not success then
text = "Invalid UTF-8"
core.log("warning", ("[mcl_signs] %s tried to insert invalid UTF-8 into a sign at %s"):format(player:get_player_name(), tostring(pos)))
break
elseif not idx_or_error then
break
end
end
end
set_signmeta(pos, {text = text})
local utext = string_to_ustring(fields.text)
set_signmeta(pos, {text = utext})
update_sign(pos)
end
end
@ -306,7 +323,7 @@ end)
-- Node definition callbacks
function sign_tpl.on_place(itemstack, placer, pointed_thing)
local under = pointed_thing.under
do
do -- ensure the node we attach to can actually be attached to
local node = core.get_node(under)
local def = core.registered_nodes[node.name]
if def and def.buildable_to then return itemstack end

View file

@ -59,6 +59,7 @@ local function upgrade_sign_meta(pos)
local meta = core.get_meta(pos)
local color = meta:get_string("mcl_signs:text_color")
local glow = meta:get_string("mcl_signs:glowing_sign")
local text = meta:get_string("text")
if color ~= "" then
meta:set_string("color", color)
meta:set_string("mcl_signs:text_color", "")
@ -69,6 +70,11 @@ local function upgrade_sign_meta(pos)
if glow ~= "" then
meta:set_string("mcl_signs:glowing_sign", "")
end
if text ~= "" then
local ustr = mcl_signs.string_to_ustring(text)
meta:set_string("utext", core.serialize(ustr))
meta:set_string("text", "")
end
mcl_signs.get_text_entity(pos, true) -- the 2nd "true" arg means deleting the entity for respawn
end