Skip to content

Commit

Permalink
Merge pull request #645 from evo-lua/iconv-bindings-update
Browse files Browse the repository at this point in the history
Expose the low-level conversion interface as part of the iconv FFI bindings
  • Loading branch information
rdw-software authored Jan 16, 2025
2 parents 42de395 + 10c5135 commit 08b2007
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 3 deletions.
74 changes: 74 additions & 0 deletions Benchmarks/iconv-charset-conversion.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
local console = require("console")
local iconv = require("iconv")
local ffi = require("ffi")

-- Number of times each benchmark variant is invoked inside its timed loop
local SAMPLE_SIZE = 500000

--- Converts a fixed CP949-encoded sample to UTF-8 by driving the raw iconv bindings directly.
-- Opens a conversion descriptor, performs a single iconv call and closes the descriptor again.
-- @treturn string the bytes that iconv wrote to the output buffer
-- @return the raw return value of the iconv call
local function iconv_lowlevel()
	local input = "\192\175\192\250\192\206\197\205\198\228\192\204\189\186"
	-- Arguments are passed as (target encoding, source encoding)
	local descriptor = iconv.bindings.iconv_open("UTF-8", "CP949")

	-- iconv advances the pointer and decrements the size it is given, so both are passed by reference
	local inputSize = ffi.new("size_t[1]", #input)
	local inputBuffer = ffi.new("char[?]", #input, input)
	local inputRef = ffi.new("char*[1]", inputBuffer)

	-- Allocate exactly the capacity that is reported to iconv; a fixed-size buffer would silently
	-- disagree with the reported size as soon as the input grows
	local worstCaseOutputSize = #input * 4
	local outputSize = ffi.new("size_t[1]", worstCaseOutputSize)
	local outputBuffer = ffi.new("char[?]", worstCaseOutputSize)
	local outputRef = ffi.new("char*[1]", outputBuffer)

	local result = iconv.bindings.iconv(descriptor, inputRef, inputSize, outputRef, outputSize)
	-- outputSize now holds the unused capacity; the difference is the number of bytes written
	local numBytesWritten = worstCaseOutputSize - outputSize[0]
	local converted = ffi.string(outputBuffer, numBytesWritten)

	iconv.bindings.iconv_close(descriptor)
	return converted, result
end

--- Converts the fixed CP949 sample to UTF-8 through the high-level iconv.convert wrapper.
-- @return the first two values produced by iconv.convert, in the same order
local function iconv_lua()
	local cp949Sample = "\192\175\192\250\192\206\197\205\198\228\192\204\189\186"
	local converted, statusMessage = iconv.convert(cp949Sample, "CP949", "UTF-8")
	return converted, statusMessage
end

--- Converts the fixed CP949 sample to UTF-8 with a single call to the iconv_convert binding.
-- The sample is copied into a string buffer first so that this variant processes the same
-- workload as the other benchmarks (an empty buffer would make it convert zero bytes).
-- @return the result value returned by iconv_convert
local function iconv_cpp()
	local input = "\192\175\192\250\192\206\197\205\198\228\192\204\189\186"
	local inputBuffer = buffer.new(#input):put(input)
	local outputBuffer = buffer.new(1024)
	-- reserve hands out a writable pointer plus the capacity actually available behind it
	local ptr, len = outputBuffer:reserve(1024)
	local result = iconv.bindings.iconv_convert(inputBuffer, #inputBuffer, "CP949", "UTF-8", ptr, len)
	return result
end

math.randomseed(os.clock())

-- Builds a benchmark closure that times SAMPLE_SIZE consecutive calls of the given routine
-- under the given timer label
local function createBenchmark(label, convert)
	return function()
		console.startTimer(label)
		for _ = 1, SAMPLE_SIZE do
			convert()
		end
		console.stopTimer(label)
	end
end

local availableBenchmarks = {
	createBenchmark("[FFI] Low-level API (tedious and slow, but the most flexible)", iconv_lowlevel),
	createBenchmark("[FFI] One-shot C++ conversion (fast but less flexible)", iconv_cpp),
	createBenchmark("[FFI] Lua-friendly wrapper (safer, but slower)", iconv_lua),
}

-- The execution order is shuffled before the benchmarks are run one after another
table.shuffle(availableBenchmarks)

for index = 1, #availableBenchmarks do
	availableBenchmarks[index]()
end
23 changes: 21 additions & 2 deletions Runtime/Bindings/FFI/iconv/iconv.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ local ffi_string = ffi.string
local tonumber = tonumber
local tostring = tostring

local iconv = {}
-- Module table of the iconv bindings.
-- errorMessages collects the message strings handed back as the second return value on failure;
-- INVALID_CONVERSION_HANDLE is the one returned by iconv.try_close.
local iconv = {
errorMessages = {
INVALID_CONVERSION_HANDLE = "Cannot close an invalid iconv_t descriptor",
},
}

iconv.cdefs = [[
typedef void* iconv_t;
typedef struct iconv_result_t {
uint8_t status_code;
size_t num_bytes_written;
Expand All @@ -19,6 +23,12 @@ typedef struct iconv_result_t {
struct static_iconv_exports_table {
iconv_result_t (*iconv_convert)(char* input, size_t input_size, const char* input_encoding, const char* output_encoding, char* output, size_t output_size);
iconv_t (*iconv_open)(const char* input_encoding, const char* output_encoding);
int (*iconv_close)(iconv_t conversion_descriptor);
size_t (*iconv)(iconv_t conversion_descriptor, char** input, size_t* input_size, char** output, size_t* output_size);
// Shared constants
size_t CHARSET_CONVERSION_FAILED;
};
]]
Expand Down Expand Up @@ -67,4 +77,13 @@ function iconv.convert(input, inputEncoding, outputEncoding)
return tostring(outputBuffer), ffi_strerror(0)
end

--- Closes a conversion descriptor unless it is the failure sentinel.
-- @param descriptor the handle to close
-- @return the result of iconv_close for any handle other than the sentinel; otherwise nil
-- followed by the INVALID_CONVERSION_HANDLE message
function iconv.try_close(descriptor)
	local handleValue = ffi.cast("size_t", descriptor)
	local isFailureSentinel = handleValue == iconv.bindings.CHARSET_CONVERSION_FAILED
	if isFailureSentinel then
		return nil, iconv.errorMessages.INVALID_CONVERSION_HANDLE
	end

	-- Only reached for other handles, because MINGW64's iconv can't handle closing invalid descriptors
	return iconv.bindings.iconv_close(descriptor)
end

return iconv
1 change: 1 addition & 0 deletions Runtime/Bindings/FFI/iconv/iconv_aliases.h
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Handle type for iconv conversion descriptors, declared here as a plain void pointer
typedef void* iconv_t;
6 changes: 6 additions & 0 deletions Runtime/Bindings/FFI/iconv/iconv_exports.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ typedef struct iconv_result_t {

// Table of function pointers and constants that is handed to the FFI layer
struct static_iconv_exports_table {
	// One-shot helper: converts input (input_size bytes) from input_encoding to output_encoding
	iconv_result_t (*iconv_convert)(char* input, size_t input_size, const char* input_encoding, const char* output_encoding, char* output, size_t output_size);
	// Raw iconv_open: like the POSIX function, it takes the target encoding FIRST and the source encoding second
	iconv_t (*iconv_open)(const char* output_encoding, const char* input_encoding);
	// Raw iconv_close: releases a descriptor obtained from iconv_open
	int (*iconv_close)(iconv_t conversion_descriptor);
	// Raw iconv: the buffer pointers and remaining sizes are passed by reference
	size_t (*iconv)(iconv_t conversion_descriptor, char** input, size_t* input_size, char** output, size_t* output_size);

	// Shared constants
	// Failure sentinel, i.e. (size_t)-1; compare against it after casting a descriptor to size_t
	size_t CHARSET_CONVERSION_FAILED;
};
8 changes: 7 additions & 1 deletion Runtime/Bindings/FFI/iconv/iconv_ffi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ iconv_result_t iconv_convert(char* input, size_t input_length, const char* input
size_t num_input_bytes_left = input_length;

iconv_t conversion_descriptor = iconv_open(output_encoding, input_encoding);
if(conversion_descriptor == (iconv_t)-1) {
if(reinterpret_cast<size_t>(conversion_descriptor) == iconv_ffi::CHARSET_CONVERSION_FAILED) {
result.message = strerror(errno);
result.status_code = errno;
result.num_bytes_written = 0;
Expand Down Expand Up @@ -50,6 +50,12 @@ namespace iconv_ffi {
void* getExportsTable() {
static struct static_iconv_exports_table exports = {
.iconv_convert = &iconv_convert,
.iconv_open = &iconv_open,
.iconv_close = &iconv_close,
.iconv = &iconv,

// Shared constants
.CHARSET_CONVERSION_FAILED = CHARSET_CONVERSION_FAILED,
};

return &exports;
Expand Down
3 changes: 3 additions & 0 deletions Runtime/Bindings/FFI/iconv/iconv_ffi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
#include <cstddef>
#include <cstdint>

#include <iconv.h>
#include "iconv_exports.h"

namespace iconv_ffi {
	// Failure sentinel shared with the FFI layer: all bits set, i.e. the value of (size_t)-1.
	// Spelled with std::size_t throughout, since <cstddef> only guarantees the std-qualified name.
	constexpr std::size_t CHARSET_CONVERSION_FAILED = static_cast<std::size_t>(-1);

	// Returns a pointer to the table of exported iconv functions and constants
	void* getExportsTable();
}
52 changes: 52 additions & 0 deletions Tests/BDD/iconv-library.spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,58 @@ describe("iconv", function()
assertEquals(numBytesWritten, 12)
end)
end)

describe("iconv_open", function()
	-- Handles opened by the setup hook, keyed by a descriptive label
	local openHandles = {}

	before(function()
		openHandles.valid = iconv.bindings.iconv_open("CP949", "UTF-8")
		openHandles.invalid = iconv.bindings.iconv_open("Not-a-real-encoding", "UTF-8")
		assertEquals(table.count(openHandles), 2)
	end)

	after(function()
		-- try_close skips the failure sentinel, so the invalid handle can be passed as well
		for _, handle in pairs(openHandles) do
			iconv.try_close(handle)
		end
	end)

	it("should indicate an error if the requested conversion isn't supported", function()
		local handle = iconv.bindings.iconv_open("Not-a-real-encoding", "UTF-8")
		local handleValue = ffi.cast("size_t", handle)
		assertEquals(handleValue, iconv.bindings.CHARSET_CONVERSION_FAILED)
		iconv.try_close(handle)
	end)

	it("should return a valid handle if the conversion is supported", function()
		local handle = iconv.bindings.iconv_open("CP949", "UTF-8")
		local isFailureSentinel = ffi.cast("size_t", handle) == iconv.bindings.CHARSET_CONVERSION_FAILED
		assertFalse(isFailureSentinel)
		iconv.try_close(handle)
	end)
end)

describe("iconv", function()
	it("should be able to convert Windows encodings to UTF-8", function()
		-- Arguments are passed as (target encoding, source encoding)
		local descriptor = iconv.bindings.iconv_open("UTF-8", "CP949")
		-- The handle is pointer cdata and must be cast to size_t before the check: the FFI treats a
		-- pointer-vs-integer (in)equality test as "always unequal", which would let a failed open pass
		local isValidDescriptor = ffi.cast("size_t", descriptor) ~= iconv.bindings.CHARSET_CONVERSION_FAILED
		assert(isValidDescriptor, "Failed to create conversion descriptor")

		local input = "\192\175\192\250\192\206\197\205\198\228\192\204\189\186"
		local inputSize = ffi.new("size_t[1]", #input)
		local inputBuffer = ffi.new("char[?]", #input, input)
		local inputRef = ffi.new("char*[1]", inputBuffer)

		-- Single source of truth for the buffer capacity and the size reported to iconv
		local outputCapacity = 256
		local outputSize = ffi.new("size_t[1]", outputCapacity)
		local outputBuffer = ffi.new("char[?]", outputCapacity)
		local outputRef = ffi.new("char*[1]", outputBuffer)

		local result = iconv.bindings.iconv(descriptor, inputRef, inputSize, outputRef, outputSize)
		assert(result ~= iconv.bindings.CHARSET_CONVERSION_FAILED, "Conversion failed")

		-- outputSize now holds the unused capacity; the difference is the number of bytes written
		local convertedSize = outputCapacity - outputSize[0]
		local converted = ffi.string(outputBuffer, convertedSize)
		assertEquals(converted, "유저인터페이스")

		iconv.bindings.iconv_close(descriptor)
	end)
end)
end)

describe("convert", function()
Expand Down

0 comments on commit 08b2007

Please sign in to comment.