From e8dfd22612d7ae316e9d62c4a3c8520d7d0b6782 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 7 Apr 2023 13:23:29 +0200 Subject: [PATCH] Handle integers bigger than 64bit The ability to handle arbitrary-size integers is quite important to be able to replace the stdlib library. Unfortunately RapidJSON isn't very flexible here. For generating JSON it's quite easy to fall back to `Integer#to_s`. However for parsing, the only escape hatch is to parse all integers ourselves, which significantly impacts performance. It could be a bit better by parsing in place, but all Ruby functions expect null-terminated C-strings, so it wouldn't work well. Ruby number parsing is also not as fast as RapidJSON's. So for this to be performant, RapidJSON would need a flag that allows us to parse only over-sized integers ourselves, and not all numbers. But that would require a new feature upstream. --- ext/rapidjson/cext.cc | 11 ++++++++++- ext/rapidjson/encoder.hh | 18 +++++++++++++++--- ext/rapidjson/parser.hh | 29 ++++++++++++++++------------- test/test_encoder.rb | 4 ++++ test/test_parser.rb | 9 +++++++++ 5 files changed, 54 insertions(+), 17 deletions(-) diff --git a/ext/rapidjson/cext.cc b/ext/rapidjson/cext.cc index 10478a8..c97e549 100644 --- a/ext/rapidjson/cext.cc +++ b/ext/rapidjson/cext.cc @@ -8,6 +8,8 @@ static VALUE rb_mRapidJSON; static VALUE rb_eParseError; static VALUE rb_eEncodeError; +static VALUE rb_LLONG_MIN = Qnil, rb_ULLONG_MAX = Qnil; + static ID id_to_json; static ID id_to_s; @@ -36,7 +38,8 @@ parse(VALUE _self, VALUE string) { Reader reader; char *cstring = StringValueCStr(string); // fixme? StringStream ss(cstring); - ParseResult ok = reader.Parse(ss, handler); + // TODO: rapidjson::kParseInsituFlag ? 
+ ParseResult ok = reader.Parse(ss, handler); if (!ok) { rb_raise(rb_eParseError, "JSON parse error: %s (%lu)", @@ -67,6 +70,12 @@ Init_rapidjson(void) id_to_s = rb_intern("to_s"); id_to_json = rb_intern("to_json"); + rb_global_variable(&rb_LLONG_MIN); + rb_global_variable(&rb_ULLONG_MAX); + + rb_LLONG_MIN = LL2NUM(LLONG_MIN); + rb_ULLONG_MAX = ULL2NUM(ULLONG_MAX); + rb_mRapidJSON = rb_define_module("RapidJSON"); rb_define_module_function(rb_mRapidJSON, "encode", encode, 1); rb_define_module_function(rb_mRapidJSON, "pretty_encode", pretty_encode, 1); diff --git a/ext/rapidjson/encoder.hh b/ext/rapidjson/encoder.hh index 53cd592..b5f0f78 100644 --- a/ext/rapidjson/encoder.hh +++ b/ext/rapidjson/encoder.hh @@ -67,12 +67,24 @@ class RubyObjectEncoder { bool negative = rb_big_cmp(b, INT2FIX(0)) == INT2FIX(-1); if (negative) { - long long ll = rb_big2ll(b); - writer.Int64(ll); - } else { + if (rb_big_cmp(b, rb_LLONG_MIN) != INT2FIX(-1)) { + long long ll = rb_big2ll(b); + writer.Int64(ll); + return; + } + } else if (rb_big_cmp(b, rb_ULLONG_MAX) == INT2FIX(-1)) { unsigned long long ull = rb_big2ull(b); writer.Uint64(ull); + return; } + + // If the number is too big, we go through Integer#to_s + VALUE str = rb_funcall(b, id_to_s, 0); + Check_Type(str, T_STRING); + + // We should be able to use RawNumber here, but it's buggy + // https://github.com/Tencent/rapidjson/issues/852 + writer.RawValue(RSTRING_PTR(str), RSTRING_LEN(str), kNumberType); } void encode_float(VALUE v) { diff --git a/ext/rapidjson/parser.hh b/ext/rapidjson/parser.hh index 9a17894..b9c4a73 100644 --- a/ext/rapidjson/parser.hh +++ b/ext/rapidjson/parser.hh @@ -33,20 +33,23 @@ struct RubyObjectHandler : public BaseReaderHandler, RubyObjectHandler> { return PutValue(b ? 
Qtrue : Qfalse); } - bool Int(int i) { - return PutValue(INT2FIX(i)); - } - - bool Uint(unsigned u) { - return PutValue(INT2FIX(u)); - } - - bool Int64(int64_t i) { - return PutValue(RB_LONG2NUM(i)); - } + bool RawNumber(const char *str, SizeType length, bool copy) { + // TODO: rapidjson::kParseInsituFlag ? + // char tmp_string[length + 1]; + // memcpy(tmp_string, str, length); + // tmp_string[length] = '\0'; + + SizeType index = 0; + if (str[0] == '-') { + index++; + } + for (; index < length; index++) { + if (!isdigit(str[index])) { + return Double(rb_cstr_to_dbl(str, false)); + } + } - bool Uint64(uint64_t u) { - return PutValue(RB_ULONG2NUM(u)); + return PutValue(rb_cstr2inum(str, 10)); } bool Double(double d) { diff --git a/test/test_encoder.rb b/test/test_encoder.rb index 60cfe6a..75b8a8f 100644 --- a/test/test_encoder.rb +++ b/test/test_encoder.rb @@ -33,6 +33,10 @@ def test_encode_bignum assert_equal "18446744073709551615", encode(2**64 - 1) end + def test_encore_arbitrary_size_num + assert_equal "340282366920938463463374607431768211456", encode(2**128) + end + def test_encode_fixnum_exponents tests = [] 0.upto(65) do |exponent| diff --git a/test/test_parser.rb b/test/test_parser.rb index fab2bfa..ea28083 100644 --- a/test/test_parser.rb +++ b/test/test_parser.rb @@ -59,6 +59,15 @@ def test_parse_string assert_equal "abcdefghijklmnopqrstuvwxyz", parse('"abcdefghijklmnopqrstuvwxyz"') end + def test_parse_bignum + assert_equal 340282366920938463463374607431768211456, parse("340282366920938463463374607431768211456") + end + + def test_parse_huge_floats + assert_equal 34028236692093846.3463374607431768211456, parse("34028236692093846.3463374607431768211456") + assert_equal 0.0, parse("123.456e-789") + end + def test_parse_invalida ex = assert_raises RapidJSON::ParseError do parse("abc")