Skip to content

Commit

Permalink
Handle integers bigger than 64bit
Browse files Browse the repository at this point in the history
The ability to handle arbitrarily large integers is quite important
for being able to replace the stdlib library.

Unfortunately RapidJSON isn't very flexible here.

For generating JSON it's quite easy to fall back to `Integer#to_s`.

However, for parsing, the only escape hatch is to parse all
integers ourselves, which significantly impacts performance.

It could be a bit better with in-place parsing, but all Ruby functions
expect null-terminated C strings, so it wouldn't work well.

Ruby's number parsing is also not as fast as RapidJSON's.

So for this to be performant, RapidJSON would need a flag that
allows us to parse only over-sized integers ourselves, and not all numbers.

But that would require a new feature upstream.
  • Loading branch information
byroot committed May 16, 2023
1 parent cc26805 commit e8dfd22
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 17 deletions.
11 changes: 10 additions & 1 deletion ext/rapidjson/cext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ static VALUE rb_mRapidJSON;
static VALUE rb_eParseError;
static VALUE rb_eEncodeError;

static VALUE rb_LLONG_MIN = Qnil, rb_ULLONG_MAX = Qnil;

static ID id_to_json;
static ID id_to_s;

Expand Down Expand Up @@ -36,7 +38,8 @@ parse(VALUE _self, VALUE string) {
Reader reader;
char *cstring = StringValueCStr(string); // fixme?
StringStream ss(cstring);
ParseResult ok = reader.Parse(ss, handler);
// TODO: rapidjson::kParseInsituFlag ?
ParseResult ok = reader.Parse<rapidjson::kParseNumbersAsStringsFlag>(ss, handler);

if (!ok) {
rb_raise(rb_eParseError, "JSON parse error: %s (%lu)",
Expand Down Expand Up @@ -67,6 +70,12 @@ Init_rapidjson(void)
id_to_s = rb_intern("to_s");
id_to_json = rb_intern("to_json");

rb_global_variable(&rb_LLONG_MIN);
rb_global_variable(&rb_ULLONG_MAX);

rb_LLONG_MIN = LL2NUM(LLONG_MIN);
rb_ULLONG_MAX = ULL2NUM(ULLONG_MAX);

rb_mRapidJSON = rb_define_module("RapidJSON");
rb_define_module_function(rb_mRapidJSON, "encode", encode, 1);
rb_define_module_function(rb_mRapidJSON, "pretty_encode", pretty_encode, 1);
Expand Down
18 changes: 15 additions & 3 deletions ext/rapidjson/encoder.hh
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,24 @@ class RubyObjectEncoder {

bool negative = rb_big_cmp(b, INT2FIX(0)) == INT2FIX(-1);
if (negative) {
long long ll = rb_big2ll(b);
writer.Int64(ll);
} else {
if (rb_big_cmp(b, rb_LLONG_MIN) != INT2FIX(-1)) {
long long ll = rb_big2ll(b);
writer.Int64(ll);
return;
}
} else if (rb_big_cmp(b, rb_ULLONG_MAX) == INT2FIX(-1)) {
unsigned long long ull = rb_big2ull(b);
writer.Uint64(ull);
return;
}

// If the number is too big, we go through Integer#to_s
VALUE str = rb_funcall(b, id_to_s, 0);
Check_Type(str, T_STRING);

// We should be able to use RawNumber here, but it's buggy
// https://github.com/Tencent/rapidjson/issues/852
writer.RawValue(RSTRING_PTR(str), RSTRING_LEN(str), kNumberType);
}

void encode_float(VALUE v) {
Expand Down
29 changes: 16 additions & 13 deletions ext/rapidjson/parser.hh
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,23 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
return PutValue(b ? Qtrue : Qfalse);
}

bool Int(int i) {
return PutValue(INT2FIX(i));
}

bool Uint(unsigned u) {
return PutValue(INT2FIX(u));
}

bool Int64(int64_t i) {
return PutValue(RB_LONG2NUM(i));
}
bool RawNumber(const char *str, SizeType length, bool copy) {
// TODO: rapidjson::kParseInsituFlag ?
// char tmp_string[length + 1];
// memcpy(tmp_string, str, length);
// tmp_string[length] = '\0';

SizeType index = 0;
if (str[0] == '-') {
index++;
}
for (; index < length; index++) {
if (!isdigit(str[index])) {
return Double(rb_cstr_to_dbl(str, false));
}
}

bool Uint64(uint64_t u) {
return PutValue(RB_ULONG2NUM(u));
return PutValue(rb_cstr2inum(str, 10));
}

bool Double(double d) {
Expand Down
4 changes: 4 additions & 0 deletions test/test_encoder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def test_encode_bignum
assert_equal "18446744073709551615", encode(2**64 - 1)
end

def test_encore_arbitrary_size_num
  # 2**128 does not fit in 64 bits; the encoded output must still be
  # the complete decimal representation of the integer.
  oversized = 2**128
  assert_equal("340282366920938463463374607431768211456", encode(oversized))
end

def test_encode_fixnum_exponents
tests = []
0.upto(65) do |exponent|
Expand Down
9 changes: 9 additions & 0 deletions test/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ def test_parse_string
assert_equal "abcdefghijklmnopqrstuvwxyz", parse('"abcdefghijklmnopqrstuvwxyz"')
end

def test_parse_bignum
  # A JSON integer wider than 64 bits (this one is 2**128) must come back
  # as the exact Integer value, with no truncation or rounding.
  json = "340282366920938463463374607431768211456"
  assert_equal(340282366920938463463374607431768211456, parse(json))
end

def test_parse_huge_floats
  # A decimal with more digits than a Float can represent must parse to
  # the same Float that Ruby produces for the identical literal.
  long_decimal = "34028236692093846.3463374607431768211456"
  assert_equal(34028236692093846.3463374607431768211456, parse(long_decimal))

  # An exponent far below the Float range is expected to yield 0.0.
  underflowing = "123.456e-789"
  assert_equal(0.0, parse(underflowing))
end

def test_parse_invalida
ex = assert_raises RapidJSON::ParseError do
parse("abc")
Expand Down

0 comments on commit e8dfd22

Please sign in to comment.