Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON::GeneratorError expose invalid object #712

Merged
merged 1 commit into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 41 additions & 13 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,28 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data

static int usascii_encindex, utf8_encindex, binary_encindex;

#ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN()
#endif
static void raise_generator_error_str(VALUE invalid_object, VALUE str)
{
VALUE exc = rb_exc_new_str(eGeneratorError, str);
rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
rb_exc_raise(exc);
}

#ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN()
#endif
static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
VALUE str = rb_vsprintf(fmt, args);
va_end(args);
raise_generator_error_str(invalid_object, str);
}

/* Converts in_string to a JSON string (without the wrapping '"'
* characters) in FBuffer out_buffer.
*
Expand Down Expand Up @@ -867,6 +889,17 @@ static inline int enc_utf8_compatible_p(int enc_idx)
return 0;
}

static VALUE encode_json_string_try(VALUE str)
{
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
}

static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
{
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
return Qundef;
}

static inline VALUE ensure_valid_encoding(VALUE str)
{
int encindex = RB_ENCODING_GET(str);
Expand All @@ -886,7 +919,7 @@ static inline VALUE ensure_valid_encoding(VALUE str)
}
}

str = rb_funcall(str, i_encode, 1, Encoding_UTF_8);
str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
}
return str;
}
Expand All @@ -909,7 +942,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
}
break;
default:
rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
break;
}
fbuffer_append_char(buffer, '"');
Expand Down Expand Up @@ -957,10 +990,8 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
char allow_nan = state->allow_nan;
VALUE tmp = rb_funcall(obj, i_to_s, 0);
if (!allow_nan) {
if (isinf(value)) {
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
} else if (isnan(value)) {
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
if (isinf(value) || isnan(value)) {
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", tmp);
}
}
fbuffer_append_str(buffer, tmp);
Expand Down Expand Up @@ -1008,7 +1039,7 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON
default:
general:
if (state->strict) {
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
} else if (rb_respond_to(obj, i_to_json)) {
tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
Check_Type(tmp, T_STRING);
Expand Down Expand Up @@ -1036,10 +1067,6 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
struct generate_json_data *data = (struct generate_json_data *)d;
fbuffer_free(data->buffer);

if (RBASIC_CLASS(exc) == rb_path2class("Encoding::UndefinedConversionError")) {
exc = rb_exc_new_str(eGeneratorError, rb_funcall(exc, rb_intern("message"), 0));
}

rb_exc_raise(exc);

return Qundef;
Expand Down Expand Up @@ -1537,10 +1564,11 @@ void Init_generator(void)
VALUE mExt = rb_define_module_under(mJSON, "Ext");
VALUE mGenerator = rb_define_module_under(mExt, "Generator");

rb_global_variable(&eGeneratorError);
eGeneratorError = rb_path2class("JSON::GeneratorError");

rb_global_variable(&eNestingError);
eNestingError = rb_path2class("JSON::NestingError");
rb_gc_register_mark_object(eGeneratorError);
rb_gc_register_mark_object(eNestingError);

cState = rb_define_class_under(mGenerator, "State", rb_cObject);
rb_define_alloc_func(cState, cState_s_allocate);
Expand Down
34 changes: 19 additions & 15 deletions java/src/json/ext/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.RubyException;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
Expand Down Expand Up @@ -254,7 +255,7 @@ void generate(ThreadContext context, Session session, RubyFloat object, OutputSt

if (Double.isInfinite(value) || Double.isNaN(value)) {
if (!session.getState(context).allowNaN()) {
throw Utils.newException(context, Utils.M_GENERATOR_ERROR, object + " not allowed in JSON");
throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable();
}
}

Expand Down Expand Up @@ -429,20 +430,23 @@ int guessSize(ThreadContext context, Session session, RubyString object) {
void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException {
try {
object = ensureValidEncoding(context, object);
StringEncoder stringEncoder = session.getStringEncoder(context);
ByteList byteList = object.getByteList();
switch (object.scanForCodeRange()) {
case StringSupport.CR_7BIT:
stringEncoder.encodeASCII(context, byteList, buffer);
break;
case StringSupport.CR_VALID:
stringEncoder.encode(context, byteList, buffer);
break;
default:
throw stringEncoder.invalidUtf8(context);
}
} catch (RaiseException re) {
throw Utils.newException(context, Utils.M_GENERATOR_ERROR, re.getMessage());
RubyException exc = Utils.buildGeneratorError(context, object, re.getMessage());
exc.setCause(re.getException());
throw exc.toThrowable();
}

StringEncoder stringEncoder = session.getStringEncoder(context);
ByteList byteList = object.getByteList();
switch (object.scanForCodeRange()) {
case StringSupport.CR_7BIT:
stringEncoder.encodeASCII(context, byteList, buffer);
break;
case StringSupport.CR_VALID:
stringEncoder.encode(context, byteList, buffer);
break;
default:
throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable();
}
}
};
Expand Down Expand Up @@ -506,7 +510,7 @@ void generate(ThreadContext context, Session session, IRubyObject object, Output
RubyString generateNew(ThreadContext context, Session session, IRubyObject object) {
GeneratorState state = session.getState(context);
if (state.strict()) {
throw Utils.newException(context, Utils.M_GENERATOR_ERROR, object + " not allowed in JSON");
throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable();
} else if (object.respondsTo("to_json")) {
IRubyObject result = object.callMethod(context, "to_json", state);
if (result instanceof RubyString) return (RubyString)result;
Expand Down
2 changes: 1 addition & 1 deletion java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,6 @@ private void escapeCodeUnit(char c, int auxOffset) {

@Override
protected RaiseException invalidUtf8(ThreadContext context) {
return Utils.newException(context, Utils.M_GENERATOR_ERROR, "source sequence is illegal/malformed utf-8");
return Utils.newException(context, Utils.M_GENERATOR_ERROR, "source sequence is illegal/malformed utf-8");
}
}
12 changes: 12 additions & 0 deletions java/src/json/ext/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,18 @@ static RaiseException newException(ThreadContext context,
return excptn.toThrowable();
}

static RubyException buildGeneratorError(ThreadContext context, IRubyObject invalidObject, RubyString message) {
RuntimeInfo info = RuntimeInfo.forRuntime(context.runtime);
RubyClass klazz = info.jsonModule.get().getClass(M_GENERATOR_ERROR);
RubyException excptn = (RubyException)klazz.newInstance(context, message, Block.NULL_BLOCK);
excptn.setInstanceVariable("@invalid_object", invalidObject);
return excptn;
}

static RubyException buildGeneratorError(ThreadContext context, IRubyObject invalidObject, String message) {
return buildGeneratorError(context, invalidObject, context.runtime.newString(message));
}

static byte[] repeat(ByteList a, int n) {
return repeat(a.unsafeBytes(), a.begin(), a.length(), n);
}
Expand Down
18 changes: 17 additions & 1 deletion lib/json/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,23 @@ class CircularDatastructure < NestingError; end
# :startdoc:

# This exception is raised if a generator or unparser error occurs.
class GeneratorError < JSONError; end
class GeneratorError < JSONError
attr_reader :invalid_object

def initialize(message, invalid_object = nil)
super(message)
@invalid_object = invalid_object
end

def detailed_message(...)
if @invalid_object.nil?
super
else
"#{super}\nInvalid object: #{@invalid_object.inspect}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only quibble I've run into when pulling this into Sidekiq is the newline here. It's injecting formatting into the message, typically if I want to log an error message I don't expect it to take multiple lines in the log output. WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course we expect newlines with backtraces... Could exceptions support a hash of context values to be logged with the error, where invalid_object would already be there? This is pretty standard stuff for structured logging, e.g. Golang.

rescue JSON::GeneratorError => ex
  logger.info { ex.with_context(some: value, another: value).detailed_message }

I guess the bigger question would be if ruby-core wants to move toward more formalized structured logging, by providing these types of APIs.

end
end
end

# For backwards compatibility
UnparserError = GeneratorError # :nodoc:

Expand Down
32 changes: 17 additions & 15 deletions lib/json/truffle_ruby/generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def utf8_to_json(string, script_safe = false) # :nodoc:
string
end

def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
string = string.b
def utf8_to_json_ascii(original_string, script_safe = false) # :nodoc:
string = original_string.b
map = script_safe ? SCRIPT_SAFE_MAP : MAP
string.gsub!(/[\/"\\\x0-\x1f]/n) { map[$&] || $& }
string.gsub!(/(
Expand All @@ -74,7 +74,7 @@ def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
)+ |
[\x80-\xc1\xf5-\xff] # invalid
)/nx) { |c|
c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'"
c.size == 1 and raise GeneratorError.new("invalid utf8 byte: '#{c}'", original_string)
s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0]
s.force_encoding(::Encoding::BINARY)
s.gsub!(/.{4}/n, '\\\\u\&')
Expand All @@ -83,7 +83,7 @@ def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
string.force_encoding(::Encoding::UTF_8)
string
rescue => e
raise GeneratorError.wrap(e)
raise GeneratorError.new(e.message, original_string)
end

def valid_utf8?(string)
Expand Down Expand Up @@ -306,8 +306,10 @@ def generate(obj)
else
result = obj.to_json(self)
end
JSON::TruffleRuby::Generator.valid_utf8?(result) or raise GeneratorError,
"source sequence #{result.inspect} is illegal/malformed utf-8"
JSON::TruffleRuby::Generator.valid_utf8?(result) or raise GeneratorError.new(
"source sequence #{result.inspect} is illegal/malformed utf-8",
obj
)
result
end

Expand Down Expand Up @@ -364,10 +366,10 @@ def generate(obj)
begin
string = string.encode(::Encoding::UTF_8)
rescue Encoding::UndefinedConversionError => error
raise GeneratorError, error.message
raise GeneratorError.new(error.message, string)
end
end
raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding?
raise GeneratorError.new("source sequence is illegal/malformed utf-8", string) unless string.valid_encoding?

if /["\\\x0-\x1f]/n.match?(string)
buf << string.gsub(/["\\\x0-\x1f]/n, MAP)
Expand Down Expand Up @@ -403,7 +405,7 @@ module Object
# special method #to_json was defined for some object.
def to_json(state = nil, *)
if state && State.from_state(state).strict?
raise GeneratorError, "#{self.class} not allowed in JSON"
raise GeneratorError.new("#{self.class} not allowed in JSON", self)
else
to_s.to_json
end
Expand Down Expand Up @@ -454,7 +456,7 @@ def json_transform(state)

result = +"#{result}#{key_json}#{state.space_before}:#{state.space}"
if state.strict? && !(false == value || true == value || nil == value || String === value || Array === value || Hash === value || Integer === value || Float === value)
raise GeneratorError, "#{value.class} not allowed in JSON"
raise GeneratorError.new("#{value.class} not allowed in JSON", value)
elsif value.respond_to?(:to_json)
result << value.to_json(state)
else
Expand Down Expand Up @@ -507,7 +509,7 @@ def json_transform(state)
result << delim unless first
result << state.indent * depth if indent
if state.strict? && !(false == value || true == value || nil == value || String === value || Array === value || Hash === value || Integer === value || Float === value)
raise GeneratorError, "#{value.class} not allowed in JSON"
raise GeneratorError.new("#{value.class} not allowed in JSON", value)
elsif value.respond_to?(:to_json)
result << value.to_json(state)
else
Expand Down Expand Up @@ -536,13 +538,13 @@ def to_json(state = nil, *)
if state.allow_nan?
to_s
else
raise GeneratorError, "#{self} not allowed in JSON"
raise GeneratorError.new("#{self} not allowed in JSON", self)
end
when nan?
if state.allow_nan?
to_s
else
raise GeneratorError, "#{self} not allowed in JSON"
raise GeneratorError.new("#{self} not allowed in JSON", self)
end
else
to_s
Expand All @@ -558,7 +560,7 @@ def to_json(state = nil, *args)
state = State.from_state(state)
if encoding == ::Encoding::UTF_8
unless valid_encoding?
raise GeneratorError, "source sequence is illegal/malformed utf-8"
raise GeneratorError.new("source sequence is illegal/malformed utf-8", self)
end
string = self
else
Expand All @@ -570,7 +572,7 @@ def to_json(state = nil, *args)
%("#{JSON::TruffleRuby::Generator.utf8_to_json(string, state.script_safe)}")
end
rescue Encoding::UndefinedConversionError => error
raise ::JSON::GeneratorError, error.message
raise ::JSON::GeneratorError.new(error.message, self)
end

# Module that holds the extending methods if, the String module is
Expand Down
17 changes: 12 additions & 5 deletions test/json/json_generator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -250,17 +250,20 @@ def test_fast_state
end

def test_allow_nan
assert_raise(GeneratorError) { generate([JSON::NaN]) }
error = assert_raise(GeneratorError) { generate([JSON::NaN]) }
assert_same JSON::NaN, error.invalid_object
assert_equal '[NaN]', generate([JSON::NaN], :allow_nan => true)
assert_raise(GeneratorError) { fast_generate([JSON::NaN]) }
assert_raise(GeneratorError) { pretty_generate([JSON::NaN]) }
assert_equal "[\n NaN\n]", pretty_generate([JSON::NaN], :allow_nan => true)
assert_raise(GeneratorError) { generate([JSON::Infinity]) }
error = assert_raise(GeneratorError) { generate([JSON::Infinity]) }
assert_same JSON::Infinity, error.invalid_object
assert_equal '[Infinity]', generate([JSON::Infinity], :allow_nan => true)
assert_raise(GeneratorError) { fast_generate([JSON::Infinity]) }
assert_raise(GeneratorError) { pretty_generate([JSON::Infinity]) }
assert_equal "[\n Infinity\n]", pretty_generate([JSON::Infinity], :allow_nan => true)
assert_raise(GeneratorError) { generate([JSON::MinusInfinity]) }
error = assert_raise(GeneratorError) { generate([JSON::MinusInfinity]) }
assert_same JSON::MinusInfinity, error.invalid_object
assert_equal '[-Infinity]', generate([JSON::MinusInfinity], :allow_nan => true)
assert_raise(GeneratorError) { fast_generate([JSON::MinusInfinity]) }
assert_raise(GeneratorError) { pretty_generate([JSON::MinusInfinity]) }
Expand Down Expand Up @@ -487,9 +490,13 @@ def test_invalid_encoding_string
["\x82\xAC\xEF".b].to_json
end

assert_raise(JSON::GeneratorError) do
{ foo: "\x82\xAC\xEF".b }.to_json
badly_encoded = "\x82\xAC\xEF".b
exception = assert_raise(JSON::GeneratorError) do
{ foo: badly_encoded }.to_json
end

assert_kind_of EncodingError, exception.cause
assert_same badly_encoded, exception.invalid_object
end

class MyCustomString < String
Expand Down