diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 8118e1392..00d9ffda0 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -46,6 +46,8 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static int usascii_encindex, utf8_encindex, binary_encindex; + /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. * @@ -535,7 +537,7 @@ static VALUE mString_to_json_raw_object(VALUE self) VALUE result = rb_hash_new(); rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_str_new2("raw"), ary); + rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary); return result; } @@ -822,8 +824,6 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data fbuffer_append_char(buffer, ']'); } -static int usascii_encindex, utf8_encindex, binary_encindex; - static inline int enc_utf8_compatible_p(int enc_idx) { if (enc_idx == usascii_encindex) return 1; @@ -837,13 +837,14 @@ static inline VALUE ensure_valid_encoding(VALUE str) VALUE utf8_string; if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); switch (rb_enc_str_coderange(utf8_string)) { case ENC_CODERANGE_7BIT: + return utf8_string; case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. + // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); return utf8_string; break; } diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 38c546b46..9d2c1a5ea 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -1798,9 +1798,12 @@ static VALUE convert_encoding(VALUE source) if (encindex == binary_encindex) { // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + return utf8_string; + } } return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); @@ -1955,7 +1958,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1959 "parser.c" +#line 1962 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1963,7 +1966,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 867 "parser.rl" +#line 870 "parser.rl" /* @@ -1981,16 +1984,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1985 "parser.c" +#line 1988 "parser.c" { cs = JSON_start; } -#line 884 "parser.rl" +#line 887 "parser.rl" p = json->source; pe = p + json->len; -#line 1994 "parser.c" +#line 1997 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2024,7 +2027,7 @@ case 1: cs = 0; goto _out; tr2: -#line 859 "parser.rl" +#line 862 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2034,7 +2037,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2038 "parser.c" +#line 2041 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2123,7 +2126,7 @@ case 9: _out: {} } -#line 887 "parser.rl" +#line 890 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 441e58e7b..b03a1d592 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -693,9 +693,12 @@ static VALUE convert_encoding(VALUE source) if (encindex == binary_encindex) { // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Deprecate in 2.8.0 - // TODO: Remove in 3.0.0 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + return utf8_string; + } } return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding()); diff --git a/lib/json/add/bigdecimal.rb b/lib/json/add/bigdecimal.rb index b8d0bb468..5dbc12c07 100644 --- a/lib/json/add/bigdecimal.rb +++ b/lib/json/add/bigdecimal.rb @@ -35,7 +35,7 @@ def self.json_create(object) def as_json(*) { JSON.create_id => self.class.name, - 'b' => _dump, + 'b' => _dump.force_encoding(Encoding::UTF_8), } end diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 2b1d48b61..288cbbbb3 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -507,8 +507,13 @@ def test_valid_utf8_in_different_encoding wrong_encoding_string = utf8_string.b # This behavior is historical. Not necessary desirable. We should deprecated it. # The pure and java version of the gem already don't behave this way. - assert_equal utf8_string.to_json, wrong_encoding_string.to_json - assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string) + assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do + assert_equal utf8_string.to_json, wrong_encoding_string.to_json + end + + assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do + assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string) + end end def test_string_ext_included_calls_super