ruby · byroot · Oct 30, 2024 · Oct 24, 2024 · casperisfine · Oct 24, 2024
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
@@ -46,6 +46,8 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat
 static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
 static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
 
+static int usascii_encindex, utf8_encindex, binary_encindex;
+
 /* Converts in_string to a JSON string (without the wrapping '"'
  * characters) in FBuffer out_buffer.
  *
@@ -535,7 +537,7 @@ static VALUE mString_to_json_raw_object(VALUE self)
     VALUE result = rb_hash_new();
     rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
     ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
-    rb_hash_aset(result, rb_str_new2("raw"), ary);
+    rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
     return result;
 }
 
@@ -822,8 +824,6 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
     fbuffer_append_char(buffer, ']');
 }
 
-static int usascii_encindex, utf8_encindex, binary_encindex;
-
 static inline int enc_utf8_compatible_p(int enc_idx)
 {
     if (enc_idx == usascii_encindex) return 1;
@@ -837,13 +837,14 @@ static inline VALUE ensure_valid_encoding(VALUE str)
     VALUE utf8_string;
     if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
         if (encindex == binary_encindex) {
-            // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
-            // TODO: Deprecate in 2.8.0
-            // TODO: Remove in 3.0.0
             utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
             switch (rb_enc_str_coderange(utf8_string)) {
                 case ENC_CODERANGE_7BIT:
+                    return utf8_string;
                 case ENC_CODERANGE_VALID:
+                    // For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
+                    // TODO: Raise in 3.0.0
+                    rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
                     return utf8_string;
                     break;
             }

diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
@@ -1798,9 +1798,12 @@ static VALUE convert_encoding(VALUE source)
 
  if (encindex == binary_encindex) {
     // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
-    // TODO: Deprecate in 2.8.0
-    // TODO: Remove in 3.0.0
-    return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+    VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+    switch (rb_enc_str_coderange(utf8_string)) {
+      case ENC_CODERANGE_7BIT:
+      case ENC_CODERANGE_VALID:
+        return utf8_string;
+    }
   }
 
   return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
@@ -1955,15 +1958,15 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
 }
 
 
-#line 1959 "parser.c"
+#line 1962 "parser.c"
 enum {JSON_start = 1};
 enum {JSON_first_final = 10};
 enum {JSON_error = 0};
 
 enum {JSON_en_main = 1};
 
 
-#line 867 "parser.rl"
+#line 870 "parser.rl"
 
 
 /*
@@ -1981,16 +1984,16 @@ static VALUE cParser_parse(VALUE self)
     GET_PARSER;
 
 
-#line 1985 "parser.c"
+#line 1988 "parser.c"
 	{
 	cs = JSON_start;
 	}
 
-#line 884 "parser.rl"
+#line 887 "parser.rl"
     p = json->source;
     pe = p + json->len;
 
-#line 1994 "parser.c"
+#line 1997 "parser.c"
 	{
 	if ( p == pe )
 		goto _test_eof;
@@ -2024,7 +2027,7 @@ case 1:
 cs = 0;
 	goto _out;
 tr2:
-#line 859 "parser.rl"
+#line 862 "parser.rl"
 	{
         char *np = JSON_parse_value(json, p, pe, &result, 0);
         if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -2034,7 +2037,7 @@ cs = 0;
 	if ( ++p == pe )
 		goto _test_eof10;
 case 10:
-#line 2038 "parser.c"
+#line 2041 "parser.c"
 	switch( (*p) ) {
 		case 13: goto st10;
 		case 32: goto st10;
@@ -2123,7 +2126,7 @@ case 9:
 	_out: {}
 	}
 
-#line 887 "parser.rl"
+#line 890 "parser.rl"
 
     if (cs >= JSON_first_final && p == pe) {
         return result;

diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl
@@ -693,9 +693,12 @@ static VALUE convert_encoding(VALUE source)
 
  if (encindex == binary_encindex) {
     // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
-    // TODO: Deprecate in 2.8.0
-    // TODO: Remove in 3.0.0
-    return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+    VALUE utf8_string = rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+    switch (rb_enc_str_coderange(utf8_string)) {
+      case ENC_CODERANGE_7BIT:
+      case ENC_CODERANGE_VALID:
+        return utf8_string;
+    }
 # Strict mode only allows serializing JSON native types: Hash, Array,
 # String, Integer, Float, true, false and nil.
   }
 
   return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());

diff --git a/lib/json/add/bigdecimal.rb b/lib/json/add/bigdecimal.rb
@@ -35,7 +35,7 @@ def self.json_create(object)
   def as_json(*)
     {
       JSON.create_id => self.class.name,
-      'b'            => _dump,
+      'b'            => _dump.force_encoding(Encoding::UTF_8),
     }
   end
 

diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb
@@ -507,8 +507,13 @@ def test_valid_utf8_in_different_encoding
       wrong_encoding_string = utf8_string.b
       # This behavior is historical. Not necessarily desirable. We should deprecate it.
       # The pure and java version of the gem already don't behave this way.
-      assert_equal utf8_string.to_json, wrong_encoding_string.to_json
-      assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string)
+      assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do
+        assert_equal utf8_string.to_json, wrong_encoding_string.to_json
+      end
+
+      assert_warning(/UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0/) do
+        assert_equal JSON.dump(utf8_string), JSON.dump(wrong_encoding_string)
+      end
     end
 
     def test_string_ext_included_calls_super