Skip to content

Commit

Permalink
Update docs, distinguish truncated/invalid
Browse files — browse the repository at this point in the history
  • Loading branch information
josevalim committed Jan 18, 2024
1 parent 1a36b68 commit f14bcdc
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
12 changes: 7 additions & 5 deletions lib/elixir/lib/string.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2586,27 +2586,29 @@ defmodule String do
  byte = :binary.at(string, last)

  cond do
- # This byte is valid, discard all truncated entries
+ # ASCII byte, discard all truncated entries
  byte <= 127 ->
  last + 1

+ # In the middle of a codepoint
  byte <= 191 ->
  invalid_suffix(string, last - 1, truncated + 1)

- # 2 bytes
+ # 2 bytes codepoint start
  byte <= 223 ->
  if truncated == 1, do: last + truncated + 1, else: last

- # 3 bytes
+ # 3 bytes codepoint start
  byte <= 239 ->
  if truncated == 2, do: last + truncated + 1, else: last

- # 3 bytes
+ # 4 bytes codepoint start
  byte <= 247 ->
  if truncated == 3, do: last + truncated + 1, else: last

+ # Invalid codepoint, discard it, stop checking
  true ->
- last
+ last + 1
  end
  end

Expand Down
5 changes: 4 additions & 1 deletion lib/elixir/test/elixir/string_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -845,8 +845,11 @@ defmodule StringTest do
  assert String.byte_slice("h😍ll😍", 2, 5) == "ll"
  assert String.byte_slice("h😍ll😍", 2, 6) == "ll😍"

- # Already invalid
+ # Already truncated
  assert String.byte_slice(<<178, "ll", 178>>, 0, 10) == "ll"
+
+ # Already invalid
+ assert String.byte_slice(<<255, "ll", 255>>, 0, 10) == <<255, "ll", 255>>
end

test "valid?/1" do
Expand Down

0 comments on commit f14bcdc

Please sign in to comment.