From 931880c68b56842c7e2e4ddeab01a24bcb573606 Mon Sep 17 00:00:00 2001 From: Lezenn Date: Tue, 17 Dec 2024 20:25:47 +0100 Subject: [PATCH 1/6] Added string.strip_prefix and string.strip_suffix to the string package. --- src/gleam/string.gleam | 81 ++++++++++++++++++++++++++++++++++++ test/gleam/string_test.gleam | 18 ++++++++ 2 files changed, 99 insertions(+) diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam index 8802f09c..2cd96c5d 100644 --- a/src/gleam/string.gleam +++ b/src/gleam/string.gleam @@ -942,3 +942,84 @@ fn do_inspect(term: anything) -> StringTree @external(erlang, "erlang", "byte_size") @external(javascript, "../gleam_stdlib.mjs", "byte_size") pub fn byte_size(string: String) -> Int + +/// Returns a `Result(String, Nil)` of the given string without the given prefix. +/// If the string does not start with the given prefix, the function returns `Error(Nil)` +/// +/// If an empty prefix is given, the result is always `Ok` containing the whole string. +/// If an empty string is given with a non empty prefix, then the result is always `Error(Nil)` +/// +/// ## Examples +/// +/// ```gleam +/// strip_prefix("https://gleam.run", "https://") +/// // -> Ok("gleam.run") +/// +/// strip_prefix("https://gleam.run", "") +/// // -> Ok("https://gleam.run") +/// +/// strip_prefix("", "") +/// // -> Ok("") +/// +/// strip_prefix("https://gleam.run", "Lucy") +/// // -> Error(Nil) +/// +/// strip_prefix("", "Lucy") +/// // -> Error(Nil) +/// ``` +pub fn strip_prefix( + string: String, + prefix prefix: String, +) -> Result(String, Nil) { + case prefix { + "" -> Ok(string) + prefix -> { + let prefix_len = length(prefix) + case starts_with(string, prefix) { + False -> Error(Nil) + True -> Ok(drop_start(string, prefix_len)) + } + } + } +} + +/// Returns a `Result(String, Nil)` of the given string without the given suffix. 
+/// If the string does not end with the given suffix, the function returns `Error(Nil)` +/// +/// If an empty suffix is given, the result is always `Ok` containing the whole string. +/// If an empty string is given with a non empty suffix, then the result is always `Error(Nil)` +/// +/// ## Examples +/// +/// ```gleam +/// strip_suffix("lucy@gleam.run", "@gleam.run") +/// // -> Ok("lucy") +/// +/// strip_suffix("lucy@gleam.run", "") +/// // -> Ok("lucy@gleam.run") +/// +/// strip_suffix("", "") +/// // -> Ok("") +/// +/// strip_suffix("lucy@gleam.run", "Lucy") +/// // -> Error(Nil) +/// +/// strip_suffix("", "Lucy") +/// // -> Error(Nil) +/// ``` +pub fn strip_suffix( + string: String, + suffix suffix: String, +) -> Result(String, Nil) { + case suffix { + "" -> Ok(string) + suffix -> { + let suffix_len = length(suffix) + + case ends_with(string, suffix) { + False -> Error(Nil) + True -> Ok(drop_end(string, suffix_len)) + } + } + } +} diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam index ffd2f9b9..310d5386 100644 --- a/test/gleam/string_test.gleam +++ b/test/gleam/string_test.gleam @@ -1392,3 +1392,21 @@ pub fn inspect_map_test() { |> string.inspect |> should.equal("dict.from_list([#(\"a\", 1), #(\"b\", 2)])") } + +pub fn strip_prefix_test() { + let assert Ok("gleam.run") = + string.strip_prefix("https://gleam.run", "https://") + let assert Ok("https://gleam.run") = + string.strip_prefix("https://gleam.run", "") + let assert Ok("") = string.strip_prefix("", "") + let assert Error(Nil) = string.strip_prefix("https://gleam.run", "Lucy") + let assert Error(Nil) = string.strip_prefix("", "Lucy") +} + +pub fn strip_suffix_test() { + let assert Ok("lucy") = string.strip_suffix("lucy@gleam.run", "@gleam.run") + let assert Ok("lucy@gleam.run") = string.strip_suffix("lucy@gleam.run", "") + let assert Ok("") = string.strip_suffix("", "") + let assert Error(Nil) = string.strip_suffix("lucy@gleam.run", "Lucy") + let assert Error(Nil) = 
string.strip_suffix("", "Lucy") +} From f4f84c1674d7d90995e5d66073e1b0eb817eabd6 Mon Sep 17 00:00:00 2001 From: Lezenn Date: Tue, 17 Dec 2024 20:27:13 +0100 Subject: [PATCH 2/6] Increasing stdlib version --- gleam.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gleam.toml b/gleam.toml index 0ca774c2..91070815 100644 --- a/gleam.toml +++ b/gleam.toml @@ -1,5 +1,5 @@ name = "gleam_stdlib" -version = "0.48.0" +version = "0.49.0" gleam = ">= 0.32.0" licences = ["Apache-2.0"] description = "A standard library for the Gleam programming language" From d4dbacd8e1a1bdedfaa456c7b8195e5cff3f38a3 Mon Sep 17 00:00:00 2001 From: Lezenn Date: Tue, 17 Dec 2024 23:37:02 +0100 Subject: [PATCH 3/6] Changed strip_prefix / strip_suffix implementations to (normally) more efficient ones --- CHANGELOG.md | 4 + src/gleam/string.gleam | 46 +++--- src/gleam_stdlib.erl | 311 +++++++++++++++++++++++++++-------------- src/gleam_stdlib.mjs | 32 +++++ 4 files changed, 262 insertions(+), 131 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c3cb318..a86fac46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## v0.49.0 - 2024-12-17 + +- The `string` module gains the `strip_prefix` and `strip_suffix` functions. + ## v0.48.0 - 2024-12-17 - Fixed a bug where `string.utf_codepoint` would erronously accept negative input. 
diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam index 2cd96c5d..15928123 100644 --- a/src/gleam/string.gleam +++ b/src/gleam/string.gleam @@ -967,21 +967,27 @@ pub fn byte_size(string: String) -> Int /// strip_prefix("", "Lucy") /// // -> Error(Nil) /// ``` +// pub fn strip_prefix( +// string: String, +// prefix prefix: String, +// ) -> Result(String, Nil) { +// case prefix { +// "" -> Ok(string) +// prefix -> { +// let prefix_len = length(prefix) +// case starts_with(string, prefix) { +// False -> Error(Nil) +// True -> Ok(drop_start(string, prefix_len)) +// } +// } +// } +// } +@external(erlang, "gleam_stdlib", "string_strip_prefix") +@external(javascript, "../gleam_stdlib.mjs", "string_strip_prefix") pub fn strip_prefix( string: String, prefix prefix: String, -) -> Result(String, Nil) { - case prefix { - "" -> Ok(string) - prefix -> { - let prefix_len = length(prefix) - case starts_with(string, prefix) { - False -> Error(Nil) - True -> Ok(drop_start(string, prefix_len)) - } - } - } -} +) -> Result(String, Nil) /// Returns a `Result(String, Nil)` of the given string without the given suffix. /// If the string does not end with the given suffix, the function returns `Error(Nil)` @@ -1007,19 +1013,9 @@ pub fn strip_prefix( /// strip_suffix("", "Lucy") /// // -> Error(Nil) /// ``` +@external(erlang, "gleam_stdlib", "string_strip_suffix") +@external(javascript, "../gleam_stdlib.mjs", "string_strip_suffix") pub fn strip_suffix( string: String, suffix suffix: String, -) -> Result(String, Nil) { - case suffix { - "" -> Ok(string) - suffix -> { - let suffix_len = length(suffix) - - case ends_with(string, suffix) { - False -> Error(Nil) - True -> Ok(drop_end(string, suffix_len)) - } - } - } -} +) -> Result(String, Nil) diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 5c93fd4c..083649f3 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -1,35 +1,85 @@ -module(gleam_stdlib). 
-export([ - map_get/2, iodata_append/2, identity/1, decode_int/1, decode_bool/1, - decode_float/1, decode_list/1, decode_option/2, decode_field/2, parse_int/1, - parse_float/1, less_than/2, string_pop_grapheme/1, string_pop_codeunit/1, - string_starts_with/2, wrap_list/1, string_ends_with/2, string_pad/4, - decode_map/1, uri_parse/1, - decode_result/1, bit_array_slice/3, decode_bit_array/1, compile_regex/2, - regex_scan/2, percent_encode/1, percent_decode/1, regex_check/2, - regex_split/2, base_decode64/1, parse_query/1, bit_array_concat/1, - bit_array_base64_encode/2, size_of_tuple/1, decode_tuple/1, decode_tuple2/1, - decode_tuple3/1, decode_tuple4/1, decode_tuple5/1, decode_tuple6/1, - tuple_get/2, classify_dynamic/1, print/1, println/1, print_error/1, - println_error/1, inspect/1, float_to_string/1, int_from_base_string/2, - utf_codepoint_list_to_string/1, contains_string/2, crop_string/2, - base16_encode/1, base16_decode/1, string_replace/3, regex_replace/3, - slice/3, bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1 + map_get/2, + iodata_append/2, + identity/1, + decode_int/1, + decode_bool/1, + decode_float/1, + decode_list/1, + decode_option/2, + decode_field/2, + parse_int/1, + parse_float/1, + less_than/2, + string_pop_grapheme/1, + string_pop_codeunit/1, + string_starts_with/2, + wrap_list/1, + string_ends_with/2, + string_pad/4, + decode_map/1, + uri_parse/1, + decode_result/1, + bit_array_slice/3, + decode_bit_array/1, + compile_regex/2, + regex_scan/2, + percent_encode/1, + percent_decode/1, + regex_check/2, + regex_split/2, + base_decode64/1, + parse_query/1, + bit_array_concat/1, + bit_array_base64_encode/2, + size_of_tuple/1, + decode_tuple/1, + decode_tuple2/1, + decode_tuple3/1, + decode_tuple4/1, + decode_tuple5/1, + decode_tuple6/1, + tuple_get/2, + classify_dynamic/1, + print/1, + println/1, + print_error/1, + println_error/1, + inspect/1, + float_to_string/1, + int_from_base_string/2, + utf_codepoint_list_to_string/1, + 
contains_string/2, + crop_string/2, + base16_encode/1, + base16_decode/1, + string_replace/3, + regex_replace/3, + slice/3, + bit_array_to_int_and_size/1, + bit_array_pad_to_bytes/1, + string_strip_prefix/2, + string_strip_suffix/2 ]). %% Taken from OTP's uri_string module -define(DEC2HEX(X), - if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; + if + ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10 - end). + end +). %% Taken from OTP's uri_string module -define(HEX2DEC(X), - if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; + if + ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10; ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10 - end). + end +). -define(is_lowercase_char(X), (X > 96 andalso X < 123)). -define(is_underscore_char(X), (X == 95)). @@ -52,7 +102,8 @@ decode_error_msg(Expected, Data) when is_binary(Expected) -> decode_error(Expected, Got) when is_binary(Expected) andalso is_binary(Got) -> {error, [{decode_error, Expected, Got, []}]}. -classify_dynamic(nil) -> <<"Nil">>; +classify_dynamic(nil) -> + <<"Nil">>; classify_dynamic(X) when is_boolean(X) -> <<"Bool">>; classify_dynamic(X) when is_atom(X) -> <<"Atom">>; classify_dynamic(X) when is_binary(X) -> <<"String">>; @@ -65,11 +116,14 @@ classify_dynamic(X) when is_tuple(X) -> iolist_to_binary(["Tuple of ", integer_to_list(tuple_size(X)), " elements"]); classify_dynamic(X) when is_function(X, 0) orelse is_function(X, 1) orelse is_function(X, 2) orelse - is_function(X, 3) orelse is_function(X, 4) orelse is_function(X, 5) orelse - is_function(X, 6) orelse is_function(X, 7) orelse is_function(X, 8) orelse - is_function(X, 9) orelse is_function(X, 10) orelse is_function(X, 11) orelse - is_function(X, 12) -> <<"Function">>; -classify_dynamic(_) -> <<"Some other type">>. 
+ is_function(X, 3) orelse is_function(X, 4) orelse is_function(X, 5) orelse + is_function(X, 6) orelse is_function(X, 7) orelse is_function(X, 8) orelse + is_function(X, 9) orelse is_function(X, 10) orelse is_function(X, 11) orelse + is_function(X, 12) +-> + <<"Function">>; +classify_dynamic(_) -> + <<"Some other type">>. decode_map(Data) when is_map(Data) -> {ok, Data}; decode_map(Data) -> decode_error_msg(<<"Dict">>, Data). @@ -92,8 +146,7 @@ decode_list(Data) -> decode_error_msg(<<"List">>, Data). decode_field(Data, Key) when is_map(Data) -> case Data of #{Key := Value} -> {ok, {some, Value}}; - _ -> - {ok, none} + _ -> {ok, none} end; decode_field(Data, _) -> decode_error_msg(<<"Dict">>, Data). @@ -107,24 +160,24 @@ tuple_get(Data, Index) -> {ok, element(Index + 1, Data)}. decode_tuple(Data) when is_tuple(Data) -> {ok, Data}; decode_tuple(Data) -> decode_error_msg(<<"Tuple">>, Data). -decode_tuple2({_,_} = A) -> {ok, A}; -decode_tuple2([A,B]) -> {ok, {A,B}}; +decode_tuple2({_, _} = A) -> {ok, A}; +decode_tuple2([A, B]) -> {ok, {A, B}}; decode_tuple2(Data) -> decode_error_msg(<<"Tuple of 2 elements">>, Data). -decode_tuple3({_,_,_} = A) -> {ok, A}; -decode_tuple3([A,B,C]) -> {ok, {A,B,C}}; +decode_tuple3({_, _, _} = A) -> {ok, A}; +decode_tuple3([A, B, C]) -> {ok, {A, B, C}}; decode_tuple3(Data) -> decode_error_msg(<<"Tuple of 3 elements">>, Data). -decode_tuple4({_,_,_,_} = A) -> {ok, A}; -decode_tuple4([A,B,C,D]) -> {ok, {A,B,C,D}}; +decode_tuple4({_, _, _, _} = A) -> {ok, A}; +decode_tuple4([A, B, C, D]) -> {ok, {A, B, C, D}}; decode_tuple4(Data) -> decode_error_msg(<<"Tuple of 4 elements">>, Data). -decode_tuple5({_,_,_,_,_} = A) -> {ok, A}; -decode_tuple5([A,B,C,D,E]) -> {ok, {A,B,C,D,E}}; +decode_tuple5({_, _, _, _, _} = A) -> {ok, A}; +decode_tuple5([A, B, C, D, E]) -> {ok, {A, B, C, D, E}}; decode_tuple5(Data) -> decode_error_msg(<<"Tuple of 5 elements">>, Data). 
-decode_tuple6({_,_,_,_,_,_} = A) -> {ok, A}; -decode_tuple6([A,B,C,D,E,F]) -> {ok, {A,B,C,D,E,F}}; +decode_tuple6({_, _, _, _, _, _} = A) -> {ok, A}; +decode_tuple6([A, B, C, D, E, F]) -> {ok, {A, B, C, D, E, F}}; decode_tuple6(Data) -> decode_error_msg(<<"Tuple of 6 elements">>, Data). decode_option(Term, F) -> @@ -174,18 +227,48 @@ parse_float(String) -> less_than(Lhs, Rhs) -> Lhs < Rhs. -string_starts_with(_, <<>>) -> true; +string_starts_with(_, <<>>) -> + true; string_starts_with(String, Prefix) when byte_size(Prefix) > byte_size(String) -> false; string_starts_with(String, Prefix) -> PrefixSize = byte_size(Prefix), Prefix == binary_part(String, 0, PrefixSize). -string_ends_with(_, <<>>) -> true; +string_strip_prefix(String, <<>>) when is_binary(String) -> + {ok, String}; +string_strip_prefix(String, _) when is_binary(String), String == <<>> -> + {error, nil}; +string_strip_prefix(String, Prefix) when + is_binary(String), is_binary(Prefix), byte_size(Prefix) > byte_size(String) +-> + {error, nil}; +string_strip_prefix(String, Prefix) when is_binary(String), is_binary(Prefix) -> + PrefixSize = byte_size(Prefix), + case Prefix == binary_part(String, 0, PrefixSize) of + true -> {ok, binary_part(String, PrefixSize, byte_size(String) - PrefixSize)}; + false -> {error, nil} + end. + +string_ends_with(_, <<>>) -> + true; string_ends_with(String, Suffix) when byte_size(Suffix) > byte_size(String) -> false; string_ends_with(String, Suffix) -> SuffixSize = byte_size(Suffix), Suffix == binary_part(String, byte_size(String) - SuffixSize, SuffixSize). 
+string_strip_suffix(String, <<>>) when is_binary(String) -> {ok, String}; +string_strip_suffix(String, _) when is_binary(String), String == <<>> -> {error, nil}; +string_strip_suffix(String, Suffix) when + is_binary(String), is_binary(Suffix), byte_size(Suffix) > byte_size(String) +-> + {error, nil}; +string_strip_suffix(String, Suffix) when is_binary(String), is_binary(Suffix) -> + SuffixSize = byte_size(Suffix), + case Suffix == binary_part(String, byte_size(String) - SuffixSize, SuffixSize) of + true -> {ok, binary_part(String, 0, byte_size(String) - SuffixSize)}; + false -> {error, nil} + end. + string_pad(String, Length, Dir, PadString) -> Chars = string:pad(String, Length, Dir, binary_to_list(PadString)), case unicode:characters_to_binary(Chars) of @@ -195,13 +278,12 @@ string_pad(String, Length, Dir, PadString) -> string_pop_grapheme(String) -> case string:next_grapheme(String) of - [ Next | Rest ] when is_binary(Rest) -> + [Next | Rest] when is_binary(Rest) -> {ok, {unicode:characters_to_binary([Next]), Rest}}; - - [ Next | Rest ] -> + [Next | Rest] -> {ok, {unicode:characters_to_binary([Next]), unicode:characters_to_binary(Rest)}}; - - _ -> {error, nil} + _ -> + {error, nil} end. string_pop_codeunit(<>) -> {Cp, Rest}; @@ -209,7 +291,8 @@ string_pop_codeunit(Binary) -> {0, Binary}. bit_array_pad_to_bytes(Bin) -> case erlang:bit_size(Bin) rem 8 of - 0 -> Bin; + 0 -> + Bin; TrailingBits -> PaddingBits = 8 - TrailingBits, <> @@ -228,8 +311,10 @@ bit_array_base64_encode(_Bin, _Padding) -> -endif. bit_array_slice(Bin, Pos, Len) -> - try {ok, binary:part(Bin, Pos, Len)} - catch error:badarg -> {error, nil} + try + {ok, binary:part(Bin, Pos, Len)} + catch + error:badarg -> {error, nil} end. 
compile_regex(String, Options) -> @@ -243,8 +328,7 @@ compile_regex(String, Options) -> FilteredOptions = [Option || Option <- OptionsList, Option /= false], case re:compile(String, FilteredOptions) of {ok, MP} -> {ok, MP}; - {error, {Str, Pos}} -> - {error, {compile_error, unicode:characters_to_binary(Str), Pos}} + {error, {Str, Pos}} -> {error, {compile_error, unicode:characters_to_binary(Str), Pos}} end. regex_check(Regex, String) -> @@ -253,7 +337,8 @@ regex_check(Regex, String) -> regex_split(Regex, String) -> re:split(String, Regex). -regex_submatches(_, {-1, 0}) -> none; +regex_submatches(_, {-1, 0}) -> + none; regex_submatches(String, {Start, Length}) -> BinarySlice = binary:part(String, {Start, Length}), case string:is_empty(binary_to_list(BinarySlice)) of @@ -275,8 +360,10 @@ regex_replace(Regex, Subject, Replacement) -> re:replace(Subject, Regex, Replacement, [global, {return, binary}]). base_decode64(S) -> - try {ok, base64:decode(S)} - catch error:_ -> {error, nil} + try + {ok, base64:decode(S)} + catch + error:_ -> {error, nil} end. wrap_list(X) when is_list(X) -> X; @@ -284,37 +371,41 @@ wrap_list(X) -> [X]. parse_query(Query) -> case uri_string:dissect_query(Query) of - {error, _, _} -> {error, nil}; + {error, _, _} -> + {error, nil}; Pairs -> - Pairs1 = lists:map(fun - ({K, true}) -> {K, <<"">>}; - (Pair) -> Pair - end, Pairs), + Pairs1 = lists:map( + fun + ({K, true}) -> {K, <<"">>}; + (Pair) -> Pair + end, + Pairs + ), {ok, Pairs1} end. percent_encode(B) -> percent_encode(B, <<>>). percent_encode(<<>>, Acc) -> Acc; -percent_encode(<>, Acc) -> +percent_encode(<>, Acc) -> case percent_ok(H) of true -> - percent_encode(T, <>); + percent_encode(T, <>); false -> - <> = <>, - percent_encode(T, <>) + <> = <>, + percent_encode(T, <>) end. percent_decode(Cs) -> percent_decode(Cs, <<>>). 
percent_decode(<<$%, C0, C1, Cs/binary>>, Acc) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> - B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + B = ?HEX2DEC(C0) * 16 + ?HEX2DEC(C1), percent_decode(Cs, <>); false -> {error, nil} end; -percent_decode(<>, Acc) -> +percent_decode(<>, Acc) -> percent_decode(Cs, <>); percent_decode(<<>>, Acc) -> check_utf8(Acc). @@ -336,7 +427,7 @@ percent_ok(C) when $a =< C, C =< $z -> true; percent_ok(_) -> false. is_hex_digit(C) -> - ($0 =< C andalso C =< $9) orelse ($a =< C andalso C =< $f) orelse ($A =< C andalso C =< $F). + ($0 =< C andalso C =< $9) orelse ($a =< C andalso C =< $f) orelse ($A =< C andalso C =< $F). check_utf8(Cs) -> case unicode:characters_to_list(Cs) of @@ -347,27 +438,28 @@ check_utf8(Cs) -> uri_parse(String) -> case uri_string:parse(String) of - {error, _, _} -> {error, nil}; + {error, _, _} -> + {error, nil}; Uri -> - {ok, {uri, - maps_get_optional(Uri, scheme), - maps_get_optional(Uri, userinfo), - maps_get_optional(Uri, host), - maps_get_optional(Uri, port), - maps_get_or(Uri, path, <<>>), - maps_get_optional(Uri, query), - maps_get_optional(Uri, fragment) - }} + {ok, + {uri, maps_get_optional(Uri, scheme), maps_get_optional(Uri, userinfo), + maps_get_optional(Uri, host), maps_get_optional(Uri, port), + maps_get_or(Uri, path, <<>>), maps_get_optional(Uri, query), + maps_get_optional(Uri, fragment)}} end. maps_get_optional(Map, Key) -> - try {some, maps:get(Key, Map)} - catch _:_ -> none + try + {some, maps:get(Key, Map)} + catch + _:_ -> none end. maps_get_or(Map, Key, Default) -> - try maps:get(Key, Map) - catch _:_ -> Default + try + maps:get(Key, Map) + catch + _:_ -> Default end. 
print(String) -> @@ -395,7 +487,7 @@ inspect(nil) -> inspect(Data) when is_map(Data) -> Fields = [ [<<"#(">>, inspect(Key), <<", ">>, inspect(Value), <<")">>] - || {Key, Value} <- maps:to_list(Data) + || {Key, Value} <- maps:to_list(Data) ], ["dict.from_list([", lists:join(", ", Fields), "])"]; inspect(Atom) when is_atom(Atom) -> @@ -403,14 +495,15 @@ inspect(Atom) when is_atom(Atom) -> case inspect_maybe_gleam_atom(Binary, none, <<>>) of {ok, Inspected} -> Inspected; {error, _} -> ["atom.create_from_string(\"", Binary, "\")"] - end; + end; inspect(Any) when is_integer(Any) -> erlang:integer_to_list(Any); inspect(Any) when is_float(Any) -> io_lib_format:fwrite_g(Any); inspect(Binary) when is_binary(Binary) -> case inspect_maybe_utf8_string(Binary, <<>>) of - {ok, InspectedUtf8String} -> InspectedUtf8String; + {ok, InspectedUtf8String} -> + InspectedUtf8String; {error, not_a_utf8_string} -> Segments = [erlang:integer_to_list(X) || <> <= Binary], ["<<", lists:join(", ", Segments), ">>"] @@ -422,14 +515,17 @@ inspect(List) when is_list(List) -> {proper, Elements} -> ["[", Elements, "]"]; {improper, Elements} -> ["//erl([", Elements, "])"] end; -inspect(Any) when is_tuple(Any) % Record constructors - andalso is_atom(element(1, Any)) - andalso element(1, Any) =/= false - andalso element(1, Any) =/= true - andalso element(1, Any) =/= nil +% Record constructors +inspect(Any) when + is_tuple(Any) andalso + is_atom(element(1, Any)) andalso + element(1, Any) =/= false andalso + element(1, Any) =/= true andalso + element(1, Any) =/= nil -> [Atom | ArgsList] = erlang:tuple_to_list(Any), - Args = lists:join(<<", ">>, + Args = lists:join( + <<", ">>, lists:map(fun inspect/1, ArgsList) ), [inspect(Atom), "(", Args, ")"]; @@ -439,14 +535,14 @@ inspect(Tuple) when is_tuple(Tuple) -> inspect(Any) when is_function(Any) -> {arity, Arity} = erlang:fun_info(Any, arity), ArgsAsciiCodes = lists:seq($a, $a + Arity - 1), - Args = lists:join(<<", ">>, + Args = lists:join( + <<", ">>, 
lists:map(fun(Arg) -> <> end, ArgsAsciiCodes) ), ["//fn(", Args, ") { ... }"]; inspect(Any) -> ["//erl(", io_lib:format("~p", [Any]), ")"]. - inspect_maybe_gleam_atom(<<>>, none, _) -> {error, nil}; inspect_maybe_gleam_atom(<>, none, _) when ?is_digit_char(First) -> @@ -455,10 +551,11 @@ inspect_maybe_gleam_atom(<<"_", _Rest/binary>>, none, _) -> {error, nil}; inspect_maybe_gleam_atom(<<"_">>, _PrevChar, _Acc) -> {error, nil}; -inspect_maybe_gleam_atom(<<"_", _Rest/binary>>, $_, _Acc) -> +inspect_maybe_gleam_atom(<<"_", _Rest/binary>>, $_, _Acc) -> {error, nil}; -inspect_maybe_gleam_atom(<>, _PrevChar, _Acc) - when not (?is_lowercase_char(First) orelse ?is_underscore_char(First) orelse ?is_digit_char(First)) -> +inspect_maybe_gleam_atom(<>, _PrevChar, _Acc) when + not (?is_lowercase_char(First) orelse ?is_underscore_char(First) orelse ?is_digit_char(First)) +-> {error, nil}; inspect_maybe_gleam_atom(<>, none, Acc) -> inspect_maybe_gleam_atom(Rest, First, <>); @@ -510,24 +607,26 @@ append_segment(<<"<<">>, Segment) -> append_segment(Acc, Segment) -> <>. - inspect_maybe_utf8_string(Binary, Acc) -> case Binary of - <<>> -> {ok, <<$", Acc/binary, $">>}; + <<>> -> + {ok, <<$", Acc/binary, $">>}; <> -> - Escaped = case First of - $" -> <<$\\, $">>; - $\\ -> <<$\\, $\\>>; - $\r -> <<$\\, $r>>; - $\n -> <<$\\, $n>>; - $\t -> <<$\\, $t>>; - $\f -> <<$\\, $f>>; - X when X > 126, X < 160 -> convert_to_u(X); - X when X < 32 -> convert_to_u(X); - Other -> <> - end, + Escaped = + case First of + $" -> <<$\\, $">>; + $\\ -> <<$\\, $\\>>; + $\r -> <<$\\, $r>>; + $\n -> <<$\\, $n>>; + $\t -> <<$\\, $t>>; + $\f -> <<$\\, $f>>; + X when X > 126, X < 160 -> convert_to_u(X); + X when X < 32 -> convert_to_u(X); + Other -> <> + end, inspect_maybe_utf8_string(Rest, <>); - _ -> {error, not_a_utf8_string} + _ -> + {error, not_a_utf8_string} end. 
convert_to_u(Code) -> diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 4f804f78..76b84feb 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -1010,3 +1010,35 @@ export function bit_array_starts_with(bits, prefix) { return true; } + +export function string_strip_prefix(str, prefix) { + if (prefix == "") { + return new Ok(str) + } + + if (str == "" && prefix.length != 0) { + return new Error(Nil) + } + + if (str.startsWith(prefix)) { + return new Ok(str.substring(prefix.length)) + } + + return new Error(Nil) +} + +export function string_strip_suffix(str, suffix) { + if (suffix == "") { + return new Ok(str) + } + + if (str == "" && suffix.length != 0) { + return new Error(Nil) + } + + if (str.endsWith(suffix)) { + return new Ok(str.substring(0, str.length - suffix.length)) + } + + return new Error(Nil) +} \ No newline at end of file From dc7fd1fc36914f9b8da4cc94d6aef6f77804fde0 Mon Sep 17 00:00:00 2001 From: Lezenn Date: Thu, 19 Dec 2024 01:15:24 +0100 Subject: [PATCH 4/6] reworking from feedbacks --- src/gleam/string.gleam | 33 ++++++++++++--------------------- src/gleam_stdlib.erl | 32 ++++---------------------------- src/gleam_stdlib.mjs | 4 ++-- test/gleam/string_test.gleam | 1 + 4 files changed, 19 insertions(+), 51 deletions(-) diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam index 15928123..33129b00 100644 --- a/src/gleam/string.gleam +++ b/src/gleam/string.gleam @@ -318,9 +318,12 @@ pub fn contains(does haystack: String, contain needle: String) -> Bool /// // -> False /// ``` /// -@external(erlang, "gleam_stdlib", "string_starts_with") -@external(javascript, "../gleam_stdlib.mjs", "starts_with") -pub fn starts_with(string: String, prefix: String) -> Bool +pub fn starts_with(string: String, prefix: String) -> Bool { + case strip_prefix(string, prefix) { + Ok(_) -> True + Error(_) -> False + } +} /// Checks whether the first `String` ends with the second one. 
/// @@ -331,9 +334,12 @@ pub fn starts_with(string: String, prefix: String) -> Bool /// // -> True /// ``` /// -@external(erlang, "gleam_stdlib", "string_ends_with") -@external(javascript, "../gleam_stdlib.mjs", "ends_with") -pub fn ends_with(string: String, suffix: String) -> Bool +pub fn ends_with(string: String, suffix: String) -> Bool { + case strip_suffix(string, suffix) { + Ok(_) -> True + Error(_) -> False + } +} /// Creates a list of `String`s by splitting a given string on a given substring. /// @@ -967,21 +973,6 @@ pub fn byte_size(string: String) -> Int /// strip_prefix("", "Lucy") /// // -> Error(Nil) /// ``` -// pub fn strip_prefix( -// string: String, -// prefix prefix: String, -// ) -> Result(String, Nil) { -// case prefix { -// "" -> Ok(string) -// prefix -> { -// let prefix_len = length(prefix) -// case starts_with(string, prefix) { -// False -> Error(Nil) -// True -> Ok(drop_start(string, prefix_len)) -// } -// } -// } -// } @external(erlang, "gleam_stdlib", "string_strip_prefix") @external(javascript, "../gleam_stdlib.mjs", "string_strip_prefix") pub fn strip_prefix( diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 083649f3..dcb9a45b 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -15,9 +15,7 @@ less_than/2, string_pop_grapheme/1, string_pop_codeunit/1, - string_starts_with/2, wrap_list/1, - string_ends_with/2, string_pad/4, decode_map/1, uri_parse/1, @@ -227,21 +225,9 @@ parse_float(String) -> less_than(Lhs, Rhs) -> Lhs < Rhs. -string_starts_with(_, <<>>) -> - true; -string_starts_with(String, Prefix) when byte_size(Prefix) > byte_size(String) -> false; -string_starts_with(String, Prefix) -> - PrefixSize = byte_size(Prefix), - Prefix == binary_part(String, 0, PrefixSize). 
- -string_strip_prefix(String, <<>>) when is_binary(String) -> - {ok, String}; -string_strip_prefix(String, _) when is_binary(String), String == <<>> -> - {error, nil}; -string_strip_prefix(String, Prefix) when - is_binary(String), is_binary(Prefix), byte_size(Prefix) > byte_size(String) --> - {error, nil}; +string_strip_prefix(String, <<>>) when is_binary(String) -> {ok, String}; +string_strip_prefix(String, _) when is_binary(String), String == <<>> -> {error, nil}; +string_strip_prefix(String, Prefix) when is_binary(String), is_binary(Prefix), byte_size(Prefix) > byte_size(String) -> {error, nil}; string_strip_prefix(String, Prefix) when is_binary(String), is_binary(Prefix) -> PrefixSize = byte_size(Prefix), case Prefix == binary_part(String, 0, PrefixSize) of @@ -249,19 +235,9 @@ string_strip_prefix(String, Prefix) when is_binary(String), is_binary(Prefix) -> false -> {error, nil} end. -string_ends_with(_, <<>>) -> - true; -string_ends_with(String, Suffix) when byte_size(Suffix) > byte_size(String) -> false; -string_ends_with(String, Suffix) -> - SuffixSize = byte_size(Suffix), - Suffix == binary_part(String, byte_size(String) - SuffixSize, SuffixSize). 
- string_strip_suffix(String, <<>>) when is_binary(String) -> {ok, String}; string_strip_suffix(String, _) when is_binary(String), String == <<>> -> {error, nil}; -string_strip_suffix(String, Suffix) when - is_binary(String), is_binary(Suffix), byte_size(Suffix) > byte_size(String) --> - {error, nil}; +string_strip_suffix(String, Suffix) when is_binary(String), is_binary(Suffix), byte_size(Suffix) > byte_size(String) -> {error, nil}; string_strip_suffix(String, Suffix) when is_binary(String), is_binary(Suffix) -> SuffixSize = byte_size(Suffix), case Suffix == binary_part(String, byte_size(String) - SuffixSize, SuffixSize) of diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 76b84feb..8d459c0f 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -1033,12 +1033,12 @@ export function string_strip_suffix(str, suffix) { } if (str == "" && suffix.length != 0) { - return new Error(Nil) + return new Error(undefined) } if (str.endsWith(suffix)) { return new Ok(str.substring(0, str.length - suffix.length)) } - return new Error(Nil) + return new Error(undefined) } \ No newline at end of file diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam index 310d5386..bfa5f854 100644 --- a/test/gleam/string_test.gleam +++ b/test/gleam/string_test.gleam @@ -1398,6 +1398,7 @@ pub fn strip_prefix_test() { string.strip_prefix("https://gleam.run", "https://") let assert Ok("https://gleam.run") = string.strip_prefix("https://gleam.run", "") + // string.strip_prefix("👩‍👩‍👧‍👦", prefix: "👩") |> should.equal(Ok("👩‍👧‍👦")) let assert Ok("") = string.strip_prefix("", "") let assert Error(Nil) = string.strip_prefix("https://gleam.run", "Lucy") let assert Error(Nil) = string.strip_prefix("", "Lucy") From 88ef3c6f42e3d62545cbee46cb9c5be322143ca3 Mon Sep 17 00:00:00 2001 From: Lezenn Date: Fri, 20 Dec 2024 20:07:35 +0100 Subject: [PATCH 5/6] Removed formatting mess, string_starts_with and string_ends_with ---
src/gleam_stdlib.erl | 277 ++++++++++++++++--------------------------- 1 file changed, 105 insertions(+), 172 deletions(-) diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index dcb9a45b..de3bbfdc 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -1,83 +1,35 @@ -module(gleam_stdlib). -export([ - map_get/2, - iodata_append/2, - identity/1, - decode_int/1, - decode_bool/1, - decode_float/1, - decode_list/1, - decode_option/2, - decode_field/2, - parse_int/1, - parse_float/1, - less_than/2, - string_pop_grapheme/1, - string_pop_codeunit/1, - wrap_list/1, - string_pad/4, - decode_map/1, - uri_parse/1, - decode_result/1, - bit_array_slice/3, - decode_bit_array/1, - compile_regex/2, - regex_scan/2, - percent_encode/1, - percent_decode/1, - regex_check/2, - regex_split/2, - base_decode64/1, - parse_query/1, - bit_array_concat/1, - bit_array_base64_encode/2, - size_of_tuple/1, - decode_tuple/1, - decode_tuple2/1, - decode_tuple3/1, - decode_tuple4/1, - decode_tuple5/1, - decode_tuple6/1, - tuple_get/2, - classify_dynamic/1, - print/1, - println/1, - print_error/1, - println_error/1, - inspect/1, - float_to_string/1, - int_from_base_string/2, - utf_codepoint_list_to_string/1, - contains_string/2, - crop_string/2, - base16_encode/1, - base16_decode/1, - string_replace/3, - regex_replace/3, - slice/3, - bit_array_to_int_and_size/1, - bit_array_pad_to_bytes/1, - string_strip_prefix/2, - string_strip_suffix/2 + map_get/2, iodata_append/2, identity/1, decode_int/1, decode_bool/1, + decode_float/1, decode_list/1, decode_option/2, decode_field/2, parse_int/1, + parse_float/1, less_than/2, string_pop_grapheme/1, string_pop_codeunit/1, + string_strip_prefix/2, wrap_list/1, string_strip_suffix/2, string_pad/4, + decode_map/1, uri_parse/1, + decode_result/1, bit_array_slice/3, decode_bit_array/1, compile_regex/2, + regex_scan/2, percent_encode/1, percent_decode/1, regex_check/2, + regex_split/2, base_decode64/1, parse_query/1, bit_array_concat/1, + 
bit_array_base64_encode/2, size_of_tuple/1, decode_tuple/1, decode_tuple2/1, + decode_tuple3/1, decode_tuple4/1, decode_tuple5/1, decode_tuple6/1, + tuple_get/2, classify_dynamic/1, print/1, println/1, print_error/1, + println_error/1, inspect/1, float_to_string/1, int_from_base_string/2, + utf_codepoint_list_to_string/1, contains_string/2, crop_string/2, + base16_encode/1, base16_decode/1, string_replace/3, regex_replace/3, + slice/3, bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1 ]). %% Taken from OTP's uri_string module -define(DEC2HEX(X), - if - ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; + if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10 - end -). + end). %% Taken from OTP's uri_string module -define(HEX2DEC(X), - if - ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; + if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10; ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10 - end -). + end). -define(is_lowercase_char(X), (X > 96 andalso X < 123)). -define(is_underscore_char(X), (X == 95)). @@ -100,8 +52,7 @@ decode_error_msg(Expected, Data) when is_binary(Expected) -> decode_error(Expected, Got) when is_binary(Expected) andalso is_binary(Got) -> {error, [{decode_error, Expected, Got, []}]}. 
-classify_dynamic(nil) -> - <<"Nil">>; +classify_dynamic(nil) -> <<"Nil">>; classify_dynamic(X) when is_boolean(X) -> <<"Bool">>; classify_dynamic(X) when is_atom(X) -> <<"Atom">>; classify_dynamic(X) when is_binary(X) -> <<"String">>; @@ -114,14 +65,11 @@ classify_dynamic(X) when is_tuple(X) -> iolist_to_binary(["Tuple of ", integer_to_list(tuple_size(X)), " elements"]); classify_dynamic(X) when is_function(X, 0) orelse is_function(X, 1) orelse is_function(X, 2) orelse - is_function(X, 3) orelse is_function(X, 4) orelse is_function(X, 5) orelse - is_function(X, 6) orelse is_function(X, 7) orelse is_function(X, 8) orelse - is_function(X, 9) orelse is_function(X, 10) orelse is_function(X, 11) orelse - is_function(X, 12) --> - <<"Function">>; -classify_dynamic(_) -> - <<"Some other type">>. + is_function(X, 3) orelse is_function(X, 4) orelse is_function(X, 5) orelse + is_function(X, 6) orelse is_function(X, 7) orelse is_function(X, 8) orelse + is_function(X, 9) orelse is_function(X, 10) orelse is_function(X, 11) orelse + is_function(X, 12) -> <<"Function">>; +classify_dynamic(_) -> <<"Some other type">>. decode_map(Data) when is_map(Data) -> {ok, Data}; decode_map(Data) -> decode_error_msg(<<"Dict">>, Data). @@ -144,7 +92,8 @@ decode_list(Data) -> decode_error_msg(<<"List">>, Data). decode_field(Data, Key) when is_map(Data) -> case Data of #{Key := Value} -> {ok, {some, Value}}; - _ -> {ok, none} + _ -> + {ok, none} end; decode_field(Data, _) -> decode_error_msg(<<"Dict">>, Data). @@ -158,24 +107,24 @@ tuple_get(Data, Index) -> {ok, element(Index + 1, Data)}. decode_tuple(Data) when is_tuple(Data) -> {ok, Data}; decode_tuple(Data) -> decode_error_msg(<<"Tuple">>, Data). -decode_tuple2({_, _} = A) -> {ok, A}; -decode_tuple2([A, B]) -> {ok, {A, B}}; +decode_tuple2({_,_} = A) -> {ok, A}; +decode_tuple2([A,B]) -> {ok, {A,B}}; decode_tuple2(Data) -> decode_error_msg(<<"Tuple of 2 elements">>, Data). 
-decode_tuple3({_, _, _} = A) -> {ok, A}; -decode_tuple3([A, B, C]) -> {ok, {A, B, C}}; +decode_tuple3({_,_,_} = A) -> {ok, A}; +decode_tuple3([A,B,C]) -> {ok, {A,B,C}}; decode_tuple3(Data) -> decode_error_msg(<<"Tuple of 3 elements">>, Data). -decode_tuple4({_, _, _, _} = A) -> {ok, A}; -decode_tuple4([A, B, C, D]) -> {ok, {A, B, C, D}}; +decode_tuple4({_,_,_,_} = A) -> {ok, A}; +decode_tuple4([A,B,C,D]) -> {ok, {A,B,C,D}}; decode_tuple4(Data) -> decode_error_msg(<<"Tuple of 4 elements">>, Data). -decode_tuple5({_, _, _, _, _} = A) -> {ok, A}; -decode_tuple5([A, B, C, D, E]) -> {ok, {A, B, C, D, E}}; +decode_tuple5({_,_,_,_,_} = A) -> {ok, A}; +decode_tuple5([A,B,C,D,E]) -> {ok, {A,B,C,D,E}}; decode_tuple5(Data) -> decode_error_msg(<<"Tuple of 5 elements">>, Data). -decode_tuple6({_, _, _, _, _, _} = A) -> {ok, A}; -decode_tuple6([A, B, C, D, E, F]) -> {ok, {A, B, C, D, E, F}}; +decode_tuple6({_,_,_,_,_,_} = A) -> {ok, A}; +decode_tuple6([A,B,C,D,E,F]) -> {ok, {A,B,C,D,E,F}}; decode_tuple6(Data) -> decode_error_msg(<<"Tuple of 6 elements">>, Data). decode_option(Term, F) -> @@ -254,12 +203,13 @@ string_pad(String, Length, Dir, PadString) -> string_pop_grapheme(String) -> case string:next_grapheme(String) of - [Next | Rest] when is_binary(Rest) -> + [ Next | Rest ] when is_binary(Rest) -> {ok, {unicode:characters_to_binary([Next]), Rest}}; - [Next | Rest] -> + + [ Next | Rest ] -> {ok, {unicode:characters_to_binary([Next]), unicode:characters_to_binary(Rest)}}; - _ -> - {error, nil} + + _ -> {error, nil} end. string_pop_codeunit(<>) -> {Cp, Rest}; @@ -267,8 +217,7 @@ string_pop_codeunit(Binary) -> {0, Binary}. bit_array_pad_to_bytes(Bin) -> case erlang:bit_size(Bin) rem 8 of - 0 -> - Bin; + 0 -> Bin; TrailingBits -> PaddingBits = 8 - TrailingBits, <> @@ -287,10 +236,8 @@ bit_array_base64_encode(_Bin, _Padding) -> -endif. 
bit_array_slice(Bin, Pos, Len) -> - try - {ok, binary:part(Bin, Pos, Len)} - catch - error:badarg -> {error, nil} + try {ok, binary:part(Bin, Pos, Len)} + catch error:badarg -> {error, nil} end. compile_regex(String, Options) -> @@ -304,7 +251,8 @@ compile_regex(String, Options) -> FilteredOptions = [Option || Option <- OptionsList, Option /= false], case re:compile(String, FilteredOptions) of {ok, MP} -> {ok, MP}; - {error, {Str, Pos}} -> {error, {compile_error, unicode:characters_to_binary(Str), Pos}} + {error, {Str, Pos}} -> + {error, {compile_error, unicode:characters_to_binary(Str), Pos}} end. regex_check(Regex, String) -> @@ -313,8 +261,7 @@ regex_check(Regex, String) -> regex_split(Regex, String) -> re:split(String, Regex). -regex_submatches(_, {-1, 0}) -> - none; +regex_submatches(_, {-1, 0}) -> none; regex_submatches(String, {Start, Length}) -> BinarySlice = binary:part(String, {Start, Length}), case string:is_empty(binary_to_list(BinarySlice)) of @@ -336,10 +283,8 @@ regex_replace(Regex, Subject, Replacement) -> re:replace(Subject, Regex, Replacement, [global, {return, binary}]). base_decode64(S) -> - try - {ok, base64:decode(S)} - catch - error:_ -> {error, nil} + try {ok, base64:decode(S)} + catch error:_ -> {error, nil} end. wrap_list(X) when is_list(X) -> X; @@ -347,41 +292,37 @@ wrap_list(X) -> [X]. parse_query(Query) -> case uri_string:dissect_query(Query) of - {error, _, _} -> - {error, nil}; + {error, _, _} -> {error, nil}; Pairs -> - Pairs1 = lists:map( - fun - ({K, true}) -> {K, <<"">>}; - (Pair) -> Pair - end, - Pairs - ), + Pairs1 = lists:map(fun + ({K, true}) -> {K, <<"">>}; + (Pair) -> Pair + end, Pairs), {ok, Pairs1} end. percent_encode(B) -> percent_encode(B, <<>>). percent_encode(<<>>, Acc) -> Acc; -percent_encode(<>, Acc) -> +percent_encode(<>, Acc) -> case percent_ok(H) of true -> - percent_encode(T, <>); + percent_encode(T, <>); false -> - <> = <>, - percent_encode(T, <>) + <> = <>, + percent_encode(T, <>) end. 
percent_decode(Cs) -> percent_decode(Cs, <<>>). percent_decode(<<$%, C0, C1, Cs/binary>>, Acc) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> - B = ?HEX2DEC(C0) * 16 + ?HEX2DEC(C1), + B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), percent_decode(Cs, <>); false -> {error, nil} end; -percent_decode(<>, Acc) -> +percent_decode(<>, Acc) -> percent_decode(Cs, <>); percent_decode(<<>>, Acc) -> check_utf8(Acc). @@ -403,7 +344,7 @@ percent_ok(C) when $a =< C, C =< $z -> true; percent_ok(_) -> false. is_hex_digit(C) -> - ($0 =< C andalso C =< $9) orelse ($a =< C andalso C =< $f) orelse ($A =< C andalso C =< $F). + ($0 =< C andalso C =< $9) orelse ($a =< C andalso C =< $f) orelse ($A =< C andalso C =< $F). check_utf8(Cs) -> case unicode:characters_to_list(Cs) of @@ -414,28 +355,27 @@ check_utf8(Cs) -> uri_parse(String) -> case uri_string:parse(String) of - {error, _, _} -> - {error, nil}; + {error, _, _} -> {error, nil}; Uri -> - {ok, - {uri, maps_get_optional(Uri, scheme), maps_get_optional(Uri, userinfo), - maps_get_optional(Uri, host), maps_get_optional(Uri, port), - maps_get_or(Uri, path, <<>>), maps_get_optional(Uri, query), - maps_get_optional(Uri, fragment)}} + {ok, {uri, + maps_get_optional(Uri, scheme), + maps_get_optional(Uri, userinfo), + maps_get_optional(Uri, host), + maps_get_optional(Uri, port), + maps_get_or(Uri, path, <<>>), + maps_get_optional(Uri, query), + maps_get_optional(Uri, fragment) + }} end. maps_get_optional(Map, Key) -> - try - {some, maps:get(Key, Map)} - catch - _:_ -> none + try {some, maps:get(Key, Map)} + catch _:_ -> none end. maps_get_or(Map, Key, Default) -> - try - maps:get(Key, Map) - catch - _:_ -> Default + try maps:get(Key, Map) + catch _:_ -> Default end. 
print(String) -> @@ -463,7 +403,7 @@ inspect(nil) -> inspect(Data) when is_map(Data) -> Fields = [ [<<"#(">>, inspect(Key), <<", ">>, inspect(Value), <<")">>] - || {Key, Value} <- maps:to_list(Data) + || {Key, Value} <- maps:to_list(Data) ], ["dict.from_list([", lists:join(", ", Fields), "])"]; inspect(Atom) when is_atom(Atom) -> @@ -471,15 +411,14 @@ inspect(Atom) when is_atom(Atom) -> case inspect_maybe_gleam_atom(Binary, none, <<>>) of {ok, Inspected} -> Inspected; {error, _} -> ["atom.create_from_string(\"", Binary, "\")"] - end; + end; inspect(Any) when is_integer(Any) -> erlang:integer_to_list(Any); inspect(Any) when is_float(Any) -> io_lib_format:fwrite_g(Any); inspect(Binary) when is_binary(Binary) -> case inspect_maybe_utf8_string(Binary, <<>>) of - {ok, InspectedUtf8String} -> - InspectedUtf8String; + {ok, InspectedUtf8String} -> InspectedUtf8String; {error, not_a_utf8_string} -> Segments = [erlang:integer_to_list(X) || <> <= Binary], ["<<", lists:join(", ", Segments), ">>"] @@ -491,17 +430,14 @@ inspect(List) when is_list(List) -> {proper, Elements} -> ["[", Elements, "]"]; {improper, Elements} -> ["//erl([", Elements, "])"] end; -% Record constructors -inspect(Any) when - is_tuple(Any) andalso - is_atom(element(1, Any)) andalso - element(1, Any) =/= false andalso - element(1, Any) =/= true andalso - element(1, Any) =/= nil +inspect(Any) when is_tuple(Any) % Record constructors + andalso is_atom(element(1, Any)) + andalso element(1, Any) =/= false + andalso element(1, Any) =/= true + andalso element(1, Any) =/= nil -> [Atom | ArgsList] = erlang:tuple_to_list(Any), - Args = lists:join( - <<", ">>, + Args = lists:join(<<", ">>, lists:map(fun inspect/1, ArgsList) ), [inspect(Atom), "(", Args, ")"]; @@ -511,14 +447,14 @@ inspect(Tuple) when is_tuple(Tuple) -> inspect(Any) when is_function(Any) -> {arity, Arity} = erlang:fun_info(Any, arity), ArgsAsciiCodes = lists:seq($a, $a + Arity - 1), - Args = lists:join( - <<", ">>, + Args = lists:join(<<", ">>, 
lists:map(fun(Arg) -> <> end, ArgsAsciiCodes) ), ["//fn(", Args, ") { ... }"]; inspect(Any) -> ["//erl(", io_lib:format("~p", [Any]), ")"]. + inspect_maybe_gleam_atom(<<>>, none, _) -> {error, nil}; inspect_maybe_gleam_atom(<>, none, _) when ?is_digit_char(First) -> @@ -527,11 +463,10 @@ inspect_maybe_gleam_atom(<<"_", _Rest/binary>>, none, _) -> {error, nil}; inspect_maybe_gleam_atom(<<"_">>, _PrevChar, _Acc) -> {error, nil}; -inspect_maybe_gleam_atom(<<"_", _Rest/binary>>, $_, _Acc) -> +inspect_maybe_gleam_atom(<<"_", _Rest/binary>>, $_, _Acc) -> {error, nil}; -inspect_maybe_gleam_atom(<>, _PrevChar, _Acc) when - not (?is_lowercase_char(First) orelse ?is_underscore_char(First) orelse ?is_digit_char(First)) --> +inspect_maybe_gleam_atom(<>, _PrevChar, _Acc) + when not (?is_lowercase_char(First) orelse ?is_underscore_char(First) orelse ?is_digit_char(First)) -> {error, nil}; inspect_maybe_gleam_atom(<>, none, Acc) -> inspect_maybe_gleam_atom(Rest, First, <>); @@ -583,26 +518,24 @@ append_segment(<<"<<">>, Segment) -> append_segment(Acc, Segment) -> <>. + inspect_maybe_utf8_string(Binary, Acc) -> case Binary of - <<>> -> - {ok, <<$", Acc/binary, $">>}; + <<>> -> {ok, <<$", Acc/binary, $">>}; <> -> - Escaped = - case First of - $" -> <<$\\, $">>; - $\\ -> <<$\\, $\\>>; - $\r -> <<$\\, $r>>; - $\n -> <<$\\, $n>>; - $\t -> <<$\\, $t>>; - $\f -> <<$\\, $f>>; - X when X > 126, X < 160 -> convert_to_u(X); - X when X < 32 -> convert_to_u(X); - Other -> <> - end, + Escaped = case First of + $" -> <<$\\, $">>; + $\\ -> <<$\\, $\\>>; + $\r -> <<$\\, $r>>; + $\n -> <<$\\, $n>>; + $\t -> <<$\\, $t>>; + $\f -> <<$\\, $f>>; + X when X > 126, X < 160 -> convert_to_u(X); + X when X < 32 -> convert_to_u(X); + Other -> <> + end, inspect_maybe_utf8_string(Rest, <>); - _ -> - {error, not_a_utf8_string} + _ -> {error, not_a_utf8_string} end. 
convert_to_u(Code) -> @@ -644,4 +577,4 @@ slice(String, Index, Length) -> case string:slice(String, Index, Length) of X when is_binary(X) -> X; X when is_list(X) -> unicode:characters_to_binary(X) - end. + end. \ No newline at end of file From aebf1d6d6a2b31718eddf7e7676bcb5cf0ce2796 Mon Sep 17 00:00:00 2001 From: Lezenn Date: Fri, 20 Dec 2024 21:23:07 +0100 Subject: [PATCH 6/6] Unit test refactor. The strip_prefix shouldn't get rid of the zero width joiners. --- src/gleam/string.gleam | 6 +++ src/gleam_stdlib.mjs | 4 +- test/gleam/string_test.gleam | 79 ++++++++++++++++++++++++++++++------ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam index 33129b00..9d2ecfb6 100644 --- a/src/gleam/string.gleam +++ b/src/gleam/string.gleam @@ -955,6 +955,9 @@ pub fn byte_size(string: String) -> Int /// If an empty prefix is given, the result is always `Ok` containing the whole string. /// If an empty string is given with a non empty prefix, then the result is always `Error(Nil)` /// +/// The function does **not** remove zero width joiner (`\u200D`) codepoints when stripping an emoji. +/// A leading one may remain. +/// /// ## Examples /// /// ```gleam @@ -986,6 +989,9 @@ pub fn strip_prefix( /// If an empty suffix is given, the result is always `Ok` containing the whole string. /// If an empty string is given with a non empty suffix, then the result is always `Error(Nil)` /// +/// The function does **not** remove zero width joiner (`\u200D`) codepoints when stripping an emoji. +/// A trailing one may remain.
+/// /// ## Examples /// /// ```gleam diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 8d459c0f..ccf2dfa8 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -1017,14 +1017,14 @@ export function string_strip_prefix(str, prefix) { } if (str == "" && prefix.length != 0) { - return new Error(Nil) + return new Error(undefined) } if (str.startsWith(prefix)) { return new Ok(str.substring(prefix.length)) } - return new Error(Nil) + return new Error(undefined) } export function string_strip_suffix(str, suffix) { diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam index bfa5f854..4a6e6c7d 100644 --- a/test/gleam/string_test.gleam +++ b/test/gleam/string_test.gleam @@ -1394,20 +1394,73 @@ pub fn inspect_map_test() { } pub fn strip_prefix_test() { - let assert Ok("gleam.run") = - string.strip_prefix("https://gleam.run", "https://") - let assert Ok("https://gleam.run") = - string.strip_prefix("https://gleam.run", "") - // string.strip_prefix("👩‍👩‍👧‍👦", prefix: "👩") |> should.equal(Ok("👩‍👧‍👦")) - let assert Ok("") = string.strip_prefix("", "") - let assert Error(Nil) = string.strip_prefix("https://gleam.run", "Lucy") - let assert Error(Nil) = string.strip_prefix("", "Lucy") + string.strip_prefix("https://gleam.run", "https://") + |> should.equal(Ok("gleam.run")) + + string.strip_prefix("https://gleam.run", "") + |> should.equal(Ok("https://gleam.run")) + + let assert Ok(top_right) = string.utf_codepoint(0x1F469) + let assert Ok(bot_left) = string.utf_codepoint(0x1F467) + let assert Ok(bot_right) = string.utf_codepoint(0x1F466) + let assert Ok(separator) = string.utf_codepoint(0x200D) + + string.strip_prefix("👩‍👩‍👧‍👦", prefix: "👩") + |> should.equal( + Ok( + string.from_utf_codepoints([ + separator, + top_right, + separator, + bot_left, + separator, + bot_right, + ]), + ), + ) + + string.strip_prefix("", "") + |> should.equal(Ok("")) + + string.strip_prefix("https://gleam.run",
"Lucy") + |> should.equal(Error(Nil)) + + string.strip_prefix("", "Lucy") + |> should.equal(Error(Nil)) } pub fn strip_suffix_test() { - let assert Ok("lucy") = string.strip_suffix("lucy@gleam.run", "@gleam.run") - let assert Ok("lucy@gleam.run") = string.strip_suffix("lucy@gleam.run", "") - let assert Ok("") = string.strip_suffix("", "") - let assert Error(Nil) = string.strip_suffix("lucy@gleam.run", "Lucy") - let assert Error(Nil) = string.strip_suffix("", "Lucy") + string.strip_suffix("lucy@gleam.run", "@gleam.run") + |> should.equal(Ok("lucy")) + + string.strip_suffix("lucy@gleam.run", "") + |> should.equal(Ok("lucy@gleam.run")) + + string.strip_suffix("", "") + |> should.equal(Ok("")) + + let assert Ok(top_left) = string.utf_codepoint(0x1F468) + let assert Ok(top_right) = string.utf_codepoint(0x1F469) + let assert Ok(bot_left) = string.utf_codepoint(0x1F467) + let assert Ok(separator) = string.utf_codepoint(0x200D) + + string.strip_suffix("👨‍👩‍👧‍👦", suffix: "👦") + |> should.equal( + Ok( + string.from_utf_codepoints([ + top_left, + separator, + top_right, + separator, + bot_left, + separator, + ]), + ), + ) + + string.strip_suffix("lucy@gleam.run", "Lucy") + |> should.equal(Error(Nil)) + + string.strip_suffix("", "Lucy") + |> should.equal(Error(Nil)) }