diff --git a/lib/stdlib/Cargo.toml b/lib/stdlib/Cargo.toml index b68c4bfda..c61a8ba1f 100644 --- a/lib/stdlib/Cargo.toml +++ b/lib/stdlib/Cargo.toml @@ -44,6 +44,7 @@ sha-1 = { version = "0.10", optional = true } sha-2 = { package = "sha2", version = "0.10", optional = true } sha-3 = { package = "sha3", version = "0.10", optional = true } strip-ansi-escapes = { version = "0.1", optional = true } +substring = { version = "1.4.5", optional = true } syslog_loose = { version = "0.18", optional = true } tracing = { version = "0.1", optional = true } url = { version = "2", optional = true } @@ -169,6 +170,7 @@ default = [ "mezmo_parse_int", "mezmo_relational_comparison", "mezmo_to_string", + "mezmo_string_operations", "mod", "now", "object", @@ -335,6 +337,7 @@ mezmo_is_truthy = [] mezmo_parse_int = ["parse_int"] mezmo_parse_float = ["to_float"] mezmo_relational_comparison = [] +mezmo_string_operations = ["substring"] mezmo_to_string = ["to_string"] mod = [] now = ["dep:chrono"] diff --git a/lib/stdlib/src/lib.rs b/lib/stdlib/src/lib.rs index e01c9ca53..aa4212a0b 100644 --- a/lib/stdlib/src/lib.rs +++ b/lib/stdlib/src/lib.rs @@ -213,8 +213,31 @@ mod mezmo_parse_float; mod mezmo_parse_int; #[cfg(feature = "mezmo_relational_comparison")] mod mezmo_relational_comparison; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_char_at; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_index_of; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_last_index_of; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_pad_end; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_pad_start; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_repeat; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_string_at; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_string_slice; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_substring; +#[cfg(feature = "mezmo_string_operations")] +mod mezmo_trim_end; +#[cfg(feature = "mezmo_string_operations")] 
+mod mezmo_trim_start; #[cfg(feature = "mezmo_to_string")] mod mezmo_to_string; + #[cfg(feature = "mod")] mod mod_func; #[cfg(feature = "now")] @@ -540,6 +563,28 @@ pub use mezmo_parse_float::MezmoParseFloat; pub use mezmo_parse_int::MezmoParseInt; #[cfg(feature = "mezmo_relational_comparison")] pub use mezmo_relational_comparison::{MezmoGt, MezmoGte, MezmoLt, MezmoLte}; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_char_at::MezmoCharAt; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_index_of::MezmoIndexOf; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_last_index_of::MezmoLastIndexOf; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_pad_end::MezmoPadEnd; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_pad_start::MezmoPadStart; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_repeat::MezmoRepeat; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_string_at::MezmoStringAt; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_string_slice::MezmoStringSlice; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_substring::MezmoSubstring; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_trim_end::MezmoTrimEnd; +#[cfg(feature = "mezmo_string_operations")] +pub use mezmo_trim_start::MezmoTrimStart; #[cfg(feature = "mezmo_to_string")] pub use mezmo_to_string::MezmoToString; #[cfg(feature = "mod")] @@ -891,6 +936,28 @@ pub fn all() -> Vec> { Box::new(MezmoLt), #[cfg(feature = "mezmo_relational_comparison")] Box::new(MezmoLte), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoCharAt), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoIndexOf), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoLastIndexOf), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoPadEnd), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoPadStart), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoRepeat), + #[cfg(feature 
= "mezmo_string_operations")] + Box::new(MezmoStringAt), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoStringSlice), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoSubstring), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoTrimEnd), + #[cfg(feature = "mezmo_string_operations")] + Box::new(MezmoTrimStart), #[cfg(feature = "mezmo_to_string")] Box::new(MezmoToString), #[cfg(feature = "mod")] diff --git a/lib/stdlib/src/mezmo_char_at.rs b/lib/stdlib/src/mezmo_char_at.rs new file mode 100644 index 000000000..2c78f2aaa --- /dev/null +++ b/lib/stdlib/src/mezmo_char_at.rs @@ -0,0 +1,130 @@ +use std::borrow::Cow; + +use ::value::Value; +use compiler::{value::VrlValueConvert, Expression}; +use vrl::prelude::*; +use vrl_core::Resolved; + +fn char_at(s: Cow<'_, str>, index: i64) -> Value { + if index >= 0 { + s.chars() + .nth(index as usize) + .map(|c| c.to_string()) + .unwrap_or(String::new()) + .into() + } else { + String::new().into() + } +} + +/// Returns the char at the given index as a string. Negative and out of range +/// indexes return an empty string. +/// +/// Behaves like the JavaScript's String.prototype.charAt() method. 
+#[derive(Clone, Copy, Debug)] +pub struct MezmoCharAt; + +impl Function for MezmoCharAt { + fn identifier(&self) -> &'static str { + "mezmo_char_at" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "index", + kind: kind::INTEGER, + required: true, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[Example { + title: "basic", + source: "mezmo_char_at(\"abc\", 0)", + result: Ok("a"), + }] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let index = arguments.required("index"); + + Ok(MezmoCharAtFn { value, index }.as_expr()) + } +} + +#[derive(Debug, Clone)] +struct MezmoCharAtFn { + value: Box, + index: Box, +} + +impl FunctionExpression for MezmoCharAtFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let index = self.index.resolve(ctx)?; + + Ok(char_at(value.try_bytes_utf8_lossy()?, index.try_integer()?)) + } + + fn type_def(&self, _state: &state::TypeState) -> TypeDef { + TypeDef::bytes().infallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + mezmo_char_at => MezmoCharAt; + + basic { + args: func_args![value: "abc", index: 0], + want: Ok("a"), + tdef: TypeDef::bytes().infallible(), + } + + negative_index { + args: func_args![value: "abc", index: -3], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + invalid_index { + args: func_args![value: "abc", index: 4], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + empty { + args: func_args![value: "", index: 0], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + empty_non_zero { + args: func_args![value: "", index: 1], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + empty_negative { + args: func_args![value: "", index: -1], + 
want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + ]; +} diff --git a/lib/stdlib/src/mezmo_index_of.rs b/lib/stdlib/src/mezmo_index_of.rs new file mode 100644 index 000000000..248b16349 --- /dev/null +++ b/lib/stdlib/src/mezmo_index_of.rs @@ -0,0 +1,216 @@ +use std::borrow::Cow; + +use ::value::Value; +use compiler::{value::VrlValueConvert, Expression}; +use vrl::prelude::*; +use vrl_core::Resolved; + +fn index_of(value: Cow<'_, str>, search_value: Cow<'_, str>, position: i64) -> Value { + let position = std::cmp::max(position, 0); // Negative values clamped to 0 + + if search_value.is_empty() { + return Value::from(std::cmp::min(position, value.chars().count() as i64)); + } + + // Convert character position to the byte position in the string + let byte_position = value + .char_indices() + .nth(position as usize) + .map(|(pos, _)| pos) + .unwrap_or(value.len()); + + if byte_position == value.len() { + // Nothing to search + return Value::from(-1); + } + + match value.as_ref()[byte_position..].find(search_value.as_ref()) { + Some(found_byte_index) => { + let found_byte_index = found_byte_index + byte_position; // Absolute index within string + // Convert the byte index in the string to the character index in the string + let found = value + .char_indices() + .enumerate() + .find(|(_, (byte_index, _))| found_byte_index == *byte_index); + match found { + Some(found) => Value::from(found.0 as i64), + None => Value::from(-1), // This means search value is not valid utf8? + } + } + None => Value::from(-1), + } +} + +/// Searches a given string for a search string and returns the index of the +/// first occurance of the search string. Returns -1 if the search string is not +/// found. +/// +/// If an optional position value is given then only occurences of the search +/// string greater than or equal to that position are considered. Position can +/// be negative and counts from the back of the string to be searched. 
+/// +/// Behaves like the JavaScript's String.prototype.indexOf() method. +#[derive(Clone, Copy, Debug)] +pub struct MezmoIndexOf; + +impl Function for MezmoIndexOf { + fn identifier(&self) -> &'static str { + "mezmo_index_of" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "search_value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "position", + kind: kind::INTEGER, + required: false, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + Example { + title: "basic", + source: "mezmo_index_of(\"abc\", \"bc\")", + result: Ok("1"), + }, + Example { + title: "position", + source: "mezmo_index_of(\"abcdefabcdef\", \"abc\", 6)", + result: Ok("6"), + }, + ] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let search_value = arguments.required("search_value"); + let position = arguments.optional("position"); + + Ok(MezmoIndexOfFn { + value, + search_value, + position, + } + .as_expr()) + } +} + +#[derive(Debug, Clone)] +struct MezmoIndexOfFn { + value: Box, + search_value: Box, + position: Option>, +} + +impl FunctionExpression for MezmoIndexOfFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let search_value = self.search_value.resolve(ctx)?; + let position = match &self.position { + Some(v) => v.resolve(ctx)?.try_integer()?, + None => 0, + }; + + Ok(index_of( + value.try_bytes_utf8_lossy()?, + search_value.try_bytes_utf8_lossy()?, + position, + )) + } + + fn type_def(&self, _state: &state::TypeState) -> TypeDef { + TypeDef::bytes().infallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + mezmo_index_of => MezmoIndexOf; + + basic { + args: func_args![value: "abc", search_value: "bc"], + want: Ok(1), + tdef: 
TypeDef::bytes().infallible(), + } + + utf8 { + args: func_args![value: "नमस्ते", search_value: "स्ते"], + want: Ok(2), + tdef: TypeDef::bytes().infallible(), + } + + not_found { + args: func_args![value: "abc", search_value: "def"], + want: Ok(-1), + tdef: TypeDef::bytes().infallible(), + } + + position { + args: func_args![value: "abcdefabcdef", search_value: "abc", position: 6], + want: Ok(6), + tdef: TypeDef::bytes().infallible(), + } + + position_greater_than_length { + args: func_args![value: "abc", search_value: "bc", position: 100], + want: Ok(-1), + tdef: TypeDef::bytes().infallible(), + } + + negative_position { + args: func_args![value: "abcdefabcdef", search_value: "abc", position: -6], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + empty { + args: func_args![value: "", search_value: ""], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + search_non_empty_with_empty { + args: func_args![value: "abc", search_value: ""], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + empty_with_position { + args: func_args![value: "abc", search_value: "", position: 3], + want: Ok(3), + tdef: TypeDef::bytes().infallible(), + } + + empty_with_negative_position { + args: func_args![value: "abc", search_value: "", position: -1], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + empty_with_position_exceed_length { + args: func_args![value: "abc", search_value: "", position: 6], + want: Ok(3), + tdef: TypeDef::bytes().infallible(), + } + ]; +} diff --git a/lib/stdlib/src/mezmo_last_index_of.rs b/lib/stdlib/src/mezmo_last_index_of.rs new file mode 100644 index 000000000..e86a651d9 --- /dev/null +++ b/lib/stdlib/src/mezmo_last_index_of.rs @@ -0,0 +1,236 @@ +use std::borrow::Cow; + +use ::value::Value; +use compiler::{value::VrlValueConvert, Expression}; +use vrl::prelude::*; +use vrl_core::Resolved; + +fn last_index_of(value: Cow<'_, str>, search_value: Cow<'_, str>, position: Option) -> Value { + if search_value.is_empty() { + 
return match position { + Some(position) => Value::from(std::cmp::min( + std::cmp::max(position, 0), // Negative values clamped to 0 + value.chars().count() as i64, + )), + None => Value::from(value.chars().count() as i64), + }; + } + + let byte_position = match position { + Some(position) => { + let position = std::cmp::max(position, 0); // Negative values clamped to 0 + + // Convert character position to the byte position in the string + let byte_position = value + .char_indices() + .nth(position as usize) + .map(|(pos, _)| pos) + .unwrap_or(value.len()); + + // Starting from the search position include search value bytes + std::cmp::min(byte_position + search_value.len(), value.len()) + } + None => value.len(), // If position is not provided we search the whole string + }; + + match value.as_ref()[..byte_position].rfind(search_value.as_ref()) { + Some(found_byte_index) => { + // Convert the byte index in the string to the character index in the string + let found = value + .char_indices() + .enumerate() + .find(|(_, (byte_index, _))| found_byte_index == *byte_index); + match found { + Some(found) => Value::from(found.0 as i64), + None => Value::from(-1), // This means search value is not valid utf8? + } + } + None => Value::from(-1), + } +} + +/// Searches a given string for a search string and returns the index of the +/// last occurance of the search string. Returns -1 if the search string is not +/// found. +/// +/// If an optional position value is given then only occurences of the search +/// string greater than or equal to that position are considered. Position can +/// be negative and counts from the back of the string to be searched. +/// +/// Behaves like the JavaScript's String.prototype.lastIndexOf() method. 
+#[derive(Clone, Copy, Debug)] +pub struct MezmoLastIndexOf; + +impl Function for MezmoLastIndexOf { + fn identifier(&self) -> &'static str { + "mezmo_last_index_of" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "search_value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "position", + kind: kind::INTEGER, + required: false, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + Example { + title: "basic", + source: "mezmo_last_index_of(\"abcabc\", \"bc\")", + result: Ok("4"), + }, + Example { + title: "position", + source: "mezmo_last_index_of(\"abcabc\", \"bc\", 3)", + result: Ok("1"), + }, + ] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let search_value = arguments.required("search_value"); + let position = arguments.optional("position"); + + Ok(MezmoLastIndexOfFn { + value, + search_value, + position, + } + .as_expr()) + } +} + +#[derive(Debug, Clone)] +struct MezmoLastIndexOfFn { + value: Box, + search_value: Box, + position: Option>, +} + +impl FunctionExpression for MezmoLastIndexOfFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let search_value = self.search_value.resolve(ctx)?; + let position = match &self.position { + Some(v) => Some(v.resolve(ctx)?.try_integer()?), + None => None, + }; + + Ok(last_index_of( + value.try_bytes_utf8_lossy()?, + search_value.try_bytes_utf8_lossy()?, + position, + )) + } + + fn type_def(&self, _state: &state::TypeState) -> TypeDef { + TypeDef::bytes().infallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + mezmo_last_index_of => MezmoLastIndexOf; + + basic { + args: func_args![value: "abcabc", search_value: "bc"], + want: Ok(4), + tdef: 
TypeDef::bytes().infallible(), + } + + utf8 { + args: func_args![value: "नमस्तेनमस्ते", search_value: "स्ते"], + want: Ok(8), + tdef: TypeDef::bytes().infallible(), + } + + not_found { + args: func_args![value: "abc", search_value: "def"], + want: Ok(-1), + tdef: TypeDef::bytes().infallible(), + } + + position { + args: func_args![value: "abcabc", search_value: "bc", position: 3], + want: Ok(1), + tdef: TypeDef::bytes().infallible(), + } + + position_boundary { + args: func_args![value: "abcabc", search_value: "bc", position: 4], + want: Ok(4), + tdef: TypeDef::bytes().infallible(), + } + + position_greater_than_length { + args: func_args![value: "abc", search_value: "bc", position: 100], + want: Ok(1), + tdef: TypeDef::bytes().infallible(), + } + + negative_position { + args: func_args![value: "abcdefabcdef", search_value: "abc", position: -5], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + zero_position { + args: func_args![value: "abcdefabcdef", search_value: "abc", position: 0], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + empty { + args: func_args![value: "", search_value: ""], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + search_non_empty_with_empty { + args: func_args![value: "abc", search_value: ""], + want: Ok(3), + tdef: TypeDef::bytes().infallible(), + } + + empty_with_position { + args: func_args![value: "abc", search_value: "", position: 3], + want: Ok(3), + tdef: TypeDef::bytes().infallible(), + } + + empty_with_negative_position { + args: func_args![value: "abc", search_value: "", position: -1], + want: Ok(0), + tdef: TypeDef::bytes().infallible(), + } + + empty_with_position_exceed_length { + args: func_args![value: "abc", search_value: "", position: 6], + want: Ok(3), + tdef: TypeDef::bytes().infallible(), + } + ]; +} diff --git a/lib/stdlib/src/mezmo_pad_end.rs b/lib/stdlib/src/mezmo_pad_end.rs new file mode 100644 index 000000000..3bceeb22b --- /dev/null +++ b/lib/stdlib/src/mezmo_pad_end.rs @@ -0,0 
+1,161 @@ +use std::borrow::Cow; + +use ::value::Value; +use compiler::{value::VrlValueConvert, Expression}; +use vrl::prelude::*; +use vrl_core::Resolved; + +fn pad_end(value: Cow<'_, str>, target_length: i64, pad_value: Cow<'_, str>) -> Value { + let current_length = value.chars().count(); + let target_length = if target_length < 0 { + 0 + } else { + target_length as usize + }; + + if target_length > current_length { + let pad_length = target_length - current_length; + let pad = pad_value + .chars() + .cycle() + .take(pad_length) + .collect::(); + let mut s = value.to_string(); + s.push_str(&pad); + s.into() + } else { + value.into() + } +} + +/// Pads a string with the provided pad string, possibly multiple times, until +/// it reaches the given target length. The string is padded from the end. +/// +/// Behaves like the JavaScript's String.prototype.padEnd() method. +#[derive(Clone, Copy, Debug)] +pub struct MezmoPadEnd; + +impl Function for MezmoPadEnd { + fn identifier(&self) -> &'static str { + "mezmo_pad_end" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "target_length", + kind: kind::INTEGER, + required: true, + }, + Parameter { + keyword: "pad_value", + kind: kind::BYTES, + required: false, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + Example { + title: "basic", + source: "mezmo_pad_end(\"abc\", 6)", + result: Ok("abc "), + }, + Example { + title: "with_value", + source: "mezmo_pad_end(\"abc\", 6, \"def\")", + result: Ok("abcdef"), + }, + ] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let target_length = arguments.required("target_length"); + let pad_value = arguments.optional("pad_value"); + + Ok(MezmoPadEndFn { + value, + target_length, + pad_value, + } + .as_expr()) + } +} + 
+#[derive(Debug, Clone)] +struct MezmoPadEndFn { + value: Box, + target_length: Box, + pad_value: Option>, +} + +impl FunctionExpression for MezmoPadEndFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let target_length = self.target_length.resolve(ctx)?; + let pad_value = match &self.pad_value { + Some(pv) => pv.resolve(ctx)?, + None => " ".into(), + }; + Ok(pad_end( + value.try_bytes_utf8_lossy()?, + target_length.try_integer()?, + pad_value.try_bytes_utf8_lossy()?, + )) + } + + fn type_def(&self, _state: &state::TypeState) -> TypeDef { + TypeDef::bytes().infallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + mezmo_pad_end => MezmoPadEnd; + + basic { + args: func_args![value: "abc", target_length: 6], + want: Ok("abc "), + tdef: TypeDef::bytes().infallible(), + } + + repeat_pad { + args: func_args![value: "abc", target_length: 10, pad_value: "foo"], + want: Ok("abcfoofoof"), + tdef: TypeDef::bytes().infallible(), + } + + negative_target_length { + args: func_args![value: "abc", target_length: -10, pad_value: "foo"], + want: Ok("abc"), + tdef: TypeDef::bytes().infallible(), + } + + empty { + args: func_args![value: "", target_length: 10, pad_value: ""], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + pad_with_empty { + args: func_args![value: "abc", target_length: 10, pad_value: ""], + want: Ok("abc"), + tdef: TypeDef::bytes().infallible(), + } + ]; +} diff --git a/lib/stdlib/src/mezmo_pad_start.rs b/lib/stdlib/src/mezmo_pad_start.rs new file mode 100644 index 000000000..4c4621a5a --- /dev/null +++ b/lib/stdlib/src/mezmo_pad_start.rs @@ -0,0 +1,160 @@ +use std::borrow::Cow; + +use ::value::Value; +use compiler::{value::VrlValueConvert, Expression}; +use vrl::prelude::*; +use vrl_core::Resolved; + +fn pad_start(value: Cow<'_, str>, target_length: i64, pad_value: Cow<'_, str>) -> Value { + let current_length = value.chars().count(); + let target_length = if target_length 
< 0 { + 0 + } else { + target_length as usize + }; + + if target_length > current_length { + let pad_length = target_length - current_length; + let mut pad = pad_value + .chars() + .cycle() + .take(pad_length) + .collect::(); + pad.push_str(&value); + pad.into() + } else { + value.into() + } +} + +/// Pads a string with the provided pad string, possibly multiple times, until +/// it reaches the given target length. The string is padded from the start. +/// +/// Behaves like the JavaScript's String.prototype.padStart() method. +#[derive(Clone, Copy, Debug)] +pub struct MezmoPadStart; + +impl Function for MezmoPadStart { + fn identifier(&self) -> &'static str { + "mezmo_pad_start" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "target_length", + kind: kind::INTEGER, + required: true, + }, + Parameter { + keyword: "pad_value", + kind: kind::BYTES, + required: false, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + Example { + title: "basic", + source: "mezmo_pad_start(\"abc\", 6)", + result: Ok(" abc"), + }, + Example { + title: "with_value", + source: "mezmo_pad_start(\"abc\", 6, \"def\")", + result: Ok("defabc"), + }, + ] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let target_length = arguments.required("target_length"); + let pad_value = arguments.optional("pad_value"); + + Ok(MezmoPadStartFn { + value, + target_length, + pad_value, + } + .as_expr()) + } +} + +#[derive(Debug, Clone)] +struct MezmoPadStartFn { + value: Box, + target_length: Box, + pad_value: Option>, +} + +impl FunctionExpression for MezmoPadStartFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let target_length = self.target_length.resolve(ctx)?; + let pad_value = match 
&self.pad_value { + Some(pv) => pv.resolve(ctx)?, + None => " ".into(), + }; + Ok(pad_start( + value.try_bytes_utf8_lossy()?, + target_length.try_integer()?, + pad_value.try_bytes_utf8_lossy()?, + )) + } + + fn type_def(&self, _state: &state::TypeState) -> TypeDef { + TypeDef::bytes().infallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + mezmo_pad_start => MezmoPadStart; + + basic { + args: func_args![value: "abc", target_length: 6], + want: Ok(" abc"), + tdef: TypeDef::bytes().infallible(), + } + + repeat_pad { + args: func_args![value: "abc", target_length: 10, pad_value: "foo"], + want: Ok("foofoofabc"), + tdef: TypeDef::bytes().infallible(), + } + + negative_target_length { + args: func_args![value: "abc", target_length: -10, pad_value: "foo"], + want: Ok("abc"), + tdef: TypeDef::bytes().infallible(), + } + + empty { + args: func_args![value: "", target_length: 10, pad_value: ""], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + pad_with_empty { + args: func_args![value: "abc", target_length: 10, pad_value: ""], + want: Ok("abc"), + tdef: TypeDef::bytes().infallible(), + } + ]; +} diff --git a/lib/stdlib/src/mezmo_repeat.rs b/lib/stdlib/src/mezmo_repeat.rs new file mode 100644 index 000000000..a012768ba --- /dev/null +++ b/lib/stdlib/src/mezmo_repeat.rs @@ -0,0 +1,104 @@ +use compiler::{value::VrlValueConvert, Expression}; +use vrl::prelude::*; +use vrl_core::Resolved; +/// Repeats a the given string by the provided count. +/// +/// Behaves like the JavaScript's String.prototype.repeat() method except for +/// not returning an error for negative counts. In this case an empty string is +/// returned (behaves as if the count is 0). 
+#[derive(Clone, Copy, Debug)] +pub struct MezmoRepeat; + +impl Function for MezmoRepeat { + fn identifier(&self) -> &'static str { + "mezmo_repeat" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "count", + kind: kind::INTEGER, + required: true, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[Example { + title: "basic", + source: "mezmo_repeat(\"abc\", 3)", + result: Ok("abcabcabc"), + }] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let count = arguments.required("count"); + + Ok(MezmoRepeatFn { value, count }.as_expr()) + } +} + +#[derive(Debug, Clone)] +struct MezmoRepeatFn { + value: Box, + count: Box, +} + +impl FunctionExpression for MezmoRepeatFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let count = self.count.resolve(ctx)?.try_integer()?; + + let count = std::cmp::max(count, 0) as usize; // Negative values clamped to 0 + Ok(value.try_bytes_utf8_lossy()?.repeat(count).into()) + } + + fn type_def(&self, _state: &state::TypeState) -> TypeDef { + TypeDef::bytes().infallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + mezmo_repeat => MezmoRepeat; + + basic { + args: func_args![value: "abc", count: 3], + want: Ok("abcabcabc"), + tdef: TypeDef::bytes().infallible(), + } + + negative { + args: func_args![value: "abc", count: -1], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + zero { + args: func_args![value: "abc", count: 0], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + empty { + args: func_args![value: "", count: 1], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + ]; +} diff --git a/lib/stdlib/src/mezmo_string_at.rs b/lib/stdlib/src/mezmo_string_at.rs new file mode 
100644 index 000000000..ec91d966a --- /dev/null +++ b/lib/stdlib/src/mezmo_string_at.rs @@ -0,0 +1,149 @@ +use std::borrow::Cow; + +use compiler::{value::VrlValueConvert, Expression}; +use vrl::prelude::*; +use vrl_core::Resolved; + + +fn string_at(s: Cow<'_, str>, index: i64) -> Resolved { + if index >= 0 { + Ok(s.chars() + .nth(index as usize) + .map(|c| c.to_string()) + .unwrap_or(String::new()) + .into()) + } else { + Ok(s.chars() + .nth_back((-(index + 1)) as usize) + .map(|c| c.to_string()) + .unwrap_or(String::new()) + .into()) + } +} + +/// Returns the char at the given index as a string. Allows negative indexes, +/// but indexes out of range, including out of range negative indexes, return an +/// emtpy string. +/// +/// Behaves like the JavaScript's String.prototype.at() method except for not +/// returning an error for out of range indexes. In this case an empty string is +/// returned. +#[derive(Clone, Copy, Debug)] +pub struct MezmoStringAt; + +impl Function for MezmoStringAt { + fn identifier(&self) -> &'static str { + "mezmo_string_at" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }, + Parameter { + keyword: "index", + kind: kind::INTEGER, + required: true, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[Example { + title: "basic", + source: "mezmo_string_at(\"abc\", 0)", + result: Ok("a"), + }] + } + + fn compile( + &self, + _state: &state::TypeState, + _ctx: &mut FunctionCompileContext, + arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let index = arguments.required("index"); + + Ok(MezmoStringAtFn { + value, + index, + } + .as_expr()) + } +} + +#[derive(Debug, Clone)] +struct MezmoStringAtFn { + value: Box, + index: Box, +} + +impl FunctionExpression for MezmoStringAtFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?; + let index = self.index.resolve(ctx)?; + 
string_at( + value.try_bytes_utf8_lossy()?, + index.try_integer()?, + ) + } + + fn type_def(&self, _state: &state::TypeState) -> TypeDef { + TypeDef::bytes().infallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + mezmo_string_at => MezmoStringAt; + + basic { + args: func_args![value: "abc", index: 0], + want: Ok("a"), + tdef: TypeDef::bytes().infallible(), + } + + negative_index { + args: func_args![value: "abc", index: -3], + want: Ok("a"), + tdef: TypeDef::bytes().infallible(), + } + + invalid_index { + args: func_args![value: "abc", index: 4], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + invalid_negative_index { + args: func_args![value: "abc", index: -4], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + empty { + args: func_args![value: "", index: 0], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + empty_non_zero { + args: func_args![value: "", index: 1], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + + empty_negative { + args: func_args![value: "", index: -1], + want: Ok(""), + tdef: TypeDef::bytes().infallible(), + } + ]; +} diff --git a/lib/stdlib/src/mezmo_string_slice.rs b/lib/stdlib/src/mezmo_string_slice.rs new file mode 100644 index 000000000..d23ef35e5 --- /dev/null +++ b/lib/stdlib/src/mezmo_string_slice.rs @@ -0,0 +1,232 @@ +use std::borrow::Cow; + +use ::value::Value; +use compiler::{value::VrlValueConvert, Expression}; +use substring::Substring; +use vrl::prelude::*; +use vrl_core::Resolved; + +fn string_slice(s: Cow<'_, str>, index_start: i64, index_end: Option) -> Value { + let len = s.chars().count(); + + let index_start = normalize_index(index_start, len); + + let index_end = match index_end { + Some(index_end) => normalize_index(index_end, len), + None => len, + }; + + if index_end > index_start { + Value::from(s.substring(index_start, index_end)) + } else { + Value::from("") + } +} + +fn normalize_index(index: i64, len: usize) -> usize { + if index < 
0 {
+        // `unsigned_abs` cannot overflow, unlike `-index` for `i64::MIN`.
+        // Clamping in u64 first keeps the cast back to usize lossless.
+        let from_end = index.unsigned_abs().min(len as u64) as usize;
+        len - from_end
+    } else {
+        // Non-negative here, so the u64 cast is exact; past-the-end
+        // indexes clamp to the length.
+        (index as u64).min(len as u64) as usize
+    }
+}
+
+/// Extracts a portion of a string using the provided indexes. Negative indexes
+/// are counted from the end and out-of-range indexes are clamped to the bounds
+/// of the string. Unlike substring, this does not swap the start and end
+/// indexes if end is less than start; the result is an empty string instead.
+///
+/// Behaves like JavaScript's String.prototype.slice() method.
+#[derive(Clone, Copy, Debug)]
+pub struct MezmoStringSlice;
+
+impl Function for MezmoStringSlice {
+    fn identifier(&self) -> &'static str {
+        "mezmo_string_slice"
+    }
+
+    fn parameters(&self) -> &'static [Parameter] {
+        &[
+            Parameter {
+                keyword: "value",
+                kind: kind::BYTES,
+                required: true,
+            },
+            Parameter {
+                keyword: "index_start",
+                kind: kind::INTEGER,
+                required: true,
+            },
+            Parameter {
+                keyword: "index_end",
+                kind: kind::INTEGER,
+                required: false,
+            },
+        ]
+    }
+
+    fn examples(&self) -> &'static [Example] {
+        &[
+            Example {
+                title: "basic",
+                source: "mezmo_string_slice(\"abc\", 1)",
+                result: Ok("bc"),
+            },
+            Example {
+                title: "bounds",
+                source: "mezmo_string_slice(\"abc\", 1, 2)",
+                result: Ok("b"),
+            },
+        ]
+    }
+
+    fn compile(
+        &self,
+        _state: &state::TypeState,
+        _ctx: &mut FunctionCompileContext,
+        arguments: ArgumentList,
+    ) -> Compiled {
+        let value = arguments.required("value");
+        let index_start = arguments.required("index_start");
+        let index_end = arguments.optional("index_end");
+        // Arguments are looked up by keyword, so the names above must match `parameters()`.
+        Ok(MezmoStringSliceFn {
+            value,
+            index_start,
+            index_end,
+        }
+        .as_expr())
+    }
+}
+
+#[derive(Debug, Clone)]
+struct MezmoStringSliceFn {
+    value: Box<dyn Expression>,
+    index_start: Box<dyn Expression>,
+    index_end: Option<Box<dyn Expression>>,
+}
+
+impl FunctionExpression for MezmoStringSliceFn {
+    fn resolve(&self, ctx: &mut Context) -> Resolved {
+        let value = self.value.resolve(ctx)?;
+        let index_start = self.index_start.resolve(ctx)?;
+        let index_end = match &self.index_end {
+            Some(v) =>
Some(v.resolve(ctx)?.try_integer()?),
+            None => None,
+        };
+        // Conversion failures on the value or on either index propagate through `?`.
+        Ok(string_slice(
+            value.try_bytes_utf8_lossy()?,
+            index_start.try_integer()?,
+            index_end,
+        ))
+    }
+
+    fn type_def(&self, _state: &state::TypeState) -> TypeDef {
+        TypeDef::bytes().infallible()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_function![
+        mezmo_string_slice => MezmoStringSlice;
+        // Without index_end the slice runs through the end of the string.
+        basic {
+            args: func_args![value: "abc", index_start: 1],
+            want: Ok("bc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Indexes are char positions, not byte offsets.
+        utf8 {
+            args: func_args![value: "नमस्ते", index_start: 0, index_end: 1],
+            want: Ok("न"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Combining marks count as chars of their own: 6 chars minus 2 leaves 4.
+        utf8_code_points {
+            args: func_args![value: "नमस्ते", index_start: 0, index_end: -2],
+            want: Ok("नमस्"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        start_and_end {
+            args: func_args![value: "abc", index_start: 1, index_end: 2],
+            want: Ok("b"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        same_start_and_end {
+            args: func_args![value: "abc", index_start: 2, index_end: 2],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // An end index past the string is clamped to its length.
+        index_end_greater_than_length {
+            args: func_args![value: "abc", index_start: 1, index_end: 100],
+            want: Ok("bc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Negative indexes count back from the end of the string.
+        negative_start {
+            args: func_args![value: "abc", index_start: -1, index_end: 100],
+            want: Ok("c"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_end {
+            args: func_args![value: "abc", index_start: 0, index_end: -1],
+            want: Ok("ab"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_start_and_end {
+            args: func_args![value: "abc", index_start: -3, index_end: -2],
+            want: Ok("a"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        zero_indexes {
+            args: func_args![value: "abc", index_start: 0, index_end: 0],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        empty {
+            args: func_args![value: "", index_start: 0],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        empty_start_and_end {
+            args:
func_args![value: "", index_start: 0, index_end: -1],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        end_greater_than_start {
+            args: func_args![value: "abc", index_start: 2, index_end: 0],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_end_greater_than_start {
+            args: func_args![value: "abc", index_start: -2, index_end: -3],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_greater_than_length {
+            args: func_args![value: "abc", index_start: -100],
+            want: Ok("abc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+    ];
+}
diff --git a/lib/stdlib/src/mezmo_substring.rs b/lib/stdlib/src/mezmo_substring.rs
new file mode 100644
index 000000000..7e5397618
--- /dev/null
+++ b/lib/stdlib/src/mezmo_substring.rs
@@ -0,0 +1,237 @@
+use std::borrow::Cow;
+
+use ::value::Value;
+use compiler::{value::VrlValueConvert, Expression};
+use substring::Substring;
+use vrl::prelude::*;
+use vrl_core::Resolved;
+
+/// Returns the chars of `s` between the two indexes, swapping them when they
+/// are reversed. A missing `index_end` means the end of the string.
+fn substring(s: Cow<'_, str>, index_start: i64, index_end: Option<i64>) -> Value {
+    let len = s.chars().count();
+    let index_start = normalize_index(index_start, len);
+    let index_end = index_end.map_or(len, |index_end| normalize_index(index_end, len));
+    // `Substring::substring` is called as (start, end), so order the pair first.
+    if index_end < index_start {
+        Value::from(s.substring(index_end, index_start))
+    } else {
+        Value::from(s.substring(index_start, index_end))
+    }
+}
+
+fn normalize_index(index: i64, len: usize) -> usize {
+    if index < 0 {
+        0
+    } else {
+        // Non-negative, so the u64 cast is exact. Clamping in u64 before
+        // narrowing avoids `index as usize` wrapping on 32-bit targets.
+        (index as u64).min(len as u64) as usize
+    }
+}
+
+/// Extracts a portion of a string using the provided indexes. Negative indexes
+/// are clamped to 0 and indexes larger than the string length are clamped to
+/// the length. If the start index is larger than the end index, the two
+/// indexes are swapped.
+///
+/// Behaves like JavaScript's String.prototype.substring() method.
+#[derive(Clone, Copy, Debug)]
+pub struct MezmoSubstring;
+
+impl Function for MezmoSubstring {
+    fn identifier(&self) -> &'static str {
+        "mezmo_substring"
+    }
+
+    fn parameters(&self) -> &'static [Parameter] {
+        &[
+            Parameter {
+                keyword: "value",
+                kind: kind::BYTES,
+                required: true,
+            },
+            Parameter {
+                keyword: "index_start",
+                kind: kind::INTEGER,
+                required: true,
+            },
+            Parameter {
+                keyword: "index_end",
+                kind: kind::INTEGER,
+                required: false,
+            },
+        ]
+    }
+
+    fn examples(&self) -> &'static [Example] {
+        &[
+            Example {
+                title: "basic",
+                source: "mezmo_substring(\"abc\", 1)",
+                result: Ok("bc"),
+            },
+            Example {
+                title: "bounds",
+                source: "mezmo_substring(\"abc\", 1, 2)",
+                result: Ok("b"),
+            },
+            Example {
+                title: "flipped_bounds",
+                source: "mezmo_substring(\"abc\", 2, 1)",
+                result: Ok("b"),
+            },
+        ]
+    }
+
+    fn compile(
+        &self,
+        _state: &state::TypeState,
+        _ctx: &mut FunctionCompileContext,
+        arguments: ArgumentList,
+    ) -> Compiled {
+        let value = arguments.required("value");
+        let index_start = arguments.required("index_start");
+        let index_end = arguments.optional("index_end");
+        // Arguments are looked up by keyword, so the names above must match `parameters()`.
+        Ok(MezmoSubstringFn {
+            value,
+            index_start,
+            index_end,
+        }
+        .as_expr())
+    }
+}
+
+#[derive(Debug, Clone)]
+struct MezmoSubstringFn {
+    value: Box<dyn Expression>,
+    index_start: Box<dyn Expression>,
+    index_end: Option<Box<dyn Expression>>,
+}
+
+impl FunctionExpression for MezmoSubstringFn {
+    fn resolve(&self, ctx: &mut Context) -> Resolved {
+        let value = self.value.resolve(ctx)?;
+        let index_start = self.index_start.resolve(ctx)?;
+        let index_end = match &self.index_end {
+            Some(v) => Some(v.resolve(ctx)?.try_integer()?),
+            None => None,
+        };
+        // Conversion failures on the value or on either index propagate through `?`.
+        Ok(substring(
+            value.try_bytes_utf8_lossy()?,
+            index_start.try_integer()?,
+            index_end,
+        ))
+    }
+
+    fn type_def(&self, _state: &state::TypeState) -> TypeDef {
+        TypeDef::bytes().infallible()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_function![
+        mezmo_substring => MezmoSubstring;
+
+        basic {
+            args: func_args![value: "abc", index_start: 1],
+            want: Ok("bc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Start 8 is clamped to the length (3), then the reversed pair is swapped.
+        flipped_indexes {
+            args: func_args![value: "abc", index_start: 8, index_end: 0],
+            want: Ok("abc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Indexes are char positions, not byte offsets.
+        utf8 {
+            args: func_args![value: "नमस्ते", index_start: 0, index_end: 1],
+            want: Ok("न"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        utf8_code_points {
+            args: func_args![value: "नमस्ते", index_start: 0, index_end: 4],
+            want: Ok("नमस्"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        start_and_end {
+            args: func_args![value: "abc", index_start: 1, index_end: 2],
+            want: Ok("b"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        same_start_and_end {
+            args: func_args![value: "abc", index_start: 2, index_end: 2],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        index_end_greater_than_length {
+            args: func_args![value: "abc", index_start: 1, index_end: 100],
+            want: Ok("bc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Negative indexes are clamped to 0 rather than counted from the end.
+        negative_start {
+            args: func_args![value: "abc", index_start: -1, index_end: 100],
+            want: Ok("abc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_end {
+            args: func_args![value: "abc", index_start: 0, index_end: -1],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_start_and_end {
+            args: func_args![value: "abc", index_start: -3, index_end: -2],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        zero_indexes {
+            args: func_args![value: "abc", index_start: 0, index_end: 0],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        empty {
+            args: func_args![value: "", index_start: 0],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        empty_start_and_end {
+            args: func_args![value: "", index_start: 0, index_end: 1],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Reversed bounds are swapped, so (2, 0) selects the same chars as (0, 2).
+        end_greater_than_start {
+            args: func_args![value: "abc", index_start: 2, index_end: 0],
+            want: Ok("ab"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_end_greater_than_start {
+            args: func_args![value: "abc", index_start: -2, index_end: -3],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        negative_greater_than_length {
+            args: func_args![value: "abc", index_start: -100],
+            want: Ok("abc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+    ];
+}
diff --git a/lib/stdlib/src/mezmo_trim_end.rs b/lib/stdlib/src/mezmo_trim_end.rs
new file mode 100644
index 000000000..7d2f704e8
--- /dev/null
+++ b/lib/stdlib/src/mezmo_trim_end.rs
@@ -0,0 +1,94 @@
+use compiler::{value::VrlValueConvert, Expression};
+use vrl::prelude::*;
+use vrl_core::Resolved;
+
+/// Trims whitespace from the end of a string.
+#[derive(Clone, Copy, Debug)]
+pub struct MezmoTrimEnd;
+
+impl Function for MezmoTrimEnd {
+    fn identifier(&self) -> &'static str {
+        "mezmo_trim_end"
+    }
+
+    fn parameters(&self) -> &'static [Parameter] {
+        &[Parameter {
+            keyword: "value",
+            kind: kind::BYTES,
+            required: true,
+        }]
+    }
+
+    fn examples(&self) -> &'static [Example] {
+        &[Example {
+            title: "basic",
+            source: "mezmo_trim_end(\"abc \")",
+            result: Ok("abc"),
+        }]
+    }
+
+    fn compile(
+        &self,
+        _state: &state::TypeState,
+        _ctx: &mut FunctionCompileContext,
+        arguments: ArgumentList,
+    ) -> Compiled {
+        let value = arguments.required("value");
+
+        Ok(MezmoTrimEndFn { value }.as_expr())
+    }
+}
+
+/// Compiled form of a `mezmo_trim_end` call; `value` is the argument
+/// expression, evaluated each time `resolve` runs.
+#[derive(Debug, Clone)]
+struct MezmoTrimEndFn {
+    value: Box<dyn Expression>,
+}
+
+impl FunctionExpression for MezmoTrimEndFn {
+    /// Evaluates `value`, reads it as a string and returns it without its
+    /// trailing whitespace. Evaluation and conversion errors propagate via `?`.
+    fn resolve(&self, ctx: &mut Context) -> Resolved {
+        let value = self.value.resolve(ctx)?;
+        // `str::trim_end` strips Unicode `White_Space`, not only ASCII blanks.
+        Ok(value.try_bytes_utf8_lossy()?.trim_end().into())
+    }
+
+    fn type_def(&self, _state: &state::TypeState) -> TypeDef {
+        TypeDef::bytes().infallible()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_function![
+        mezmo_trim_end => MezmoTrimEnd;
+
+        basic {
+            args: func_args![value: "abc "],
+            want: Ok("abc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        not_trimming_the_start {
+            args: func_args![value: " abc"],
+            want: Ok(" abc"),
+            tdef:
TypeDef::bytes().infallible(),
+        }
+
+        empty {
+            args: func_args![value: ""],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        only_whitespace {
+            args: func_args![value: " "],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+    ];
+}
diff --git a/lib/stdlib/src/mezmo_trim_start.rs b/lib/stdlib/src/mezmo_trim_start.rs
new file mode 100644
index 000000000..c846cce55
--- /dev/null
+++ b/lib/stdlib/src/mezmo_trim_start.rs
@@ -0,0 +1,95 @@
+use compiler::{value::VrlValueConvert, Expression};
+use vrl::prelude::*;
+use vrl_core::Resolved;
+
+/// Trims whitespace from the start of a string.
+/// Whitespace at the end of the string is left untouched.
+#[derive(Clone, Copy, Debug)]
+pub struct MezmoTrimStart;
+
+impl Function for MezmoTrimStart {
+    fn identifier(&self) -> &'static str {
+        "mezmo_trim_start"
+    }
+
+    fn parameters(&self) -> &'static [Parameter] {
+        &[Parameter {
+            keyword: "value",
+            kind: kind::BYTES,
+            required: true,
+        }]
+    }
+
+    fn examples(&self) -> &'static [Example] {
+        &[Example {
+            title: "basic",
+            source: "mezmo_trim_start(\" abc\")",
+            result: Ok("abc"),
+        }]
+    }
+
+    fn compile(
+        &self,
+        _state: &state::TypeState,
+        _ctx: &mut FunctionCompileContext,
+        arguments: ArgumentList,
+    ) -> Compiled {
+        let value = arguments.required("value");
+
+        Ok(MezmoTrimStartFn { value }.as_expr())
+    }
+}
+
+/// Compiled form of a `mezmo_trim_start` call; `value` is the argument
+/// expression, evaluated each time `resolve` runs.
+#[derive(Debug, Clone)]
+struct MezmoTrimStartFn {
+    value: Box<dyn Expression>,
+}
+
+impl FunctionExpression for MezmoTrimStartFn {
+    /// Evaluates `value`, reads it as a string and returns it without its
+    /// leading whitespace. Evaluation and conversion errors propagate via `?`.
+    fn resolve(&self, ctx: &mut Context) -> Resolved {
+        let value = self.value.resolve(ctx)?;
+        // `str::trim_start` strips Unicode `White_Space`, not only ASCII blanks.
+        Ok(value.try_bytes_utf8_lossy()?.trim_start().into())
+    }
+
+    fn type_def(&self, _state: &state::TypeState) -> TypeDef {
+        TypeDef::bytes().infallible()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_function![
+        mezmo_trim_start => MezmoTrimStart;
+
+        basic {
+            args: func_args![value: " abc"],
+            want: Ok("abc"),
+            tdef: TypeDef::bytes().infallible(),
+        }
+
+        not_trimming_the_end {
+            args: func_args![value: "abc "],
+            want: Ok("abc "),
+            tdef:
TypeDef::bytes().infallible(),
+        }
+        // An empty input is returned unchanged.
+        empty {
+            args: func_args![value: ""],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+        // Input made up solely of whitespace trims down to "".
+        only_whitespace {
+            args: func_args![value: " "],
+            want: Ok(""),
+            tdef: TypeDef::bytes().infallible(),
+        }
+    ];
+}