From 61eab758f6396a1732c8db993ecf33df24c1007e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ml=C3=A1dek?= Date: Fri, 3 Jan 2025 16:25:28 +0100 Subject: [PATCH] axum: Update matchit to 0.8.6 and support capture prefixes and suffixes --- Cargo.lock | 4 +- axum/CHANGELOG.md | 4 ++ axum/Cargo.toml | 2 +- axum/src/docs/routing/route.md | 31 ++++++++++- axum/src/extract/matched_path.rs | 21 ++++++++ axum/src/extract/path/mod.rs | 21 ++++++++ axum/src/routing/strip_prefix.rs | 90 +++++++++++++++++++++++++++++--- axum/src/routing/tests/mod.rs | 60 +++++++++++++++++++++ axum/src/routing/tests/nest.rs | 38 ++++++++++++++ 9 files changed, 260 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ffe93b6a6..de7814e70e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3066,9 +3066,9 @@ dependencies = [ [[package]] name = "matchit" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" +checksum = "2f926ade0c4e170215ae43342bf13b9310a437609c81f29f86c5df6657582ef9" [[package]] name = "md-5" diff --git a/axum/CHANGELOG.md b/axum/CHANGELOG.md index 5cc9649adb..0f90d65290 100644 --- a/axum/CHANGELOG.md +++ b/axum/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 # Unreleased +- **changed:** Updated `matchit` allowing for routes with captures and static prefixes and suffixes ([#3143]) + +[#3143]: https://github.com/tokio-rs/axum/pull/3143 + # 0.8.0 ## since rc.1 diff --git a/axum/Cargo.toml b/axum/Cargo.toml index 4b00721142..1c8f25e074 100644 --- a/axum/Cargo.toml +++ b/axum/Cargo.toml @@ -57,7 +57,7 @@ http = "1.0.0" http-body = "1.0.0" http-body-util = "0.1.0" itoa = "1.0.5" -matchit = "=0.8.4" +matchit = "=0.8.6" memchr = "2.4.1" mime = "0.3.16" percent-encoding = "2.1" diff --git a/axum/src/docs/routing/route.md b/axum/src/docs/routing/route.md index 01be9152ed..3f037498ae 100644 --- a/axum/src/docs/routing/route.md +++ b/axum/src/docs/routing/route.md @@ -1,7 +1,7 @@ Add another route to the router. `path` is a string of path segments separated by `/`. Each segment -can be either static, a capture, or a wildcard. +can either be static, contain a capture, or be a wildcard. `method_router` is the [`MethodRouter`] that should receive the request if the path matches `path`. `method_router` will commonly be a handler wrapped in a method @@ -24,11 +24,15 @@ Paths can contain segments like `/{key}` which matches any single segment and will store the value captured at `key`. The value captured can be zero-length except for in the invalid path `//`. +Each segment may have only one capture, but it may have static prefixes and suffixes. + Examples: - `/{key}` - `/users/{id}` - `/users/{id}/tweets` +- `/avatars/large_{id}.png` +- `/avatars/small_{id}.jpg` Captures can be extracted using [`Path`](crate::extract::Path). See its documentation for more details. @@ -39,6 +43,31 @@ regular expression. You must handle that manually in your handlers. [`MatchedPath`](crate::extract::MatchedPath) can be used to extract the matched path rather than the actual path. +Captures must not be empty. For example `/a/` will not match `/a/{capture}` and +`/.png` will not match `/{image}.png`. + +You may mix captures that have different static prefixes or suffixes, though it is discouraged as it +might lead to surprising behavior. If multiple routes would match, the one with the longest static +prefix is used, if there are multiple with the same match, the longest matched static suffix is +chosen. For example, if a request is done to `/abcdef` here are examples of routes that would all +match. If multiple of these were defined in a single router, the topmost one would be used. + +- `/abcdef` +- `/abc{x}ef` +- `/abc{x}f` +- `/abc{x}` +- `/a{x}def` +- `/a{x}` +- `/{x}def` +- `/{x}` + +This is done on each level of the path and if the path matches even if due to a wildcard, that path +will be chosen. For example if one makes a request to `/foo/bar/baz` the first route will be used by +axum because it has better match on the leftmost differing path segment and the whole path matches. + +- `/foo/{*wildcard}` +- `/fo{x}/bar/baz` + # Wildcards Paths can end in `/{*key}` which matches all segments and will store the segments diff --git a/axum/src/extract/matched_path.rs b/axum/src/extract/matched_path.rs index 0f5efba326..34dbd25978 100644 --- a/axum/src/extract/matched_path.rs +++ b/axum/src/extract/matched_path.rs @@ -295,6 +295,27 @@ mod tests { assert_eq!(res.status(), StatusCode::OK); } + #[crate::test] + async fn can_extract_nested_matched_path_with_prefix_and_suffix_in_middleware_on_nested_router() + { + async fn extract_matched_path(matched_path: MatchedPath, req: Request) -> Request { + assert_eq!(matched_path.as_str(), "/f{o}o/b{a}r"); + req + } + + let app = Router::new().nest( + "/f{o}o", + Router::new() + .route("/b{a}r", get(|| async move {})) + .layer(map_request(extract_matched_path)), + ); + + let client = TestClient::new(app); + + let res = client.get("/foo/bar").await; + assert_eq!(res.status(), StatusCode::OK); + } + #[crate::test] async fn can_extract_nested_matched_path_in_middleware_on_nested_router_via_extension() { async fn extract_matched_path(req: Request) -> Request { diff --git a/axum/src/extract/path/mod.rs b/axum/src/extract/path/mod.rs index 1b179547b1..3af526ff78 100644 --- a/axum/src/extract/path/mod.rs +++ b/axum/src/extract/path/mod.rs @@ -848,6 +848,27 @@ mod tests { assert_eq!(res.status(), StatusCode::OK); } + #[crate::test] + async fn deserialize_into_vec_of_tuples_with_prefixes_and_suffixes() { + let app = Router::new().route( + "/f{o}o/b{a}r", + get(|Path(params): Path>| async move { + assert_eq!( + params, + vec![ + ("o".to_owned(), "0".to_owned()), + ("a".to_owned(), "4".to_owned()) + ] + ); + }), + ); + + let client = TestClient::new(app); + + let res = client.get("/f0o/b4r").await; + assert_eq!(res.status(), StatusCode::OK); + } + #[crate::test] async fn type_that_uses_deserialize_any() { use time::Date; diff --git a/axum/src/routing/strip_prefix.rs b/axum/src/routing/strip_prefix.rs index 3209da3b12..466adb9c7c 100644 --- a/axum/src/routing/strip_prefix.rs +++ b/axum/src/routing/strip_prefix.rs @@ -66,7 +66,7 @@ fn strip_prefix(uri: &Uri, prefix: &str) -> Option { match item { Item::Both(path_segment, prefix_segment) => { - if is_capture(prefix_segment) || path_segment == prefix_segment { + if prefix_matches(prefix_segment, path_segment) { // the prefix segment is either a param, which matches anything, or // it actually matches the path segment *matching_prefix_length.as_mut().unwrap() += path_segment.len(); @@ -148,12 +148,67 @@ where }) } -fn is_capture(segment: &str) -> bool { - segment.starts_with('{') - && segment.ends_with('}') - && !segment.starts_with("{{") - && !segment.ends_with("}}") - && !segment.starts_with("{*") +fn prefix_matches(prefix_segment: &str, path_segment: &str) -> bool { + if let Some((prefix, suffix)) = capture_prefix_suffix(prefix_segment) { + path_segment.starts_with(prefix) && path_segment.ends_with(suffix) + } else { + prefix_segment == path_segment + } +} + +/// Takes a segment and returns prefix and suffix of the path, omitting the capture. Currently, +/// matchit supports only one capture so this can be a pair. If there is no capture, `None` is +/// returned. +fn capture_prefix_suffix(segment: &str) -> Option<(&str, &str)> { + fn find_first_not_double(needle: u8, haystack: &[u8]) -> Option { + let mut possible_capture = 0; + while let Some(index) = haystack + .get(possible_capture..) + .and_then(|haystack| haystack.iter().position(|byte| byte == &needle)) + { + let index = index + possible_capture; + + if haystack.get(index + 1) == Some(&needle) { + possible_capture = index + 2; + continue; + } + + return Some(index); + } + + None + } + + let capture_start = find_first_not_double(b'{', segment.as_bytes())?; + + let Some(capture_end) = find_first_not_double(b'}', segment.as_bytes()) else { + if cfg!(debug_assertions) { + panic!( + "Segment `{segment}` is malformed. It seems to contain a capture start but no \ + capture end. This should have been rejected at application start, please file a \ + bug in axum repository." + ); + } else { + // This is very bad but let's not panic in production. This will most likely not match. + return None; + } + }; + + if capture_start > capture_end { + if cfg!(debug_assertions) { + panic!( + "Segment `{segment}` is malformed. It seems to contain a capture start after \ + capture end. This should have been rejected at application start, please file a \ + bug in axum repository." + ); + } else { + // This is very bad but let's not panic in production. This will most likely not match. + return None; + } + } + + // Slicing may panic but we found the indexes inside the string so this should be fine. + Some((&segment[..capture_start], &segment[capture_end + 1..])) } #[derive(Debug)] @@ -380,6 +435,27 @@ mod tests { expected = Some("/a"), ); + test!( + param_14, + uri = "/abc", + prefix = "/a{b}c", + expected = Some("/"), + ); + + test!( + param_15, + uri = "/z/abc/d", + prefix = "/z/a{b}c", + expected = Some("/d"), + ); + + test!( + param_16, + uri = "/abc/d/e", + prefix = "/a{b}c/d/", + expected = Some("/e"), + ); + #[quickcheck] fn does_not_panic(uri_and_prefix: UriAndPrefix) -> bool { let UriAndPrefix { uri, prefix } = uri_and_prefix; diff --git a/axum/src/routing/tests/mod.rs b/axum/src/routing/tests/mod.rs index 8851738667..02b24da965 100644 --- a/axum/src/routing/tests/mod.rs +++ b/axum/src/routing/tests/mod.rs @@ -407,6 +407,66 @@ async fn what_matches_wildcard() { assert_eq!(get("/x/a/b/").await, "x"); } +#[crate::test] +async fn prefix_suffix_match() { + let app = Router::new() + .route("/{picture}.png", get(|| async { "picture" })) + .route("/hello-{name}", get(|| async { "greeting" })) + .route("/start-{regex}-end", get(|| async { "regex" })) + .route("/logo.svg", get(|| async { "logo" })) + .fallback(|| async { "fallback" }); + + let client = TestClient::new(app); + + let get = |path| { + let f = client.get(path); + async move { f.await.text().await } + }; + + assert_eq!(get("/").await, "fallback"); + assert_eq!(get("/a/b.png").await, "fallback"); + assert_eq!(get("/a.png/").await, "fallback"); + assert_eq!(get("//a.png").await, "fallback"); + + // Empty capture is not allowed + assert_eq!(get("/.png").await, "fallback"); + assert_eq!(get("/..png").await, "picture"); + assert_eq!(get("/a.png").await, "picture"); + assert_eq!(get("/b.png").await, "picture"); + + assert_eq!(get("/hello-").await, "fallback"); + assert_eq!(get("/hello-world").await, "greeting"); + + assert_eq!(get("/start--end").await, "fallback"); + assert_eq!(get("/start-regex-end").await, "regex"); + + assert_eq!(get("/logo.svg").await, "logo"); + + assert_eq!(get("/hello-.png").await, "greeting"); +} + +#[crate::test] +async fn prefix_suffix_nested_match() { + let app = Router::new() + .route("/{a}/a", get(|| async { "a" })) + .route("/{b}/b", get(|| async { "b" })) + .route("/a{c}c/a", get(|| async { "c" })) + .route("/a{d}c/{*anything}", get(|| async { "d" })) + .fallback(|| async { "fallback" }); + + let client = TestClient::new(app); + + let get = |path| { + let f = client.get(path); + async move { f.await.text().await } + }; + + assert_eq!(get("/ac/a").await, "a"); + assert_eq!(get("/ac/b").await, "b"); + assert_eq!(get("/abc/a").await, "c"); + assert_eq!(get("/abc/b").await, "d"); +} + #[crate::test] async fn static_and_dynamic_paths() { let app = Router::new() diff --git a/axum/src/routing/tests/nest.rs b/axum/src/routing/tests/nest.rs index 6e14203662..9234ea3331 100644 --- a/axum/src/routing/tests/nest.rs +++ b/axum/src/routing/tests/nest.rs @@ -290,6 +290,44 @@ async fn nest_at_capture() { assert_eq!(res.text().await, "a=foo b=bar"); } +// Not `crate::test` because `nest_service` would fail. +#[tokio::test] +async fn nest_at_prefix_capture() { + let empty_routes = Router::new(); + let api_routes = Router::new().route( + "/{b}", + get(|Path((a, b)): Path<(String, String)>| async move { format!("a={a} b={b}") }), + ); + + let app = Router::new() + .nest("/x{a}x", api_routes) + .nest("/xax", empty_routes); + + let client = TestClient::new(app); + + let res = client.get("/xax/bar").await; + assert_eq!(res.status(), StatusCode::OK); + assert_eq!(res.text().await, "a=a b=bar"); +} + +#[tokio::test] +async fn nest_service_at_prefix_capture() { + let empty_routes = Router::new(); + let api_routes = Router::new().route( + "/{b}", + get(|Path((a, b)): Path<(String, String)>| async move { format!("a={a} b={b}") }), + ); + + let app = Router::new() + .nest_service("/x{a}x", api_routes) + .nest_service("/xax", empty_routes); + + let client = TestClient::new(app); + + let res = client.get("/xax/bar").await; + assert_eq!(res.status(), StatusCode::NOT_FOUND); +} + #[crate::test] async fn nest_with_and_without_trailing() { let app = Router::new().nest_service("/foo", get(|| async {}));