From e6547d6d8f37973fc33688e197c8cf27a855d553 Mon Sep 17 00:00:00 2001 From: David Lord Date: Tue, 12 Nov 2024 13:41:59 -0800 Subject: [PATCH 1/2] improve subdomain and host matching --- CHANGES.rst | 5 ++ src/werkzeug/routing/map.py | 110 +++++++++++++++++++++--------------- 2 files changed, 68 insertions(+), 47 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index de9c3a205..0a328dda8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,11 @@ Version 3.2.0 Unreleased +- ``Map`` takes a ``subdomain_matching`` parameter to disable subdomain + matching. In ``bind_to_environ``, the ``server_name`` parameter is not used + if ``host_matching`` is enabled. If ``default_subdomain`` is set, it is used + if a subdomain could not be determined. :issue:`3005` + Version 3.1.3 ------------- diff --git a/src/werkzeug/routing/map.py b/src/werkzeug/routing/map.py index 4d15e8824..de488f3af 100644 --- a/src/werkzeug/routing/map.py +++ b/src/werkzeug/routing/map.py @@ -46,8 +46,9 @@ class Map: arguments besides the `rules` as keyword arguments! :param rules: sequence of url rules for this map. - :param default_subdomain: The default subdomain for rules without a - subdomain defined. + :param default_subdomain: The default subdomain used by + :meth:`bind_to_environ` and :meth:`bind` if the current subdomain can't + be determined. :param strict_slashes: If a rule ends with a slash but the matched URL does not, redirect to the URL with a trailing slash. :param merge_slashes: Merge consecutive slashes when matching or @@ -62,10 +63,16 @@ class Map: :param sort_parameters: If set to `True` the url parameters are sorted. See `url_encode` for more details. :param sort_key: The sort key function for `url_encode`. - :param host_matching: if set to `True` it enables the host matching - feature and disables the subdomain one. If - enabled the `host` parameter to rules is used - instead of the `subdomain` one. 
+ :param host_matching: Whether to route based on the full ``Host`` rather + than subdomains of ``server_name``. Uses the ``host`` parameter for + each :class:`Rule`. + :param subdomain_matching: Whether to detect the subdomain from ``Host`` and + ``server_name``, and route based on it. Uses the ``subdomain`` parameter + of each :class:`Rule`. Enabled by default, but always disabled if + ``host_matching`` is enabled. + + .. versionchanged:: 3.2 + The ``subdomain_matching`` parameter was added. .. versionchanged:: 3.0 The ``charset`` and ``encoding_errors`` parameters were removed. @@ -102,6 +109,8 @@ def __init__( sort_parameters: bool = False, sort_key: t.Callable[[t.Any], t.Any] | None = None, host_matching: bool = False, + *, + subdomain_matching: bool = True, ) -> None: self._matcher = StateMachineMatcher(merge_slashes) self._rules_by_endpoint: dict[t.Any, list[Rule]] = {} @@ -111,6 +120,7 @@ def __init__( self.default_subdomain = default_subdomain self.strict_slashes = strict_slashes self.redirect_defaults = redirect_defaults + self.subdomain_matching = subdomain_matching and not host_matching self.host_matching = host_matching self.converters = self.default_converters.copy() @@ -255,46 +265,52 @@ def bind_to_environ( server_name: str | None = None, subdomain: str | None = None, ) -> MapAdapter: - """Like :meth:`bind` but you can pass it an WSGI environment and it - will fetch the information from that dictionary. Note that because of - limitations in the protocol there is no way to get the current - subdomain and real `server_name` from the environment. If you don't - provide it, Werkzeug will use `SERVER_NAME` and `SERVER_PORT` (or - `HTTP_HOST` if provided) as used `server_name` with disabled subdomain - feature. - - If `subdomain` is `None` but an environment and a server name is - provided it will calculate the current subdomain automatically. 
- Example: `server_name` is ``'example.com'`` and the `SERVER_NAME` - in the wsgi `environ` is ``'staging.dev.example.com'`` the calculated - subdomain will be ``'staging.dev'``. - - If the object passed as environ has an environ attribute, the value of - this attribute is used instead. This allows you to pass request - objects. Additionally `PATH_INFO` added as a default of the - :class:`MapAdapter` so that you don't have to pass the path info to - the match method. + """Call :meth:`bind` with information from the WSGI environ. + ``PATH_INFO`` is used so it doesn't need to be passed to + :meth:`~.MapAdapter.match`. + + The WSGI environ does not have information to determine what subdomain + was accessed, so ``server_name`` or ``subdomain`` must be passed in for + :attr:`subdomain_matching`. For example, if ``Host`` is + ``abc.example.test`` and ``server_name`` is ``example.test``, + ``subdomain`` is determined to be ``abc``. If the ``server_name`` is not + a suffix of the current ``Host``, then :attr:`default_subdomain` or + ``""`` is used. + + :param environ: The WSGI environ for the request. Can also be a + ``Request`` with an ``environ`` attribute. + :param server_name: When subdomain matching is enabled and ``subdomain`` + is not given, the subdomain is determined by removing this + ``host:port`` as a suffix from the request's ``Host``. If the scheme + is ``http``, ``https``, ``ws``, or ``wss``, the corresponding port + 80 or 443 will be removed. + :param subdomain: Route using this subdomain rather than determining it + using ``server_name``. + + .. versionchanged:: 3.2 + If ``server_name`` is not a suffix of ``Host``, + :attr:`default_subdomain` is used if set, rather than always + ``""``. + + .. versionchanged:: 3.2 + ``subdomain_matching`` can be disabled. + + .. versionchanged:: 3.2 + ``server_name`` is ignored if ``host_matching`` is enabled. .. 
versionchanged:: 1.0.0 - If the passed server name specifies port 443, it will match - if the incoming scheme is ``https`` without a port. + If ``server_name`` specifies port 443, it will match if the scheme + is ``https`` and ``Host`` does not specify a port. - .. versionchanged:: 1.0.0 - A warning is shown when the passed server name does not - match the incoming WSGI server name. + .. versionchanged:: 1.0 + A warning is shown when ``server_name`` is not a suffix of ``Host``. .. versionchanged:: 0.8 - This will no longer raise a ValueError when an unexpected server - name was passed. + ``""`` is used as the subdomain if ``server_name`` is not a + suffix of ``Host``, rather than raising ``ValueError``. .. versionchanged:: 0.5 - previously this method accepted a bogus `calculate_subdomain` - parameter that did not have any effect. It was removed because - of that. - - :param environ: a WSGI environment. - :param server_name: an optional server name hint (see above). - :param subdomain: optionally the current subdomain (see above). + Removed the ``calculate_subdomain`` parameter which was not used. 
""" env = _get_environ(environ) wsgi_server_name = get_host(env).lower() @@ -307,7 +323,7 @@ def bind_to_environ( if upgrade and env.get("HTTP_UPGRADE", "").lower() == "websocket": scheme = "wss" if scheme == "https" else "ws" - if server_name is None: + if server_name is None or self.host_matching: server_name = wsgi_server_name else: server_name = server_name.lower() @@ -318,24 +334,24 @@ def bind_to_environ( elif scheme in {"https", "wss"} and server_name.endswith(":443"): server_name = server_name[:-4] - if subdomain is None and not self.host_matching: + if subdomain is None and self.subdomain_matching and not self.host_matching: cur_server_name = wsgi_server_name.split(".") real_server_name = server_name.split(".") offset = -len(real_server_name) if cur_server_name[offset:] != real_server_name: - # This can happen even with valid configs if the server was - # accessed directly by IP address under some situations. - # Instead of raising an exception like in Werkzeug 0.7 or - # earlier we go by an invalid subdomain which will result - # in a 404 error on matching. + # Host does not have server_name as a suffix. This can happen if + # the server was accessed by IP, or other names point to it in + # DNS. Use a placeholder subdomain, which can result in a 404 on + # matching or be detected in a wildcard endpoint. warnings.warn( f"Current server name {wsgi_server_name!r} doesn't match configured" f" server name {server_name!r}", stacklevel=2, ) - subdomain = "" + subdomain = self.default_subdomain or "" else: + # Remove server_name as a suffix from Host to get the subdomain. 
subdomain = ".".join(filter(None, cur_server_name[:offset])) def _get_wsgi_string(name: str) -> str | None: From 3c0503fe8c35d2a31e0599ece0c4106c9356c15f Mon Sep 17 00:00:00 2001 From: David Lord Date: Tue, 12 Nov 2024 21:07:10 -0800 Subject: [PATCH 2/2] validate trusted_hosts during routing --- CHANGES.rst | 3 +++ src/werkzeug/routing/map.py | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0a328dda8..b82802002 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,6 +9,9 @@ Unreleased matching. In ``bind_to_environ``, the ``server_name`` parameter is not used if ``host_matching`` is enabled. If ``default_subdomain`` is set, it is used if a subdomain could not be determined. :issue:`3005` +- If a request object is passed to ``Map.bind_to_environ``, the host is + validated against ``request.trusted_hosts``. An invalid host will raise a + 400 error. :issue:`3007` Version 3.1.3 diff --git a/src/werkzeug/routing/map.py b/src/werkzeug/routing/map.py index de488f3af..854f8ab68 100644 --- a/src/werkzeug/routing/map.py +++ b/src/werkzeug/routing/map.py @@ -16,6 +16,7 @@ from ..exceptions import HTTPException from ..exceptions import MethodNotAllowed from ..exceptions import NotFound +from ..sansio.request import Request as SansIORequest from ..urls import _urlencode from ..wsgi import get_host from .converters import DEFAULT_CONVERTERS @@ -278,7 +279,9 @@ def bind_to_environ( ``""`` is used. :param environ: The WSGI environ for the request. Can also be a - ``Request`` with an ``environ`` attribute. + ``Request`` with an ``environ`` attribute; in that case, its + :attr:`~.Request.host` is accessed to validate its + :attr:`~.Request.trusted_hosts`. :param server_name: When subdomain matching is enabled and ``subdomain`` is not given, the subdomain is determined by removing this ``host:port`` as a suffix from the request's ``Host``. If the scheme @@ -298,6 +301,10 @@ def bind_to_environ( .. 
versionchanged:: 3.2 ``server_name`` is ignored if ``host_matching`` is enabled. + .. versionchanged:: 3.2 + If the ``environ`` argument is a ``Request``, access ``request.host`` + to validate ``request.trusted_hosts``. + .. versionchanged:: 1.0.0 If ``server_name`` specifies port 443, it will match if the scheme is ``https`` and ``Host`` does not specify a port. @@ -312,8 +319,12 @@ def bind_to_environ( .. versionchanged:: 0.5 Removed the ``calculate_subdomain`` parameter which was not used. """ + if isinstance(environ, SansIORequest): + wsgi_server_name = environ.host.lower() + else: + wsgi_server_name = get_host(environ).lower() + env = _get_environ(environ) - wsgi_server_name = get_host(env).lower() scheme = env["wsgi.url_scheme"] upgrade = any( v.strip() == "upgrade"