"""Functions for working with URLs.

Contains implementations of functions from :mod:`urllib.parse` that
handle bytes and strings.
"""
from __future__ import annotations

import codecs
import os
import re
import typing as t
import warnings
from urllib.parse import quote
from urllib.parse import unquote
from urllib.parse import urlencode
from urllib.parse import urlsplit
from urllib.parse import urlunsplit

from ._internal import _check_str_tuple
from ._internal import _decode_idna
from ._internal import _make_encode_wrapper
from ._internal import _to_str
from .datastructures import iter_multi_items

if t.TYPE_CHECKING:
    from . import datastructures as ds

# A regular expression for what a valid schema looks like
_scheme_re = re.compile(r"^[a-zA-Z0-9+-.]+$")

# Characters that are safe in any part of an URL.
_always_safe_chars = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "-._~"
    "$!'()*+,;"  # RFC3986 sub-delims set, not including query string delimiters &=
)
_always_safe = frozenset(_always_safe_chars.encode("ascii"))

_hexdigits = "0123456789ABCDEFabcdef"
_hextobyte = {
    f"{a}{b}".encode("ascii"): int(f"{a}{b}", 16)
    for a in _hexdigits
    for b in _hexdigits
}
_bytetohex = [f"%{char:02X}".encode("ascii") for char in range(256)]


class _URLTuple(t.NamedTuple):
    """Plain five-field tuple holding the components of a split URL.

    Serves as the tuple base for :class:`BaseURL`.
    """

    # Fields are declared in the order they are packed into and unpacked
    # from the tuple: scheme, netloc, path, query, fragment.
    scheme: str
    netloc: str
    path: str
    query: str
    fragment: str


class BaseURL(_URLTuple):
    """Superclass of :py:class:`URL` and :py:class:`BytesURL`.

    Adds accessors for the pieces of the network location (auth, host,
    port) and conversion helpers on top of the plain five-field tuple.
    Subclasses provide the ``_at``, ``_colon``, ``_lbracket`` and
    ``_rbracket`` delimiters in the string type they store.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Delimiters used to take the netloc apart; set by the subclasses.
    _at: str
    _colon: str
    _lbracket: str
    _rbracket: str

    def __new__(cls, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        """Create the tuple and emit a deprecation warning for the class."""
        warnings.warn(
            f"'werkzeug.urls.{cls.__name__}' is deprecated and will be removed in"
            " Werkzeug 3.0. Use the 'urllib.parse' library instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return super().__new__(cls, *args, **kwargs)

    def __str__(self) -> str:
        return self.to_url()

    def replace(self, **kwargs: t.Any) -> BaseURL:
        """Return an URL with the same values, except for those parameters
        given new values by whichever keyword arguments are specified."""
        return self._replace(**kwargs)

    @property
    def host(self) -> str | None:
        """The host part of the URL if available, otherwise `None`.  The
        host is either the hostname or the IP address mentioned in the
        URL.  It will not contain the port.
        """
        return self._split_host()[0]

    @property
    def ascii_host(self) -> str | None:
        """Works exactly like :attr:`host` but tries to return a result
        that is restricted to ASCII.  A string host is IDNA encoded; if
        that encoding fails the host is returned unchanged.  This is
        useful for socket operations when the URL might include
        internationalized characters.
        """
        rv = self.host
        if isinstance(rv, str):
            try:
                rv = rv.encode("idna").decode("ascii")
            except UnicodeError:
                # Best effort: fall back to the host exactly as stored.
                pass
        return rv

    @property
    def port(self) -> int | None:
        """The port in the URL as an integer if it was present, `None`
        otherwise.  This does not fill in default ports.  A port that is
        not a number, or is outside ``0..65535``, is reported as `None`.
        """
        try:
            rv = int(_to_str(self._split_host()[1]))
            if 0 <= rv <= 65535:
                return rv
        except (ValueError, TypeError):
            pass
        return None

    @property
    def auth(self) -> str | None:
        """The authentication part in the URL if available, `None`
        otherwise.
        """
        return self._split_netloc()[0]

    @property
    def username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[0]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        Unlike :attr:`username` this one is not being decoded.
        """
        return self._split_auth()[0]

    @property
    def password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[1]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        Unlike :attr:`password` this one is not being decoded.
        """
        return self._split_auth()[1]

    def decode_query(self, *args: t.Any, **kwargs: t.Any) -> ds.MultiDict[str, str]:
        """Decodes the query part of the URL.  This is a shortcut for
        calling :func:`url_decode` on the query argument.  The arguments and
        keyword arguments are forwarded to :func:`url_decode` unchanged.
        """
        return url_decode(self.query, *args, **kwargs)

    def join(self, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        """Joins this URL with another one.  This is just a convenience
        function for calling into :meth:`url_join` and then parsing the
        return value again.
        """
        return url_parse(url_join(self, *args, **kwargs))

    def to_url(self) -> str:
        """Returns a URL string or bytes depending on the type of the
        information stored.  This is just a convenience function
        for calling :meth:`url_unparse` for this URL.
        """
        return url_unparse(self)

    def encode_netloc(self) -> str:
        """Encodes the netloc part to an ASCII safe URL, returned as a
        string.  The host comes from :attr:`ascii_host` and the
        credentials are percent-quoted.
        """
        rv = self.ascii_host or ""
        # A colon inside the host means an IPv6 literal; restore its brackets.
        if ":" in rv:
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        # Empty username/password entries are dropped before joining.
        auth = ":".join(
            filter(
                None,
                [
                    url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
                    url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def decode_netloc(self) -> str:
        """Decodes the netloc part into a string."""
        host = self.host or ""

        if isinstance(host, bytes):
            host = host.decode()

        rv = _decode_idna(host)

        # A colon inside the host means an IPv6 literal; restore its brackets.
        if ":" in rv:
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        # The characters "/:%@" stay quoted in the decoded credentials.
        auth = ":".join(
            filter(
                None,
                [
                    _url_unquote_legacy(self.raw_username or "", "/:%@"),
                    _url_unquote_legacy(self.raw_password or "", "/:%@"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def to_uri_tuple(self) -> BaseURL:
        """Returns a URL tuple that holds a URI, produced by passing this
        URL through :func:`iri_to_uri` and parsing the result again.

        It's usually more interesting to directly call :meth:`iri_to_uri` which
        will return a string.
        """
        return url_parse(iri_to_uri(self))

    def to_iri_tuple(self) -> BaseURL:
        """Returns a :class:`URL` tuple that holds a IRI.  This will try
        to decode as much information as possible in the URL without
        losing information similar to how a web browser does it for the
        URL bar.

        It's usually more interesting to directly call :meth:`uri_to_iri` which
        will return a string.
        """
        return url_parse(uri_to_iri(self))

    def get_file_location(
        self, pathformat: str | None = None
    ) -> tuple[str | None, str | None]:
        """Returns a tuple with the location of the file in the form
        ``(server, location)``.  If the netloc is empty in the URL or
        points to localhost, it's represented as ``None``.

        The `pathformat` by default is autodetection but needs to be set
        when working with URLs of a specific system.  The supported values
        are ``'windows'`` when working with Windows or DOS paths and
        ``'posix'`` when working with posix paths.

        If the URL does not point to a local file, the server and location
        are both represented as ``None``.

        :param pathformat: The expected format of the path component.
                           Currently ``'windows'`` and ``'posix'`` are
                           supported.  Defaults to ``None`` which is
                           autodetect.
        :raises TypeError: if `pathformat` is not one of the supported
                           values.
        """
        if self.scheme != "file":
            return None, None

        path = url_unquote(self.path)
        host = self.netloc or None

        if pathformat is None:
            if os.name == "nt":
                pathformat = "windows"
            else:
                pathformat = "posix"

        if pathformat == "windows":
            # Turn "/C:/dir" or "/C|/dir" into "C:/dir". The separator is
            # compared against a tuple rather than the string "|:" because
            # an empty slice is a substring of every string, which made a
            # two character path such as "/c" look like a drive letter.
            if (
                path[:1] == "/"
                and path[1:2].isalpha()
                and path[2:3] in ("|", ":")
            ):
                path = f"{path[1:2]}:{path[3:]}"
            windows_share = path[:3] in ("\\" * 3, "/" * 3)
            import ntpath

            path = ntpath.normpath(path)
            # Windows shared drives are represented as ``\\host\\directory``.
            # That results in a URL like ``file://///host/directory``, and a
            # path like ``///host/directory``. We need to special-case this
            # because the path contains the hostname.
            if windows_share and host is None:
                parts = path.lstrip("\\").split("\\", 1)
                if len(parts) == 2:
                    host, path = parts
                else:
                    host = parts[0]
                    path = ""
        elif pathformat == "posix":
            import posixpath

            path = posixpath.normpath(path)
        else:
            raise TypeError(f"Invalid path format {pathformat!r}")

        if host in ("127.0.0.1", "::1", "localhost"):
            host = None

        return host, path

    def _split_netloc(self) -> tuple[str | None, str]:
        """Split the netloc at the first ``@`` into ``(auth, rest)``.
        ``auth`` is `None` when the netloc contains no ``@``.
        """
        if self._at in self.netloc:
            auth, _, netloc = self.netloc.partition(self._at)
            return auth, netloc
        return None, self.netloc

    def _split_auth(self) -> tuple[str | None, str | None]:
        """Split the auth part at the first ``:`` into the raw
        ``(username, password)``.  Both are `None` when there is no auth
        part or it is empty; the password is `None` when there is no colon.
        """
        auth = self._split_netloc()[0]
        if not auth:
            return None, None
        if self._colon not in auth:
            return auth, None

        username, _, password = auth.partition(self._colon)
        return username, password

    def _split_host(self) -> tuple[str | None, str | None]:
        """Split the netloc without its auth part into the raw
        ``(host, port)``.  For a bracketed host the brackets are removed;
        a missing piece is reported as `None`.
        """
        rv = self._split_netloc()[1]
        if not rv:
            return None, None

        if not rv.startswith(self._lbracket):
            if self._colon in rv:
                host, _, port = rv.partition(self._colon)
                return host, port
            return rv, None

        idx = rv.find(self._rbracket)
        if idx < 0:
            # Opening bracket without a closing one: hand back everything
            # as the host instead of guessing where it ends.
            return rv, None

        host = rv[1:idx]
        rest = rv[idx + 1 :]
        if rest.startswith(self._colon):
            return host, rest[1:]
        return host, None


class URL(BaseURL):
    """Represents a parsed URL.  This behaves like a regular tuple but
    also has some extra attributes that give further insight into the
    URL.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    _at = "@"
    _colon = ":"
    _lbracket = "["
    _rbracket = "]"

    def encode(self, charset: str = "utf-8", errors: str = "replace") -> BytesURL:
        """Encodes the URL to a tuple made out of bytes.  The charset is
        only being used for the path, query and fragment; the scheme and
        the netloc are encoded as ASCII.

        :param charset: the encoding for the path, query and fragment.
        :param errors: the error handling used when encoding the netloc,
            path, query and fragment.
        """
        return BytesURL(
            self.scheme.encode("ascii"),  # type: ignore
            # encode_netloc() returns text. It has to be turned into bytes
            # as well, otherwise the BytesURL mixes str and bytes and its
            # bytes delimiters cannot be searched for in the netloc.
            self.encode_netloc().encode("ascii", errors),  # type: ignore
            self.path.encode(charset, errors),  # type: ignore
            self.query.encode(charset, errors),  # type: ignore
            self.fragment.encode(charset, errors),  # type: ignore
        )


class BytesURL(BaseURL):
    """A parsed URL whose components are stored as bytes.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    _at = b"@"  # type: ignore
    _colon = b":"  # type: ignore
    _lbracket = b"["  # type: ignore
    _rbracket = b"]"  # type: ignore

    def __str__(self) -> str:
        raw_url = self.to_url()
        return raw_url.decode("utf-8", "replace")  # type: ignore

    def encode_netloc(self) -> bytes:  # type: ignore
        """Return the stored netloc as is; it is already bytes."""
        return self.netloc  # type: ignore

    def decode(self, charset: str = "utf-8", errors: str = "replace") -> URL:
        """Build the :class:`URL` counterpart made out of strings.

        The scheme is decoded as ASCII and the netloc through
        :meth:`decode_netloc`; ``charset`` and ``errors`` only apply to
        the path, query and fragment.
        """
        scheme = self.scheme.decode("ascii")  # type: ignore
        netloc = self.decode_netloc()
        path, query, fragment = (
            part.decode(charset, errors)  # type: ignore
            for part in (self.path, self.query, self.fragment)
        )
        return URL(scheme, netloc, path, query, fragment)


# Hex-pair lookup tables, cached per set of byte values that must stay quoted.
# The empty set maps to the complete table.
_unquote_maps: dict[frozenset[int], dict[bytes, int]] = {frozenset(): _hextobyte}


def _unquote_to_bytes(string: str | bytes, unsafe: str | bytes = "") -> bytes:
    """Replace percent escapes in ``string`` with the bytes they stand for.

    Text input is encoded as UTF-8 first. Escapes for any byte listed in
    ``unsafe``, and sequences that are not a valid two-digit escape, are
    copied to the output untouched.
    """
    raw = string.encode("utf-8") if isinstance(string, str) else string
    blocked = frozenset(
        bytearray(unsafe.encode("utf-8") if isinstance(unsafe, str) else unsafe)
    )

    # Everything before the first "%" is literal; each later chunk starts
    # right after a "%".
    head, *escaped = raw.split(b"%")
    decoded = bytearray(head)

    table = _unquote_maps.get(blocked)

    if table is None:
        table = {
            pair: value for pair, value in _hextobyte.items() if value not in blocked
        }
        _unquote_maps[blocked] = table

    for chunk in escaped:
        value = table.get(chunk[:2])

        if value is None:
            # Not an escape we may decode: put the "%" back.
            decoded.append(37)
            decoded.extend(chunk)
        else:
            decoded.append(value)
            decoded.extend(chunk[2:])

    return bytes(decoded)


def _url_encode_impl(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str,
    sort: bool,
    key: t.Callable[[tuple[str, str]], t.Any] | None,
) -> t.Iterator[str]:
    """Yield one quoted ``key=value`` string per item of ``obj``.

    Items whose value is ``None`` are skipped. Keys and values that are
    not bytes are converted with ``str()`` and encoded with ``charset``.
    When ``sort`` is true the items are sorted with ``key`` first.
    """
    from .datastructures import iter_multi_items

    pairs: t.Iterable[tuple[str, str]] = iter_multi_items(obj)

    if sort:
        pairs = sorted(pairs, key=key)

    for name, value in pairs:
        if value is None:
            continue

        name_bytes = name if isinstance(name, bytes) else str(name).encode(charset)
        value_bytes = value if isinstance(value, bytes) else str(value).encode(charset)

        yield "=".join(
            (_fast_url_quote_plus(name_bytes), _fast_url_quote_plus(value_bytes))
        )


def _url_unquote_legacy(value: str, unsafe: str = "") -> str:
    """Unquote ``value`` as strict UTF-8, retrying as latin1 if that fails."""
    try:
        decoded = url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe)
    except UnicodeError:
        decoded = url_unquote(value, charset="latin1", unsafe=unsafe)

    return decoded


def url_parse(
    url: str, scheme: str | None = None, allow_fragments: bool = True
) -> BaseURL:
    """Split a URL into a :class:`URL` tuple, or a :class:`BytesURL`
    tuple when the input is not a string.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the scheme to use when the URL does not carry one.
        It is ignored otherwise.
    :param allow_fragments: when `False` the fragment is not split off
        and stays part of the preceding component.
    :raises ValueError: if the netloc has an unbalanced ``[`` or ``]``.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_parse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    s = _make_encode_wrapper(url)
    result_type = URL if isinstance(url, str) else BytesURL
    empty = s("")

    if scheme is None:
        scheme = empty

    netloc = query = fragment = empty
    colon_at = url.find(s(":"))

    if colon_at > 0 and _scheme_re.match(_to_str(url[:colon_at], errors="replace")):
        # If only digits follow the colon this is "host:port" rather than
        # "scheme:rest", and the text before the colon belongs to the path.
        rest = url[colon_at + 1 :]
        digits = s("0123456789")

        if not rest or not all(c in digits for c in rest):
            scheme, url = url[:colon_at].lower(), rest

    if url[:2] == s("//"):
        # The netloc runs up to the first "/", "?" or "#" after the
        # leading slashes, or to the end of the URL if there is none.
        found = [pos for pos in (url.find(c, 2) for c in s("/?#")) if pos >= 0]
        delim = min(found, default=len(url))
        netloc, url = url[2:delim], url[delim:]
        has_open = s("[") in netloc
        has_close = s("]") in netloc

        if has_open != has_close:
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, _, fragment = url.partition(s("#"))

    if s("?") in url:
        url, _, query = url.partition(s("?"))

    return result_type(scheme, netloc, url, query, fragment)


def _make_fast_url_quote(
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> t.Callable[[bytes], str]:
    """Build a quoting function around a precomputed 256-entry table.

    In contrast to :func:`url_quote`, the returned function takes only
    the bytes to quote.

    :param charset: The charset used to encode ``safe`` and ``unsafe``
        when they are given as strings.
    :param errors: How to handle encoding errors.
    :param safe: An optional sequence of safe characters to never encode.
    :param unsafe: An optional sequence of unsafe characters to always encode.
    """
    safe_bytes = safe.encode(charset, errors) if isinstance(safe, str) else safe
    unsafe_bytes = (
        unsafe.encode(charset, errors) if isinstance(unsafe, str) else unsafe
    )
    keep = (frozenset(bytearray(safe_bytes)) | _always_safe) - frozenset(
        bytearray(unsafe_bytes)
    )

    # One entry per byte value: the character itself or its percent escape.
    table = []

    for code in range(256):
        table.append(chr(code) if code in keep else f"%{code:02X}")

    def quote(string: bytes) -> str:
        return "".join(map(table.__getitem__, string))

    return quote


# Quoter built with the default arguments.
_fast_url_quote = _make_fast_url_quote()
# Quoter that lets spaces through unescaped and always escapes "+".
_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")


def _fast_url_quote_plus(string: bytes) -> str:
    """Quote ``string`` and write each space as ``+``."""
    quoted = _fast_quote_plus(string)
    return quoted.replace(" ", "+")


def url_quote(
    string: str | bytes,
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> str:
    """Percent-encode a single string using the given charset.

    :param string: the value to quote. Anything that is not a string,
        bytes or bytearray is converted with ``str()`` first.
    :param charset: the charset to be used.
    :param errors: the error handling for the charset encoding.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters. These are
        encoded even when they are also listed as safe.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote`` instead.

    .. versionadded:: 0.9.2
       The `unsafe` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_quote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.quote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    data = string if isinstance(string, (str, bytes, bytearray)) else str(string)

    if isinstance(data, str):
        data = data.encode(charset, errors)

    safe_bytes = safe.encode(charset, errors) if isinstance(safe, str) else safe
    unsafe_bytes = (
        unsafe.encode(charset, errors) if isinstance(unsafe, str) else unsafe
    )
    keep = (frozenset(bytearray(safe_bytes)) | _always_safe) - frozenset(
        bytearray(unsafe_bytes)
    )
    quoted = b"".join(
        bytes((byte,)) if byte in keep else _bytetohex[byte]
        for byte in bytearray(data)
    )
    return quoted.decode(charset)


def url_quote_plus(
    string: str, charset: str = "utf-8", errors: str = "strict", safe: str = ""
) -> str:
    """URL encode a single string with the given encoding and convert
    whitespace to "+".

    :param string: The string to quote.
    :param charset: The charset to be used.
    :param errors: The error handling for the charset encoding.
    :param safe: An optional sequence of safe characters.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote_plus`` instead.
    """
    # The message names Werkzeug 3.0, matching the docstring and every other
    # deprecation in this module.
    warnings.warn(
        "'werkzeug.urls.url_quote_plus' is deprecated and will be removed in Werkzeug"
        " 3.0. Use 'urllib.parse.quote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Spaces are let through by the quoter and rewritten afterwards, while a
    # literal "+" is always escaped so the two cannot be confused.
    return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+")


def url_unparse(components: tuple[str, str, str, str, str]) -> str:
    """Put a five-part URL tuple back together; the reverse of
    :meth:`url_parse`. Plain tuples are accepted as well as
    :class:`URL` tuples.

    :param components: the parsed URL as tuple which should be converted
                       into a URL string.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlunsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unparse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlunsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    _check_str_tuple(components)
    scheme, netloc, path, query, fragment = components
    s = _make_encode_wrapper(scheme)
    slash = s("/")

    # file:///x and file:/x are treated the same, which is also what
    # browsers seem to do. That removes the need for a register of schemes
    # that use a netloc, and for telling an empty netloc from a missing one.
    if netloc or (scheme and path.startswith(slash)):
        if path and not path.startswith(slash):
            path = slash + path

        url = s("//") + (netloc or s("")) + path
    elif path:
        url = s("") + path
    else:
        url = s("")

    if scheme:
        url = scheme + s(":") + url

    # The "?" and "#" markers are only written for non-empty parts.
    for marker, part in ((s("?"), query), (s("#"), fragment)):
        if part:
            url = url + marker + part

    return url


def url_unquote(
    s: str | bytes,
    charset: str = "utf-8",
    errors: str = "replace",
    unsafe: str = "",
) -> str:
    """Undo the percent-encoding of a single string.

    :param s: the string to unquote.
    :param charset: the charset used to decode the unquoted bytes. If
        set to `None` no decoding takes place and the raw bytes are
        returned.
    :param errors: the error handling for the charset decoding.
    :param unsafe: characters whose escapes are left quoted.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unquote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.unquote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    raw = _unquote_to_bytes(s, unsafe)
    return raw if charset is None else raw.decode(charset, errors)


def url_unquote_plus(
    s: str | bytes, charset: str = "utf-8", errors: str = "replace"
) -> str:
    """URL decode a single string with the given `charset` and decode "+" to
    whitespace.

    By default the charset decoding uses the ``'replace'`` error handler.
    If you want a different behavior you can set `errors` to another
    handler such as ``'strict'`` or ``'ignore'``.

    :param s: The string to unquote.
    :param charset: the charset of the query string.  If set to `None`
        no decoding will take place.
    :param errors: The error handling for the `charset` decoding.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote_plus`` instead.
    """
    # The message names Werkzeug 3.0, matching the docstring and every other
    # deprecation in this module.
    warnings.warn(
        "'werkzeug.urls.url_unquote_plus' is deprecated and will be removed in Werkzeug"
        " 3.0. Use 'urllib.parse.unquote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # "+" has to become a space before unquoting, so that an escaped plus
    # ("%2B") still comes out as a literal "+".
    if isinstance(s, str):
        s = s.replace("+", " ")
    else:
        s = s.replace(b"+", b" ")

    return url_unquote(s, charset, errors)


def url_fix(s: str, charset: str = "utf-8") -> str:
    r"""Repair a URL typed in by a user that is not a real URL because it
    contains unsafe characters such as ' '. The fixes applied are similar
    to what browsers do with data entered by the user:

    >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was given
        as a string.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0.
    """
    warnings.warn(
        "'werkzeug.urls.url_fix' is deprecated and will be removed in Werkzeug 3.0.",
        DeprecationWarning,
        stacklevel=2,
    )
    # Work on text and turn backslashes, which are invalid in URLs anyway,
    # into slashes. This is consistent with what Chrome does.
    text = _to_str(s, charset, "replace").replace("\\", "/")

    # A malformed Windows file URL such as "file://C:/dir" gets the slash
    # in front of the drive letter added back.
    has_bare_drive = (
        text.startswith("file://")
        and text[7:8].isalpha()
        and text[8:10] in (":/", "|/")
    )

    if has_bare_drive:
        text = "file:///" + text[7:]

    parsed = url_parse(text)
    query_safe = ":&%=+$!*'(),"
    path = url_quote(parsed.path, charset, safe="/%+$!*'(),")
    qs = url_quote_plus(parsed.query, charset, safe=query_safe)
    anchor = url_quote_plus(parsed.fragment, charset, safe=query_safe)
    return url_unparse((parsed.scheme, parsed.encode_netloc(), path, qs, anchor))


def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Used in :func:`uri_to_iri` after unquoting to re-quote any
    invalid bytes.
    """
    # the docs state that UnicodeError does have these attributes,
    # but mypy isn't picking them up
    out = quote(e.object[e.start : e.end], safe="")  # type: ignore
    return out, e.end  # type: ignore


codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)


def _make_unquote_part(name: str, chars: str) -> t.Callable[[str, str, str], str]:
    """Create a function that unquotes all percent encoded characters except those
    given. This allows working with unquoted characters if possible while not changing
    the meaning of a given part of a URL.
    """
    choices = "|".join(f"{ord(c):02X}" for c in sorted(chars))
    pattern = re.compile(f"((?:%(?:{choices}))+)", re.I)

    def _unquote_partial(value: str, encoding: str, errors: str) -> str:
        parts = iter(pattern.split(value))
        out = []

        for part in parts:
            out.append(unquote(part, encoding, errors))
            out.append(next(parts, ""))

        return "".join(out)

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial


# Characters that stay percent-quoted in the individual URL parts, based on
# https://url.spec.whatwg.org/#percent-encoded-bytes
# Controls, space, "%" and DEL are kept quoted in every part.
_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", _always_unsafe + "&=+#")
_unquote_path = _make_unquote_part("path", _always_unsafe + "/?#")
_unquote_user = _make_unquote_part("user", _always_unsafe + ":@/?#")


def uri_to_iri(
    uri: str | tuple[str, str, str, str, str],
    charset: str | None = None,
    errors: str | None = None,
) -> str:
    """Convert a URI to an IRI. Percent escapes are unquoted where that
    is valid, while reserved and invalid characters stay quoted. A
    domain, if present, is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding used to decode the unquoted bytes.
        Defaults to UTF-8.
    :param errors: Error handler to use while decoding. By default,
        invalid bytes are left quoted.

    .. versionchanged:: 2.3
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are
        deprecated and will be removed in Werkzeug 3.0.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    if isinstance(uri, tuple):
        warnings.warn(
            "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = urlunsplit(uri)

    if isinstance(uri, bytes):
        warnings.warn(
            "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = uri.decode()

    if charset is None:
        charset = "utf-8"
    else:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

    if errors is None:
        # Handler registered by this module: re-quotes undecodable bytes.
        errors = "werkzeug.url_quote"
    else:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

    parts = urlsplit(uri)
    path = _unquote_path(parts.path, charset, errors)
    query = _unquote_query(parts.query, charset, errors)
    fragment = _unquote_fragment(parts.fragment, charset, errors)

    hostname = parts.hostname
    netloc = _decode_idna(hostname) if hostname else ""

    # A colon in the host means an IPv6 literal, which needs its brackets.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    port = parts.port

    if port:
        netloc = f"{netloc}:{port}"

    username = parts.username

    if username:
        credentials = [_unquote_user(username, charset, errors)]
        password = parts.password

        if password:
            credentials.append(_unquote_user(password, charset, errors))

        netloc = f"{':'.join(credentials)}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))


def iri_to_uri(
    iri: str | tuple[str, str, str, str, str],
    charset: str | None = None,
    errors: str | None = None,
    safe_conversion: bool | None = None,
) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.
    :param charset: The encoding of the IRI.
    :param errors: Error handler to use during ``bytes.encode``.

    .. versionchanged:: 2.3
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are
        deprecated and will be removed in Werkzeug 3.0.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 2.3
        The ``safe_conversion`` parameter is deprecated and will be removed in Werkzeug
        2.4.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
       The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    if charset is not None:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        charset = "utf-8"

    if isinstance(iri, tuple):
        warnings.warn(
            "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        iri = urlunsplit(iri)

    if isinstance(iri, bytes):
        warnings.warn(
            "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        iri = iri.decode(charset)

    if errors is not None:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        errors = "strict"

    if safe_conversion is not None:
        warnings.warn(
            "The 'safe_conversion' parameter is deprecated and will be removed in"
            " Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

    if safe_conversion:
        # If we're not sure if it's safe to normalize the URL, and it only contains
        # ASCII characters, return it as-is.
        try:
            ascii_iri = iri.encode("ascii")

            # Only return if it doesn't have whitespace. (Why?)
            if len(ascii_iri.split()) == 1:
                return iri
        except UnicodeError:
            pass

    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@", encoding=charset, errors=errors)
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@", encoding=charset, errors=errors)
    fragment = quote(
        parts.fragment, safe="%!#$&'()*+,/:;=?@", encoding=charset, errors=errors
    )

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    if ":" in netloc:
        netloc = f"[{netloc}]"

    if parts.port:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            pass_quoted = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{pass_quoted}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))


def _invalid_iri_to_uri(iri: str) -> str:
    """The URL scheme ``itms-services://`` must contain the ``//`` even though it does
    not have a host component. There may be other invalid schemes as well. Currently,
    responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which
    removes the ``//``. For now, if the IRI only contains ASCII and does not contain
    spaces, pass it on as-is. In Werkzeug 3.0, this should become a
    ``response.process_location`` flag.

    :meta private:
    """
    try:
        iri.encode("ascii")
    except UnicodeError:
        pass
    else:
        if len(iri.split(None, 1)) == 1:
            return iri

    return iri_to_uri(iri)


def url_decode(
    s: t.AnyStr,
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: str = "&",
    cls: type[ds.MultiDict] | None = None,
) -> ds.MultiDict[str, str]:
    """Parse a query string and return it as a :class:`MultiDict`.

    :param s: The query string to parse, as text or bytes.
    :param charset: Charset handed to the decoder for keys and values. It is
        also used (falling back to ASCII when falsy) to convert ``separator``
        to the same type as ``s``.
    :param include_empty: Include keys that have no ``=`` in the result.
    :param errors: Error handling behavior when decoding bytes.
    :param separator: Separator character between pairs.
    :param cls: Container to hold result instead of :class:`MultiDict`.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        In previous versions ";" and "&" could be used for url decoding.
        Now only "&" is supported. If you want to use ";", a different
        ``separator`` can be provided.

    .. versionchanged:: 0.5
        The ``cls`` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_decode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    # ``split`` needs the separator to have the same type as the input, so
    # coerce it when the two disagree.
    separator_charset = charset or "ascii"

    if isinstance(s, str):
        if not isinstance(separator, str):
            separator = separator.decode(separator_charset)
    elif isinstance(s, bytes):
        if not isinstance(separator, bytes):
            separator = separator.encode(separator_charset)  # type: ignore

    raw_pairs = s.split(separator)  # type: ignore
    return cls(_url_decode_impl(raw_pairs, charset, include_empty, errors))


def url_decode_stream(
    stream: t.IO[bytes],
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: bytes = b"&",
    cls: type[ds.MultiDict] | None = None,
    limit: int | None = None,
) -> ds.MultiDict[str, str]:
    """Works like :func:`url_decode` but decodes a stream.  The behavior
    of stream and limit follows functions like
    :func:`~werkzeug.wsgi.make_line_iter`.  The generator of pairs is
    passed straight to `cls`, so the data can be consumed while it is
    being parsed.

    :param stream: a stream with the encoded querystring
    :param charset: the charset of the query string.
    :param include_empty: Set to `False` if you don't want keys without
                          an ``=`` to appear in the dict.
    :param errors: the decoding error behavior.
    :param separator: the pair separator to be used, defaults to ``&``
    :param cls: an optional dict class to use.  If this is not specified
                       or `None` the default :class:`MultiDict` is used.
    :param limit: the content length of the URL data.  Not necessary if
                  a limited stream is provided.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` and ``return_iterator`` parameters were removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_decode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    from .wsgi import make_chunk_iter

    # Split the stream on the separator, then decode each chunk into a pair.
    decoded_pairs = _url_decode_impl(
        make_chunk_iter(stream, separator, limit), charset, include_empty, errors
    )

    if cls is not None:
        return cls(decoded_pairs)

    from .datastructures import MultiDict  # noqa: F811

    return MultiDict(decoded_pairs)


def _url_decode_impl(
    pair_iter: t.Iterable[t.AnyStr], charset: str, include_empty: bool, errors: str
) -> t.Iterator[tuple[str, str]]:
    for pair in pair_iter:
        if not pair:
            continue
        s = _make_encode_wrapper(pair)
        equal = s("=")
        if equal in pair:
            key, value = pair.split(equal, 1)
        else:
            if not include_empty:
                continue
            key = pair
            value = s("")
        yield (
            url_unquote_plus(key, charset, errors),
            url_unquote_plus(value, charset, errors),
        )


def url_encode(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> str:
    """URL encode a dict/`MultiDict` into a single query string.  If a value
    is `None` it will not appear in the result string.  Per default only
    values are encoded into the target charset strings.

    :param obj: the object to encode into a query string.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator placed between the encoded pairs.
    :param key: an optional function to be used for sorting.  For more details
                check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        Added the ``sort``, ``key``, and ``separator`` parameters.
    """
    warnings.warn(
        "'werkzeug.urls.url_encode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    joiner = _to_str(separator, "ascii")
    encoded_pairs = _url_encode_impl(obj, charset, sort, key)
    return joiner.join(encoded_pairs)


def url_encode_stream(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    stream: t.IO[str] | None = None,
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> None:
    """Like :meth:`url_encode` but writes the results to a stream
    object.  If the stream is `None` the iterator over all encoded
    pairs is returned instead.

    :param obj: the object to encode into a query string.
    :param stream: a stream to write the encoded object into or `None` if
                   an iterator over the encoded pairs should be returned.  In
                   that case the separator argument is ignored.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator written between consecutive pairs.
    :param key: an optional function to be used for sorting.  For more details
                check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_encode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    separator = _to_str(separator, "ascii")
    encoded_pairs = _url_encode_impl(obj, charset, sort, key)

    if stream is None:
        return encoded_pairs  # type: ignore

    # Write the separator before every pair except the first.
    wrote_any = False

    for encoded in encoded_pairs:
        if wrote_any:
            stream.write(separator)

        stream.write(encoded)
        wrote_any = True

    return None


def url_join(
    base: str | tuple[str, str, str, str, str],
    url: str | tuple[str, str, str, str, str],
    allow_fragments: bool = True,
) -> str:
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    If ``base`` is empty, ``url`` is returned, and vice versa. If the scheme
    of ``url`` differs from the scheme of ``base``, ``url`` is returned as
    given. If ``url`` carries its own network location it is only
    re-assembled. Otherwise the network location of ``base`` is used and the
    path of ``url`` is resolved against the path of ``base``, collapsing
    ``.`` and ``..`` segments.

    :param base: the base URL for the join operation. A tuple is first
        assembled into a string with ``url_unparse``.
    :param url: the URL to join. A tuple is first assembled into a string
        with ``url_unparse``.
    :param allow_fragments: indicates whether fragments should be allowed.
        Passed through to ``url_parse`` for both URLs.
    :return: the joined URL.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urljoin`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_join' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urljoin' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if isinstance(base, tuple):
        base = url_unparse(base)
    if isinstance(url, tuple):
        url = url_unparse(url)

    # NOTE(review): presumably validates that both values have the same
    # string type -- confirm in ``_internal._check_str_tuple``.
    _check_str_tuple((base, url))
    # ``s`` wraps the literals used below; presumably it converts them to the
    # same type (str or bytes) as ``base`` -- confirm in ``_internal``.
    s = _make_encode_wrapper(base)

    # Nothing to join against: hand back whichever side is non-empty.
    if not base:
        return url
    if not url:
        return base

    # ``bfragment`` is unpacked but never used: only the fragment of ``url``
    # ends up in the result.
    bscheme, bnetloc, bpath, bquery, bfragment = url_parse(
        base, allow_fragments=allow_fragments
    )
    # The base scheme is passed as the second argument, presumably as the
    # default scheme for a ``url`` that has none -- confirm in ``url_parse``.
    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)
    if scheme != bscheme:
        return url
    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))
    netloc = bnetloc

    if path[:1] == s("/"):
        # Absolute path: it replaces the base path entirely.
        segments = path.split(s("/"))
    elif not path:
        # No path at all: keep the base path, and the base query too unless
        # ``url`` brings its own.
        segments = bpath.split(s("/"))
        if not query:
            query = bquery
    else:
        # Relative path: drop the last segment of the base path and append
        # the segments of ``url``.
        segments = bpath.split(s("/"))[:-1] + path.split(s("/"))

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == s("."):
        segments[-1] = s("")

    # Resolve ".." and "."
    segments = [segment for segment in segments if segment != s(".")]
    # Repeatedly remove the first "<segment>/.." pair found, restarting the
    # scan after every removal, until a full pass removes nothing. The scan
    # covers indices 1 .. len(segments) - 2, so the final segment is never
    # treated as a "..", and a ".." is kept when the segment before it is
    # empty or is itself "..".
    while True:
        i = 1
        n = len(segments) - 1
        while i < n:
            if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")):
                del segments[i - 1 : i + 1]
                break
            i += 1
        else:
            break

    # Remove leading ".." segments that directly follow the initial empty
    # segment, i.e. when the path starts with "/..".
    unwanted_marker = [s(""), s("..")]
    while segments[:2] == unwanted_marker:
        del segments[1]

    path = s("/").join(segments)
    return url_unparse((scheme, netloc, path, query, fragment))


def _urlencode(
    query: t.Mapping[str, str] | t.Iterable[tuple[str, str]], encoding: str = "utf-8"
) -> str:
    """Encode ``query`` as a query string, leaving out ``None`` values.

    The items are taken from ``iter_multi_items(query)``; every item whose
    value (second element) is ``None`` is dropped before the rest is passed to
    :func:`urllib.parse.urlencode` with the given ``encoding``.
    """
    kept = []

    for item in iter_multi_items(query):
        if item[1] is None:
            continue

        kept.append(item)

    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(kept, safe="!$'()*,/:;?@", encoding=encoding)
