pleroma/lib/pleroma/utils/uri_encoding.ex
2025-12-10 14:56:06 +01:00

142 lines
4.8 KiB
Elixir

# Pleroma: A lightweight social networking server
# Copyright © 2017-2025 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Utils.URIEncoding do
  @moduledoc """
  Utility functions for dealing with URI encoding of paths and queries
  with support for query-encoding quirks.
  """

  require Pleroma.Constants

  # Host whose `precrop` query parameter must keep `:` and `,` unescaped.
  # https://git.pleroma.social/pleroma/pleroma/-/issues/1055
  @guardian_media_host "i.guim.co.uk"

  # We don't always want to decode the path first, like is the case in
  # Pleroma.Upload.url_from_spec/3.
  @doc """
  Percent-encodes the path and query of `url`.

  Wraps the URI encoding/decoding functions from Elixir's standard library to
  fix usually unintended side-effects.

  ## Options

  Both options default to `false`:

    * `:bypass_parse` - skips the `URI.parse/1` stage. The whole input is then
      encoded as the path segment of a URI; no query handling takes place.
    * `:bypass_decode` - skips the `URI.decode/1` stage for the path segment.
      Used when a URL has to be double %-encoded for internal reasons.

  ## Return value

  The encoded URL as a string. If either option is given a non-boolean value,
  `{:error, :invalid_opts}` is returned instead.

  Example: `encode_url(url, [bypass_parse: true, bypass_decode: true])`
  """
  @spec encode_url(String.t(), Keyword.t()) :: String.t() | {:error, :invalid_opts}
  def encode_url(url, opts \\ []) when is_binary(url) and is_list(opts) do
    bypass_parse = Keyword.get(opts, :bypass_parse, false)
    bypass_decode = Keyword.get(opts, :bypass_decode, false)

    if is_boolean(bypass_parse) and is_boolean(bypass_decode) do
      do_encode_url(url, bypass_parse, bypass_decode)
    else
      {:error, :invalid_opts}
    end
  end

  # With `bypass_parse` set, the input is treated as a bare path.
  defp do_encode_url(url, true = _bypass_parse, bypass_decode) do
    encode_path(url, bypass_decode)
  end

  # Otherwise only the path and query components are re-encoded; every other
  # component of the parsed URI is passed through unchanged.
  defp do_encode_url(url, false = _bypass_parse, bypass_decode) do
    parsed = URI.parse(url)

    URI.to_string(%{
      parsed
      | path: encode_path(parsed.path, bypass_decode),
        query: encode_query(parsed.query, parsed.host)
    })
  end

  # A URI without a path stays without a path.
  defp encode_path(nil, _bypass_decode), do: nil

  # The path is decoded first (unless `bypass_decode` is set) so that input
  # which is already %-encoded does not get encoded a second time.
  defp encode_path(path, bypass_decode) when is_binary(path) do
    decoded = if bypass_decode, do: path, else: URI.decode(path)

    URI.encode(decoded, &path_char_allowed?/1)
  end

  # URI.encode/2 deliberately does not encode all chars that are forbidden
  # in the path component of a URI. It only encodes chars that are forbidden
  # in the whole URI. This predicate is passed as its 2nd argument to fix that:
  # URI.encode/2 calls it for each byte (in an integer representation) and
  # leaves the byte unescaped only when the predicate returns true.
  defp path_char_allowed?(byte) do
    URI.char_unreserved?(byte) or
      byte in Pleroma.Constants.uri_path_allowed_reserved_chars()
  end

  # A URI without a query stays without a query.
  defp encode_query(nil, _host), do: nil

  # Order of kv pairs in query is not preserved when using URI.decode_query.
  # URI.query_decoder/2 returns a stream of pairs instead, which is consumed
  # here strictly in order, so sites that expect the order of query keys to be
  # always the same keep working.
  #
  # Note that the decoder yields an empty value for a key that has no `=`, so
  # such a key is re-emitted as `key=`.
  defp encode_query(query, host) when is_binary(query) do
    query
    |> URI.query_decoder()
    |> Enum.map_join("&", &maybe_apply_query_quirk(&1, host))
  end

  # Applies host-specific encoding rules to a single decoded `{key, value}` pair.
  # Host names are case-insensitive, so the host is down-cased before comparing
  # rather than relying on the parser to normalise it.
  defp maybe_apply_query_quirk({"precrop", _value} = pair, host) when is_binary(host) do
    if String.downcase(host) == @guardian_media_host do
      query_encode_kv_pair(pair, ~c":,")
    else
      query_encode_kv_pair(pair)
    end
  end

  # Everything else (including URIs without a host) uses www_form encoding.
  defp maybe_apply_query_quirk(pair, _host), do: query_encode_kv_pair(pair)

  # Taken from Elixir's URI module and modified to support quirks.
  # `rules` is a charlist of extra bytes that must stay unescaped in the value.
  defp query_encode_kv_pair(pair, rules \\ [])

  # No rules: plain www_form encoding, same as URI.encode_query/2.
  defp query_encode_kv_pair({key, value}, []) do
    URI.encode_www_form(Kernel.to_string(key)) <>
      "=" <> URI.encode_www_form(Kernel.to_string(value))
  end

  # URI.encode_query/2 does not appear to follow spec and encodes all parts
  # of our URI path Constant. This appears to work outside of edge-cases
  # like The Guardian Rich Media Cards, keeping behavior same as with
  # URI.encode_query/2 unless otherwise specified via rules.
  defp query_encode_kv_pair({key, value}, [_ | _] = rules) do
    encoded_value =
      value
      |> Kernel.to_string()
      |> URI.encode(fn byte -> URI.char_unreserved?(byte) or byte in rules end)
      # URI.encode/2 escapes "%" itself, so every "%20" in its output stands
      # for a space; www_form encoding represents a space as "+".
      |> String.replace("%20", "+")

    URI.encode_www_form(Kernel.to_string(key)) <> "=" <> encoded_value
  end
end