Check what chars to encode in the path segment of URIs, add list to Constants

https://datatracker.ietf.org/doc/html/rfc3986
This commit is contained in:
Phantasm 2025-08-14 22:03:23 +02:00
commit 0f3b1808fd
No known key found for this signature in database
GPG key ID: 2669E588BCC634C8
2 changed files with 18 additions and 1 deletions

View file

@ -132,6 +132,13 @@ defmodule Pleroma.Constants do
do: ~r/^[^[:cntrl:] ()<>@,;:\\"\/\[\]?=]+\/[^[:cntrl:] ()<>@,;:\\"\/\[\]?=]+(; .*)?$/
)
# Reserved characters that may appear unescaped in the path component of a
# URI: the RFC 3986 sub-delims, plus ":" and "@", plus "/" as the separator
# between path segments. Unreserved characters are not part of this list.
# https://datatracker.ietf.org/doc/html/rfc3986
const(uri_path_allowed_reserved_chars, do: ~c"!$&'()*+,;=/:@")
# Object type names for uploads (presumably the ActivityPub object types
# accepted for uploaded media; confirm against callers).
const(upload_object_types, do: ~w(Document Image))
const(activity_json_canonical_mime_type,

View file

@ -14,6 +14,7 @@ defmodule Pleroma.HTTP do
alias Tesla.Env
require Logger
require Pleroma.Constants
# `t` is the module atom itself.
@type t :: __MODULE__
# HTTP request methods, expressed as atoms.
@type method() :: :get | :post | :put | :delete | :head
@ -145,10 +146,19 @@ defmodule Pleroma.HTTP do
# Percent-encodes the path component of a URI. A nil path is passed through.
defp encode_path(nil), do: nil

# URI.encode/2 with its default predicate deliberately does not encode all
# chars that are forbidden in the path component of a URI; it only encodes
# chars that are forbidden in the whole URI. A custom predicate is used here
# instead: URI.encode/2 calls it with each byte (as an integer) and leaves the
# byte untouched when the predicate returns a truthy value. Only unreserved
# chars and the chars in Pleroma.Constants.uri_path_allowed_reserved_chars
# are kept as-is; everything else is percent-encoded.
#
# The path must be encoded exactly once. Chaining a plain URI.encode/1 in
# front of the predicate-based call would turn every "%XX" produced by the
# first pass into "%25XX", because "%" is not in the allowed set.
#
# NOTE(review): decoding first presumably keeps input that is already
# percent-encoded from being encoded a second time; it also means an
# incoming "%2F" ends up as a literal "/" in the result.
defp encode_path(path) when is_binary(path) do
  path
  |> URI.decode()
  |> URI.encode(fn byte ->
    URI.char_unreserved?(byte) or
      byte in Pleroma.Constants.uri_path_allowed_reserved_chars()
  end)
end
defp encode_query(nil), do: nil