Merge branch 'http-url-encoding' into 'develop'

Fix URL encoding of HTTP requests

See merge request pleroma/pleroma!4388
This commit is contained in:
feld 2025-08-01 17:55:21 +00:00
commit 7042495d7c
6 changed files with 148 additions and 10 deletions

View file

@ -0,0 +1 @@
Fix HTTP client making invalid requests due to no percent encoding processing or validation.

View file

@ -105,20 +105,57 @@ defmodule Pleroma.HTTP do
end end
defp adapter_middlewares(Tesla.Adapter.Gun, extra_middleware) do defp adapter_middlewares(Tesla.Adapter.Gun, extra_middleware) do
[Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool] ++ default_middleware() ++
[Pleroma.Tesla.Middleware.ConnectionPool] ++
extra_middleware extra_middleware
end end
defp adapter_middlewares({Tesla.Adapter.Finch, _}, extra_middleware) do
[Tesla.Middleware.FollowRedirects] ++ extra_middleware
end
defp adapter_middlewares(_, extra_middleware) do defp adapter_middlewares(_, extra_middleware) do
if Pleroma.Config.get(:env) == :test do # A lot of tests are written expecting unencoded URLs
# Emulate redirects in test env, which are handled by adapters in other environments # and the burden of fixing that is high. Also it makes
[Tesla.Middleware.FollowRedirects] # them hard to read. Tests will opt-in when we want to validate
else # the encoding is being done correctly.
extra_middleware cond do
Pleroma.Config.get(:env) == :test and Pleroma.Config.get(:test_url_encoding) ->
default_middleware()
Pleroma.Config.get(:env) == :test ->
# Emulate redirects in test env, which are handled by adapters in other environments
[Tesla.Middleware.FollowRedirects]
# Hackney and Finch
true ->
default_middleware() ++ extra_middleware
end end
end end
defp default_middleware,
do: [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.EncodeUrl]
def encode_url(url) when is_binary(url) do
URI.parse(url)
|> then(fn parsed ->
path = encode_path(parsed.path)
query = encode_query(parsed.query)
%{parsed | path: path, query: query}
end)
|> URI.to_string()
end
defp encode_path(nil), do: nil
defp encode_path(path) when is_binary(path) do
path
|> URI.decode()
|> URI.encode()
end
defp encode_query(nil), do: nil
defp encode_query(query) when is_binary(query) do
query
|> URI.decode_query()
|> URI.encode_query()
end
end end

View file

@ -158,6 +158,8 @@ defmodule Pleroma.ReverseProxy do
Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}") Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
method = method |> String.downcase() |> String.to_existing_atom() method = method |> String.downcase() |> String.to_existing_atom()
url = maybe_encode_url(url)
case client().request(method, url, headers, "", opts) do case client().request(method, url, headers, "", opts) do
{:ok, code, headers, client} when code in @valid_resp_codes -> {:ok, code, headers, client} when code in @valid_resp_codes ->
{:ok, code, downcase_headers(headers), client} {:ok, code, downcase_headers(headers), client}
@ -449,4 +451,18 @@ defmodule Pleroma.ReverseProxy do
_ -> delete_resp_header(conn, "content-length") _ -> delete_resp_header(conn, "content-length")
end end
end end
# Only when Tesla adapter is Hackney or Finch does the URL
# need encoding before Reverse Proxying as both end up
# using the raw Hackney client and cannot leverage our
# EncodeUrl Tesla middleware
# Also do it for test environment
defp maybe_encode_url(url) do
case Application.get_env(:tesla, :adapter) do
Tesla.Adapter.Hackney -> Pleroma.HTTP.encode_url(url)
{Tesla.Adapter.Finch, _} -> Pleroma.HTTP.encode_url(url)
Tesla.Mock -> Pleroma.HTTP.encode_url(url)
_ -> url
end
end
end end

View file

@ -0,0 +1,29 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2025 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Tesla.Middleware.EncodeUrl do
@moduledoc """
Middleware to encode URLs properly
We must decode and then re-encode to ensure correct encoding.
If we only encode it will re-encode each % as %25 causing a space
already encoded as %20 to be %2520.
Similar problem for query parameters which need spaces to be the + character
"""
@behaviour Tesla.Middleware
@impl Tesla.Middleware
def call(%Tesla.Env{url: url} = env, next, _) do
url = Pleroma.HTTP.encode_url(url)
env = %{env | url: url}
case Tesla.run(env, next) do
{:ok, env} -> {:ok, env}
err -> err
end
end
end

View file

@ -25,6 +25,9 @@ defmodule Pleroma.HTTPTest do
%{method: :post, url: "http://example.com/world"} -> %{method: :post, url: "http://example.com/world"} ->
%Tesla.Env{status: 200, body: "world"} %Tesla.Env{status: 200, body: "world"}
%{method: :get, url: "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz"} ->
%Tesla.Env{status: 200, body: "emoji data"}
end) end)
:ok :ok
@ -67,4 +70,20 @@ defmodule Pleroma.HTTPTest do
} }
end end
end end
test "URL encoding properly encodes URLs with spaces" do
clear_config(:test_url_encoding, true)
url_with_space = "https://example.com/emoji/Pack 1/koronebless.png?foo=bar baz"
{:ok, result} = HTTP.get(url_with_space)
assert result.status == 200
properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz"
{:ok, result} = HTTP.get(properly_encoded_url)
assert result.status == 200
end
end end

View file

@ -395,4 +395,40 @@ defmodule Pleroma.ReverseProxyTest do
assert Conn.get_resp_header(conn, "content-type") == ["application/octet-stream"] assert Conn.get_resp_header(conn, "content-type") == ["application/octet-stream"]
end end
end end
# Hackey is used for Reverse Proxy when Hackney or Finch is the Tesla Adapter
# Gun is able to proxy through Tesla, so it does not need testing as the
# test cases in the Pleroma.HTTPTest module are sufficient
describe "Hackney URL encoding:" do
setup do
ClientMock
|> expect(:request, fn :get,
"https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz",
_headers,
_body,
_opts ->
{:ok, 200, [{"content-type", "image/png"}], "It works!"}
end)
|> stub(:stream_body, fn _ -> :done end)
|> stub(:close, fn _ -> :ok end)
:ok
end
test "properly encodes URLs with spaces", %{conn: conn} do
url_with_space = "https://example.com/emoji/Pack 1/koronebless.png?foo=bar baz"
result = ReverseProxy.call(conn, url_with_space)
assert result.status == 200
end
test "properly encoded URL should not be altered", %{conn: conn} do
properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz"
result = ReverseProxy.call(conn, properly_encoded_url)
assert result.status == 200
end
end
end end