From 1d8eafc0d2c38fdcac34af6b71291e9c8833a20b Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 10:13:54 -0700 Subject: [PATCH 01/11] Add failing test case for URL encoding issue --- test/pleroma/http_test.exs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index de359e599..058cb96c0 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -25,6 +25,9 @@ defmodule Pleroma.HTTPTest do %{method: :post, url: "http://example.com/world"} -> %Tesla.Env{status: 200, body: "world"} + + %{method: :get, url: "https://tsundere.love/emoji/Pack%201/koronebless.png"} -> + %Tesla.Env{status: 200, body: "emoji data"} end) :ok @@ -67,4 +70,18 @@ defmodule Pleroma.HTTPTest do } end end + + test "URL encoding properly encodes URLs with spaces" do + url_with_space = "https://tsundere.love/emoji/Pack 1/koronebless.png" + + result = HTTP.get(url_with_space) + + assert result == {:ok, %Tesla.Env{status: 200, body: "emoji data"}} + + properly_encoded_url = "https://tsundere.love/emoji/Pack%201/koronebless.png" + + result = HTTP.get(properly_encoded_url) + + assert result == {:ok, %Tesla.Env{status: 200, body: "emoji data"}} + end end From 11d27349e32d23649dd4e5ba6a3597f62199e6e5 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 11:05:49 -0700 Subject: [PATCH 02/11] Fix HTTP client making invalid requests due to no percent encoding processing or validation. --- changelog.d/url-encoding.fix | 1 + lib/pleroma/http.ex | 10 ++-- lib/pleroma/tesla/middleware/encode_url.ex | 53 ++++++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 changelog.d/url-encoding.fix create mode 100644 lib/pleroma/tesla/middleware/encode_url.ex diff --git a/changelog.d/url-encoding.fix b/changelog.d/url-encoding.fix new file mode 100644 index 000000000..3cca87ded --- /dev/null +++ b/changelog.d/url-encoding.fix @@ -0,0 +1 @@ +Fix HTTP client making invalid requests due to no percent encoding processing or validation. diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index c11317850..1833f5f85 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -105,20 +105,24 @@ defmodule Pleroma.HTTP do end defp adapter_middlewares(Tesla.Adapter.Gun, extra_middleware) do - [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool] ++ + default_middleware() ++ + [Pleroma.Tesla.Middleware.ConnectionPool] ++ extra_middleware end defp adapter_middlewares({Tesla.Adapter.Finch, _}, extra_middleware) do - [Tesla.Middleware.FollowRedirects] ++ extra_middleware + default_middleware() ++ extra_middleware end defp adapter_middlewares(_, extra_middleware) do if Pleroma.Config.get(:env) == :test do # Emulate redirects in test env, which are handled by adapters in other environments - [Tesla.Middleware.FollowRedirects] + default_middleware() else extra_middleware end end + + defp default_middleware(), + do: [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.EncodeUrl] end diff --git a/lib/pleroma/tesla/middleware/encode_url.ex b/lib/pleroma/tesla/middleware/encode_url.ex new file mode 100644 index 000000000..df10e13c2 --- /dev/null +++ b/lib/pleroma/tesla/middleware/encode_url.ex @@ -0,0 +1,53 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2025 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Tesla.Middleware.EncodeUrl do + @moduledoc """ + Middleware to encode URLs properly + + We must decode and then re-encode to ensure correct encoding. + If we only encode it will re-encode each % as %25 causing a space + already encoded as %20 to be %2520. + + Similar problem for query parameters which need spaces to be the + character + """ + + @behaviour Tesla.Middleware + + @impl Tesla.Middleware + def call(%Tesla.Env{url: url} = env, next, _) do + url = + URI.parse(url) + |> then(fn parsed -> + path = encode_path(parsed.path) + query = encode_query(parsed.query) + + %{parsed | path: path, query: query} + end) + |> URI.to_string() + + env = %{env | url: url} + + case Tesla.run(env, next) do + {:ok, env} -> {:ok, env} + err -> err + end + end + + defp encode_path(nil), do: nil + + defp encode_path(path) when is_binary(path) do + path + |> URI.decode() + |> URI.encode() + end + + defp encode_query(nil), do: nil + + defp encode_query(query) when is_binary(query) do + query + |> URI.decode_query() + |> URI.encode_query() + end +end From 4217ababfc0f75559d48e58cc9d966aae5059476 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 13:17:50 -0700 Subject: [PATCH 03/11] Improve design so existing tests do not break --- lib/pleroma/http.ex | 19 ++++++++++++++----- lib/pleroma/tesla/middleware/encode_url.ex | 21 ++++++++++++--------- test/pleroma/http_test.exs | 2 ++ 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 1833f5f85..75570d281 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -115,11 +115,20 @@ defmodule Pleroma.HTTP do end defp adapter_middlewares(_, extra_middleware) do - if Pleroma.Config.get(:env) == :test do - # Emulate redirects in test env, which are handled by adapters in other environments - default_middleware() - else - extra_middleware + # A lot of tests are written expecting unencoded URLs + # and the burden of fixing that is high. Also it makes + # them hard to read. Tests will opt-in when we want to validate + # the encoding is being done correctly. + cond do + Pleroma.Config.get(:env) == :test and Pleroma.Config.get(:test_url_encoding) -> + default_middleware() + + Pleroma.Config.get(:env) == :test -> + # Emulate redirects in test env, which are handled by adapters in other environments + [Tesla.Middleware.FollowRedirects] + + true -> + extra_middleware end end diff --git a/lib/pleroma/tesla/middleware/encode_url.ex b/lib/pleroma/tesla/middleware/encode_url.ex index df10e13c2..ee74c41a1 100644 --- a/lib/pleroma/tesla/middleware/encode_url.ex +++ b/lib/pleroma/tesla/middleware/encode_url.ex @@ -17,15 +17,7 @@ defmodule Pleroma.Tesla.Middleware.EncodeUrl do @impl Tesla.Middleware def call(%Tesla.Env{url: url} = env, next, _) do - url = - URI.parse(url) - |> then(fn parsed -> - path = encode_path(parsed.path) - query = encode_query(parsed.query) - - %{parsed | path: path, query: query} - end) - |> URI.to_string() + url = encode_url(url) env = %{env | url: url} @@ -35,6 +27,17 @@ defmodule Pleroma.Tesla.Middleware.EncodeUrl do end end + defp encode_url(url) when is_binary(url) do + URI.parse(url) + |> then(fn parsed -> + path = encode_path(parsed.path) + query = encode_query(parsed.query) + + %{parsed | path: path, query: query} + end) + |> URI.to_string() + end + defp encode_path(nil), do: nil defp encode_path(path) when is_binary(path) do diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 058cb96c0..803bd451a 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -72,6 +72,8 @@ defmodule Pleroma.HTTPTest do end test "URL encoding properly encodes URLs with spaces" do + clear_config(:test_url_encoding, true) + url_with_space = "https://tsundere.love/emoji/Pack 1/koronebless.png" result = HTTP.get(url_with_space) From 404e09126076dcbd895fb2d17f872c553cc31249 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 13:48:16 -0700 Subject: [PATCH 04/11] Credo --- lib/pleroma/http.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 75570d281..f0e01d589 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -132,6 +132,6 @@ defmodule Pleroma.HTTP do end end - defp default_middleware(), + defp default_middleware, do: [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.EncodeUrl] end From c49dece0ddf7f6704afde2e1fc969537e423a455 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 15:13:43 -0700 Subject: [PATCH 05/11] Update test to also cover query encoding --- test/pleroma/http_test.exs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 803bd451a..6325ea3c5 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -26,7 +26,7 @@ defmodule Pleroma.HTTPTest do %{method: :post, url: "http://example.com/world"} -> %Tesla.Env{status: 200, body: "world"} - %{method: :get, url: "https://tsundere.love/emoji/Pack%201/koronebless.png"} -> + %{method: :get, url: "https://tsundere.love/emoji/Pack%201/koronebless.png?foo=bar+baz"} -> %Tesla.Env{status: 200, body: "emoji data"} end) @@ -74,13 +74,13 @@ defmodule Pleroma.HTTPTest do test "URL encoding properly encodes URLs with spaces" do clear_config(:test_url_encoding, true) - url_with_space = "https://tsundere.love/emoji/Pack 1/koronebless.png" + url_with_space = "https://tsundere.love/emoji/Pack 1/koronebless.png?foo=bar baz" result = HTTP.get(url_with_space) assert result == {:ok, %Tesla.Env{status: 200, body: "emoji data"}} - properly_encoded_url = "https://tsundere.love/emoji/Pack%201/koronebless.png" + properly_encoded_url = "https://tsundere.love/emoji/Pack%201/koronebless.png?foo=bar+baz" result = HTTP.get(properly_encoded_url) From 842090945aa5700faa222e47985cad542d375314 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 15:42:49 -0700 Subject: [PATCH 06/11] Ensure Hackney and Finch both get the default middleware --- lib/pleroma/http.ex | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index f0e01d589..bdeb2171e 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -110,10 +110,6 @@ defmodule Pleroma.HTTP do extra_middleware end - defp adapter_middlewares({Tesla.Adapter.Finch, _}, extra_middleware) do - default_middleware() ++ extra_middleware - end - defp adapter_middlewares(_, extra_middleware) do # A lot of tests are written expecting unencoded URLs # and the burden of fixing that is high. Also it makes @@ -127,8 +123,9 @@ defmodule Pleroma.HTTP do # Emulate redirects in test env, which are handled by adapters in other environments [Tesla.Middleware.FollowRedirects] + # Hackney and Finch true -> - extra_middleware + default_middleware() ++ extra_middleware end end From 49ba6c88655669fe83b8c58d4070bd7ca5f215f1 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 17:07:22 -0700 Subject: [PATCH 07/11] Rework the URL encoding so it is a public function: Pleroma.HTTP.encode_url/1 --- lib/pleroma/http.ex | 27 ++++++++++++++++++++ lib/pleroma/tesla/middleware/encode_url.ex | 29 +--------------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index bdeb2171e..9a0868d33 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -131,4 +131,31 @@ defmodule Pleroma.HTTP do defp default_middleware, do: [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.EncodeUrl] + + def encode_url(url) when is_binary(url) do + URI.parse(url) + |> then(fn parsed -> + path = encode_path(parsed.path) + query = encode_query(parsed.query) + + %{parsed | path: path, query: query} + end) + |> URI.to_string() + end + + defp encode_path(nil), do: nil + + defp encode_path(path) when is_binary(path) do + path + |> URI.decode() + |> URI.encode() + end + + defp encode_query(nil), do: nil + + defp encode_query(query) when is_binary(query) do + query + |> URI.decode_query() + |> URI.encode_query() + end end diff --git a/lib/pleroma/tesla/middleware/encode_url.ex b/lib/pleroma/tesla/middleware/encode_url.ex index ee74c41a1..32c559d3b 100644 --- a/lib/pleroma/tesla/middleware/encode_url.ex +++ b/lib/pleroma/tesla/middleware/encode_url.ex @@ -17,7 +17,7 @@ defmodule Pleroma.Tesla.Middleware.EncodeUrl do @impl Tesla.Middleware def call(%Tesla.Env{url: url} = env, next, _) do - url = encode_url(url) + url = Pleroma.HTTP.encode_url(url) env = %{env | url: url} @@ -26,31 +26,4 @@ defmodule Pleroma.Tesla.Middleware.EncodeUrl do err -> err end end - - defp encode_url(url) when is_binary(url) do - URI.parse(url) - |> then(fn parsed -> - path = encode_path(parsed.path) - query = encode_query(parsed.query) - - %{parsed | path: path, query: query} - end) - |> URI.to_string() - end - - defp encode_path(nil), do: nil - - defp encode_path(path) when is_binary(path) do - path - |> URI.decode() - |> URI.encode() - end - - defp encode_query(nil), do: nil - - defp encode_query(query) when is_binary(query) do - query - |> URI.decode_query() - |> URI.encode_query() - end end From ab4edf7933f245c9e955d4942471fb938a2f7c0e Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 17:20:42 -0700 Subject: [PATCH 08/11] Add proper ReverseProxy test cases --- test/pleroma/reverse_proxy_test.exs | 36 +++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test/pleroma/reverse_proxy_test.exs b/test/pleroma/reverse_proxy_test.exs index 85e1d0910..034ab28a5 100644 --- a/test/pleroma/reverse_proxy_test.exs +++ b/test/pleroma/reverse_proxy_test.exs @@ -395,4 +395,40 @@ defmodule Pleroma.ReverseProxyTest do assert Conn.get_resp_header(conn, "content-type") == ["application/octet-stream"] end end + + # Hackey is used for Reverse Proxy when Hackney or Finch is the Tesla Adapter + # Gun is able to proxy through Tesla, so it does not need testing as the + # test cases in the Pleroma.HTTPTest module are sufficient + describe "Hackney URL encoding:" do + setup do + ClientMock + |> expect(:request, fn :get, + "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz", + _headers, + _body, + _opts -> + {:ok, 200, [{"content-type", "image/png"}], "It works!"} + end) + |> stub(:stream_body, fn _ -> :done end) + |> stub(:close, fn _ -> :ok end) + + :ok + end + + test "properly encodes URLs with spaces", %{conn: conn} do + url_with_space = "https://example.com/emoji/Pack 1/koronebless.png?foo=bar baz" + + result = ReverseProxy.call(conn, url_with_space) + + assert result.status == 200 + end + + test "properly encoded URL should not be altered", %{conn: conn} do + properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz" + + result = ReverseProxy.call(conn, properly_encoded_url) + + assert result.status == 200 + end + end end From 425329bacd59cbf15c017aaa4ab271c768076120 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 17:32:08 -0700 Subject: [PATCH 09/11] Add fix to ensure URL is encoded when reverse proxying --- lib/pleroma/reverse_proxy.ex | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/pleroma/reverse_proxy.ex b/lib/pleroma/reverse_proxy.ex index 3c82f9996..cd58f29e4 100644 --- a/lib/pleroma/reverse_proxy.ex +++ b/lib/pleroma/reverse_proxy.ex @@ -158,6 +158,8 @@ defmodule Pleroma.ReverseProxy do Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}") method = method |> String.downcase() |> String.to_existing_atom() + url = maybe_encode_url(url) + case client().request(method, url, headers, "", opts) do {:ok, code, headers, client} when code in @valid_resp_codes -> {:ok, code, downcase_headers(headers), client} @@ -449,4 +451,18 @@ defmodule Pleroma.ReverseProxy do _ -> delete_resp_header(conn, "content-length") end end + + # Only when Tesla adapter is Hackney or Finch does the URL + # need encoding before Reverse Proxying as both end up + # using the raw Hackney client and cannot leverage our + # EncodeUrl Tesla middleware + # Also do it for test environment + defp maybe_encode_url(url) do + case Application.get_env(:tesla, :adapter) do + Tesla.Adapter.Hackney -> Pleroma.HTTP.encode_url(url) + {Tesla.Adapter.Finch, _} -> Pleroma.HTTP.encode_url(url) + Tesla.Mock -> Pleroma.HTTP.encode_url(url) + _ -> url + end + end end From 4e6f0af4ce66b17687092746d397c04afc56e281 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 17:32:49 -0700 Subject: [PATCH 10/11] Better assertion logic --- test/pleroma/http_test.exs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 6325ea3c5..0e2551755 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -76,14 +76,14 @@ defmodule Pleroma.HTTPTest do url_with_space = "https://tsundere.love/emoji/Pack 1/koronebless.png?foo=bar baz" - result = HTTP.get(url_with_space) + {:ok, result} = HTTP.get(url_with_space) - assert result == {:ok, %Tesla.Env{status: 200, body: "emoji data"}} + assert result.status == 200 properly_encoded_url = "https://tsundere.love/emoji/Pack%201/koronebless.png?foo=bar+baz" - result = HTTP.get(properly_encoded_url) + {:ok, result} = HTTP.get(properly_encoded_url) - assert result == {:ok, %Tesla.Env{status: 200, body: "emoji data"}} + assert result.status == 200 end end From 44e56ed756e19a1de324afebc71103c0c6e7ed31 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 30 Jul 2025 18:26:56 -0700 Subject: [PATCH 11/11] Switch to example domain name --- test/pleroma/http_test.exs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 0e2551755..61347015d 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -26,7 +26,7 @@ defmodule Pleroma.HTTPTest do %{method: :post, url: "http://example.com/world"} -> %Tesla.Env{status: 200, body: "world"} - %{method: :get, url: "https://tsundere.love/emoji/Pack%201/koronebless.png?foo=bar+baz"} -> + %{method: :get, url: "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz"} -> %Tesla.Env{status: 200, body: "emoji data"} end) @@ -74,13 +74,13 @@ defmodule Pleroma.HTTPTest do test "URL encoding properly encodes URLs with spaces" do clear_config(:test_url_encoding, true) - url_with_space = "https://tsundere.love/emoji/Pack 1/koronebless.png?foo=bar baz" + url_with_space = "https://example.com/emoji/Pack 1/koronebless.png?foo=bar baz" {:ok, result} = HTTP.get(url_with_space) assert result.status == 200 - properly_encoded_url = "https://tsundere.love/emoji/Pack%201/koronebless.png?foo=bar+baz" + properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz" {:ok, result} = HTTP.get(properly_encoded_url)