From 0f3b1808fd3dc835e0fe993ceb16ec4d6ea12c03 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Thu, 14 Aug 2025 22:03:23 +0200 Subject: [PATCH 01/22] Check what chars to encode in the path segment of URIs, add list to Constants https://datatracker.ietf.org/doc/html/rfc3986 --- lib/pleroma/constants.ex | 7 +++++++ lib/pleroma/http.ex | 12 +++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/constants.ex b/lib/pleroma/constants.ex index 92ca11494..f02607273 100644 --- a/lib/pleroma/constants.ex +++ b/lib/pleroma/constants.ex @@ -132,6 +132,13 @@ defmodule Pleroma.Constants do do: ~r/^[^[:cntrl:] ()<>@,;:\\"\/\[\]?=]+\/[^[:cntrl:] ()<>@,;:\\"\/\[\]?=]+(; .*)?$/ ) + # List of allowed chars in the path segment of a URI + # unreserved, sub-delims, ":", "@" and "/" allowed as the separator in path + # https://datatracker.ietf.org/doc/html/rfc3986 + const(uri_path_allowed_reserved_chars, + do: ~c"!$&'()*+,;=/:@" + ) + const(upload_object_types, do: ["Document", "Image"]) const(activity_json_canonical_mime_type, diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 9a0868d33..c6bf20127 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -14,6 +14,7 @@ defmodule Pleroma.HTTP do alias Tesla.Env require Logger + require Pleroma.Constants @type t :: __MODULE__ @type method() :: :get | :post | :put | :delete | :head @@ -145,10 +146,19 @@ defmodule Pleroma.HTTP do defp encode_path(nil), do: nil + # URI.encode/2 deliberately does not encode all chars that are forbidden + # in the path component of a URI. It only encodes chars that are forbidden + # in the whole URI. A predicate in the 2nd argument is used to fix that here. + # URI.encode/2 uses the predicate function to determine whether each byte + # (in an integer representation) should be encoded or not. defp encode_path(path) when is_binary(path) do path |> URI.decode() - |> URI.encode() + |> URI.encode(fn byte -> + URI.char_unreserved?(byte) || Enum.any?( + Pleroma.Constants.uri_path_allowed_reserved_chars, fn char -> + char == byte end) + end) end defp encode_query(nil), do: nil From 619f247e38eb70746426d6440a373c4d682c776b Mon Sep 17 00:00:00 2001 From: Phantasm Date: Sat, 23 Aug 2025 23:55:36 +0200 Subject: [PATCH 02/22] Add more URL-encoding tests --- test/pleroma/http_test.exs | 21 ++++++++++++++ test/pleroma/reverse_proxy_test.exs | 43 +++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 61347015d..513802dfb 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -28,6 +28,15 @@ defmodule Pleroma.HTTPTest do %{method: :get, url: "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz"} -> %Tesla.Env{status: 200, body: "emoji data"} + + %{ + method: :get, + url: "https://example.com/media/foo/bar%20!$&'()*+,;=/:%20@a%20%5Bbaz%5D.mp4" + } -> + %Tesla.Env{status: 200, body: "video data"} + + %{method: :get, url: "https://example.com/media/unicode%20%F0%9F%99%82%20.gif"} -> + %Tesla.Env{status: 200, body: "unicode data"} end) :ok @@ -85,5 +94,17 @@ defmodule Pleroma.HTTPTest do {:ok, result} = HTTP.get(properly_encoded_url) assert result.status == 200 + + url_with_reserved_chars = "https://example.com/media/foo/bar !$&'()*+,;=/: @a [baz].mp4" + + {:ok, result} = HTTP.get(url_with_reserved_chars) + + assert result.status == 200 + + url_with_unicode = "https://example.com/media/unicode 🙂 .gif" + + {:ok, result} = HTTP.get(url_with_unicode) + + assert result.status == 200 end end diff --git a/test/pleroma/reverse_proxy_test.exs b/test/pleroma/reverse_proxy_test.exs index 034ab28a5..62ad2f38a 100644 --- a/test/pleroma/reverse_proxy_test.exs +++ b/test/pleroma/reverse_proxy_test.exs @@ -402,12 +402,27 @@ defmodule Pleroma.ReverseProxyTest do describe "Hackney URL encoding:" do setup do ClientMock - |> expect(:request, fn :get, - "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz", - _headers, - _body, - _opts -> - {:ok, 200, [{"content-type", "image/png"}], "It works!"} + |> expect(:request, fn + :get, + "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz", + _headers, + _body, + _opts -> + {:ok, 200, [{"content-type", "image/png"}], "It works!"} + + :get, + "https://example.com/media/foo/bar%20!$&'()*+,;=/:%20@a%20%5Bbaz%5D.mp4", + _headers, + _body, + _opts -> + {:ok, 200, [{"content-type", "video/mp4"}], "Allowed reserved chars."} + + :get, + "https://example.com/media/unicode%20%F0%9F%99%82%20.gif", + _headers, + _body, + _opts -> + {:ok, 200, [{"content-type", "image/gif"}], "Unicode emoji in path"} end) |> stub(:stream_body, fn _ -> :done end) |> stub(:close, fn _ -> :ok end) @@ -430,5 +445,21 @@ defmodule Pleroma.ReverseProxyTest do assert result.status == 200 end + + test "properly encodes URLs with allowed reserved characters", %{conn: conn} do + url_with_reserved_chars = "https://example.com/media/foo/bar !$&'()*+,;=/: @a [baz].mp4" + + result = ReverseProxy.call(conn, url_with_reserved_chars) + + assert result.status == 200 + end + + test "properly encodes URLs with unicode in path", %{conn: conn} do + url_with_unicode = "https://example.com/media/unicode 🙂 .gif" + + result = ReverseProxy.call(conn, url_with_unicode) + + assert result.status == 200 + end end end From 0a8423fdf726e6c1d9d6bf781e14d610bb917ed9 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Fri, 29 Aug 2025 14:47:19 +0200 Subject: [PATCH 03/22] Add ability to bypass url decode/parse in Pleroma.HTTP, fix encode in Pleroma.Upload --- lib/pleroma/http.ex | 43 ++++++++++++++++++++++++++---------- lib/pleroma/upload.ex | 11 +++++++-- test/pleroma/upload_test.exs | 15 +++++++++++++ 3 files changed, 55 insertions(+), 14 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index c6bf20127..6f4abb30a 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -133,32 +133,51 @@ defmodule Pleroma.HTTP do defp default_middleware, do: [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.EncodeUrl] - def encode_url(url) when is_binary(url) do - URI.parse(url) - |> then(fn parsed -> - path = encode_path(parsed.path) - query = encode_query(parsed.query) + # We don't always want to decode the path first, like is the case in + # Pleroma.Upload.url_from_spec/3. + def encode_url(url, opts \\ []) when is_binary(url) and is_list(opts) do + bypass_parse = Keyword.get(opts, :bypass_parse, false) + bypass_decode = Keyword.get(opts, :bypass_decode, false) - %{parsed | path: path, query: query} - end) - |> URI.to_string() + cond do + bypass_parse -> + encode_path(url, bypass_decode) + + true -> + URI.parse(url) + |> then(fn parsed -> + path = encode_path(parsed.path, bypass_decode) + query = encode_query(parsed.query) + + %{parsed | path: path, query: query} + end) + |> URI.to_string() + end end - defp encode_path(nil), do: nil + defp encode_path(nil, _bypass_decode), do: nil # URI.encode/2 deliberately does not encode all chars that are forbidden # in the path component of a URI. It only encodes chars that are forbidden # in the whole URI. A predicate in the 2nd argument is used to fix that here. # URI.encode/2 uses the predicate function to determine whether each byte # (in an integer representation) should be encoded or not. - defp encode_path(path) when is_binary(path) do + defp encode_path(path, bypass_decode) when is_binary(path) do + path = + cond do + bypass_decode -> + path + + true -> + URI.decode(path) + end + path - |> URI.decode() |> URI.encode(fn byte -> URI.char_unreserved?(byte) || Enum.any?( Pleroma.Constants.uri_path_allowed_reserved_chars, fn char -> char == byte end) - end) + end) end defp encode_query(nil), do: nil diff --git a/lib/pleroma/upload.ex b/lib/pleroma/upload.ex index b0aef2592..8f75a1d57 100644 --- a/lib/pleroma/upload.ex +++ b/lib/pleroma/upload.ex @@ -34,6 +34,7 @@ defmodule Pleroma.Upload do """ alias Ecto.UUID + alias Pleroma.HTTP alias Pleroma.Maps alias Pleroma.Web.ActivityPub.Utils require Logger @@ -230,11 +231,17 @@ defmodule Pleroma.Upload do tmp_path end + # Encoding the whole path here is fine since the path is in a + # UUID/ form. + # The file at this point isn't %-encoded, so the path shouldn't + # be decoded first like Pleroma.HTTP.encode_url/1 does. defp url_from_spec(%__MODULE__{name: name}, base_url, {:file, path}) do + encode_opts = [bypass_decode: true, bypass_parse: true] + path = - URI.encode(path, &char_unescaped?/1) <> + HTTP.encode_url(path, encode_opts) <> if Pleroma.Config.get([__MODULE__, :link_name], false) do - "?name=#{URI.encode(name, &char_unescaped?/1)}" + "?name=#{URI.encode_query(name)}" else "" end diff --git a/test/pleroma/upload_test.exs b/test/pleroma/upload_test.exs index 5fd62fa43..a99d01f2a 100644 --- a/test/pleroma/upload_test.exs +++ b/test/pleroma/upload_test.exs @@ -242,6 +242,21 @@ defmodule Pleroma.UploadTest do assert Path.basename(attachment_url["href"]) == "%3A%3F%23%5B%5D%40%21%24%26%5C%27%28%29%2A%2B%2C%3B%3D.jpg" end + + test "double %-encodes filename" do + File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg") + + file = %Plug.Upload{ + content_type: "image/jpeg", + path: Path.absname("test/fixtures/image_tmp.jpg"), + filename: "file with %20.jpg" + } + + {:ok, data} = Upload.store(file) + [attachment_url | _] = data["url"] + + assert Path.basename(attachment_url["href"]) == "file%20with%20%2520.jpg" + end end describe "Setting a custom base_url for uploaded media" do From 80db6f1328212a0b00e26f548f02aa5d6a3c1715 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Fri, 29 Aug 2025 15:04:02 +0200 Subject: [PATCH 04/22] Fix character escaping test for Pleroma.Upload --- lib/pleroma/http.ex | 12 ++++++++---- test/pleroma/http_test.exs | 20 ++++++++++++++++++++ test/pleroma/reverse_proxy_test.exs | 22 +++++++++------------- test/pleroma/upload_test.exs | 6 +++--- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 6f4abb30a..eea8416c6 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -174,10 +174,14 @@ defmodule Pleroma.HTTP do path |> URI.encode(fn byte -> - URI.char_unreserved?(byte) || Enum.any?( - Pleroma.Constants.uri_path_allowed_reserved_chars, fn char -> - char == byte end) - end) + URI.char_unreserved?(byte) || + Enum.any?( + Pleroma.Constants.uri_path_allowed_reserved_chars(), + fn char -> + char == byte + end + ) + end) end defp encode_query(nil), do: nil diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 513802dfb..2af0d21c7 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -107,4 +107,24 @@ defmodule Pleroma.HTTPTest do assert result.status == 200 end + + test "decodes URL first by default" do + clear_config(:test_url_encoding, true) + + normal_url = "https://example.com/media/file%20with%20space.jpg?name=a+space.jpg" + + result = HTTP.encode_url(normal_url) + + assert result == "https://example.com/media/file%20with%20space.jpg?name=a+space.jpg" + end + + test "doesn't decode URL first when specified" do + clear_config(:test_url_encoding, true) + + normal_url = "https://example.com/media/file%20with%20space.jpg" + + result = HTTP.encode_url(normal_url, bypass_decode: true) + + assert result == "https://example.com/media/file%2520with%2520space.jpg" + end end diff --git a/test/pleroma/reverse_proxy_test.exs b/test/pleroma/reverse_proxy_test.exs index 62ad2f38a..3595568cf 100644 --- a/test/pleroma/reverse_proxy_test.exs +++ b/test/pleroma/reverse_proxy_test.exs @@ -404,24 +404,20 @@ defmodule Pleroma.ReverseProxyTest do ClientMock |> expect(:request, fn :get, - "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz", - _headers, - _body, - _opts -> + "https://example.com/emoji/Pack%201/koronebless.png?foo=bar+baz", + _headers, + _body, + _opts -> {:ok, 200, [{"content-type", "image/png"}], "It works!"} :get, - "https://example.com/media/foo/bar%20!$&'()*+,;=/:%20@a%20%5Bbaz%5D.mp4", - _headers, - _body, - _opts -> + "https://example.com/media/foo/bar%20!$&'()*+,;=/:%20@a%20%5Bbaz%5D.mp4", + _headers, + _body, + _opts -> {:ok, 200, [{"content-type", "video/mp4"}], "Allowed reserved chars."} - :get, - "https://example.com/media/unicode%20%F0%9F%99%82%20.gif", - _headers, - _body, - _opts -> + :get, "https://example.com/media/unicode%20%F0%9F%99%82%20.gif", _headers, _body, _opts -> {:ok, 200, [{"content-type", "image/gif"}], "Unicode emoji in path"} end) |> stub(:stream_body, fn _ -> :done end) diff --git a/test/pleroma/upload_test.exs b/test/pleroma/upload_test.exs index a99d01f2a..91c7f49ea 100644 --- a/test/pleroma/upload_test.exs +++ b/test/pleroma/upload_test.exs @@ -227,20 +227,20 @@ defmodule Pleroma.UploadTest do assert Path.basename(attachment_url["href"]) == "an%E2%80%A6%20image.jpg" end - test "escapes reserved uri characters" do + test "escapes disallowed reserved characters in uri path" do File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg") file = %Plug.Upload{ content_type: "image/jpeg", path: Path.absname("test/fixtures/image_tmp.jpg"), - filename: ":?#[]@!$&\\'()*+,;=.jpg" + filename: ":?#[]@!$&'()*+,;=.jpg" } {:ok, data} = Upload.store(file) [attachment_url | _] = data["url"] assert Path.basename(attachment_url["href"]) == - "%3A%3F%23%5B%5D%40%21%24%26%5C%27%28%29%2A%2B%2C%3B%3D.jpg" + ":%3F%23%5B%5D@!$&'()*+,;=.jpg" end test "double %-encodes filename" do From 99a1c0890a5aee2a8051791b39b599d8e930dcbc Mon Sep 17 00:00:00 2001 From: Phantasm Date: Fri, 29 Aug 2025 20:53:16 +0200 Subject: [PATCH 05/22] URI.encode_query needs an enum, add test for this case --- lib/pleroma/upload.ex | 3 ++- test/pleroma/upload_test.exs | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/upload.ex b/lib/pleroma/upload.ex index 8f75a1d57..de8949848 100644 --- a/lib/pleroma/upload.ex +++ b/lib/pleroma/upload.ex @@ -241,7 +241,8 @@ defmodule Pleroma.Upload do path = HTTP.encode_url(path, encode_opts) <> if Pleroma.Config.get([__MODULE__, :link_name], false) do - "?name=#{URI.encode_query(name)}" + enum = %{name: name} + "?#{URI.encode_query(enum)}" else "" end diff --git a/test/pleroma/upload_test.exs b/test/pleroma/upload_test.exs index 91c7f49ea..9a51c612b 100644 --- a/test/pleroma/upload_test.exs +++ b/test/pleroma/upload_test.exs @@ -282,4 +282,23 @@ defmodule Pleroma.UploadTest do refute String.starts_with?(url, base_url <> "/media/") end end + + describe "Setting a link_name for uploaded media" do + setup do: clear_config([Pleroma.Upload, :link_name], true) + + test "encodes name parameter in query" do + File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg") + + file = %Plug.Upload{ + content_type: "image/jpeg", + path: Path.absname("test/fixtures/image_tmp.jpg"), + filename: "test file.jpg" + } + + {:ok, data} = Upload.store(file) + [attachment_url | _] = data["url"] + + assert Path.basename(attachment_url["href"]) == "test%20file.jpg?name=test+file.jpg" + end + end end From 9445ab9096b7a69200e3c1f8bdb3befdc019a6e9 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 8 Sep 2025 22:05:49 +0200 Subject: [PATCH 06/22] ReverseProxy: Log request after potentional %-encoding --- lib/pleroma/reverse_proxy.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/reverse_proxy.ex b/lib/pleroma/reverse_proxy.ex index cd58f29e4..6c8163f2a 100644 --- a/lib/pleroma/reverse_proxy.ex +++ b/lib/pleroma/reverse_proxy.ex @@ -155,11 +155,12 @@ defmodule Pleroma.ReverseProxy do end defp request(method, url, headers, opts) do - Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}") method = method |> String.downcase() |> String.to_existing_atom() url = maybe_encode_url(url) + Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}") + case client().request(method, url, headers, "", opts) do {:ok, code, headers, client} when code in @valid_resp_codes -> {:ok, code, downcase_headers(headers), client} From 004ea90b29fec5e7b7b53b40f6acb6455a2340db Mon Sep 17 00:00:00 2001 From: Phantasm Date: Tue, 23 Sep 2025 22:55:54 +0200 Subject: [PATCH 07/22] MediaProxy: Fix 424 caused by inconsistent %-encoding from remote instances Notably this would fail to redirect to original proxied file when preview generation criteria haven't been met. --- lib/pleroma/web/media_proxy.ex | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/pleroma/web/media_proxy.ex b/lib/pleroma/web/media_proxy.ex index 29882542c..a29debbfc 100644 --- a/lib/pleroma/web/media_proxy.ex +++ b/lib/pleroma/web/media_proxy.ex @@ -5,6 +5,7 @@ defmodule Pleroma.Web.MediaProxy do alias Pleroma.Config alias Pleroma.Helpers.UriHelper + alias Pleroma.HTTP alias Pleroma.Upload alias Pleroma.Web.Endpoint alias Pleroma.Web.MediaProxy.Invalidation @@ -99,13 +100,20 @@ defmodule Pleroma.Web.MediaProxy do {base64, sig64} end + # The URL coming into MediaProxy from the outside might have wrong %-encoding + # (like older Pleroma versions) + # This would cause an inconsistency with the encoded URL here and the requested + # URL fixed with Pleroma.Tesla.Middleware.EncodeUrl. + # End result is a failing HEAD request in Pleroma.Web.MediaProxy.MediaProxyController.handle_preview/2 def encode_url(url) do + url = HTTP.encode_url(url) {base64, sig64} = base64_sig64(url) build_url(sig64, base64, filename(url)) end def encode_preview_url(url, preview_params \\ []) do + url = HTTP.encode_url(url) {base64, sig64} = base64_sig64(url) build_preview_url(sig64, base64, filename(url), preview_params) From d413f9bf7078cf93c3efcd0ae195539dd3c7ede0 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Sat, 27 Sep 2025 12:19:47 +0200 Subject: [PATCH 08/22] MediaProxy: fix Pleroma.HTTP.encode_url not being available in test env --- .../mrf/media_proxy_warming_policy_test.exs | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs b/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs index 0da3afa3b..13b4d24b8 100644 --- a/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs +++ b/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs @@ -58,10 +58,13 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do - MediaProxyWarmingPolicy.filter(@message) + with_mock HTTP, + get: fn _, _, _ -> {:ok, []} end, + encode_url: fn url -> :meck.passthrough([url]) end, + encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + MediaProxyWarmingPolicy.filter(@message) - assert called(HTTP.get(:_, :_, :_)) + assert called(HTTP.get(:_, :_, :_)) end end @@ -85,10 +88,13 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do - MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history) + with_mock HTTP, + get: fn _, _, _ -> {:ok, []} end, + encode_url: fn url -> :meck.passthrough([url]) end, + encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history) - assert called(HTTP.get(:_, :_, :_)) + assert called(HTTP.get(:_, :_, :_)) end end @@ -97,8 +103,11 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do - MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history |> Map.put("type", "Update")) + with_mock HTTP, + get: fn _, _, _ -> {:ok, []} end, + encode_url: fn url -> :meck.passthrough([url]) end, + encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history |> Map.put("type", "Update")) assert called(HTTP.get(:_, :_, :_)) end From 1b438fd167368623ad9a0fbec92552e520f77bf5 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Sat, 27 Sep 2025 14:30:22 +0200 Subject: [PATCH 09/22] MediaProxy: fix query params test Elixir and Erlang both add a traling = when encoding queries --- test/pleroma/web/media_proxy_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pleroma/web/media_proxy_test.exs b/test/pleroma/web/media_proxy_test.exs index 718892665..7a845b7a7 100644 --- a/test/pleroma/web/media_proxy_test.exs +++ b/test/pleroma/web/media_proxy_test.exs @@ -73,7 +73,7 @@ defmodule Pleroma.Web.MediaProxyTest do end test "encodes and decodes URL and ignores query params for the path" do - url = "https://pleroma.soykaf.com/static/logo.png?93939393939&bunny=true" + url = "https://pleroma.soykaf.com/static/logo.png?93939393939=&bunny=true" encoded = MediaProxy.url(url) assert String.ends_with?(encoded, "/logo.png") assert decode_result(encoded) == url From a0f73d0e2f1afca990f6f1f2b39c986acdee8408 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Wed, 8 Oct 2025 23:34:43 +0200 Subject: [PATCH 10/22] Reimplement URI.encode_query/2 to support quirks, add Guardian quirk This solves the issue with Guardian rich media cards not loading, thanks to them using "," and ":" in queries which get improperly encoded. Guardian also needs specific ordering of the query keys, this also fixes that. --- lib/pleroma/http.ex | 49 ++++++++++++++++++++++++++-- test/pleroma/http_test.exs | 67 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index eea8416c6..561b015be 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -147,6 +147,7 @@ defmodule Pleroma.HTTP do URI.parse(url) |> then(fn parsed -> path = encode_path(parsed.path, bypass_decode) + |> maybe_apply_path_encoding_quirks() query = encode_query(parsed.query) %{parsed | path: path, query: query} @@ -186,9 +187,53 @@ defmodule Pleroma.HTTP do defp encode_query(nil), do: nil + # Order of kv pairs in query is not preserved when using URI.decode_query. + # URI.query_decoder/2 returns a stream which so far appears to not change order. + # Immediately switch to a list to prevent breakage for sites that expect + # the order of query keys to be always the same. defp encode_query(query) when is_binary(query) do query - |> URI.decode_query() - |> URI.encode_query() + |> URI.query_decoder() + |> Enum.to_list() + |> do_encode_query() + end + + defp maybe_apply_path_encoding_quirks(path), do: path + + # Always uses www_form encoding + defp do_encode_query(enumerable) do + Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) + end + + defp maybe_apply_query_quirk({key, value}) do + case key do + "precrop" -> + query_encode_kv_pair({key, value}, ~c":,") + + key -> + query_encode_kv_pair({key, value}) + end + end + + defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do + cond do + length(rules) > 0 -> + # URI.encode_query/2 does not appear to follow spec and encodes all parts of our URI path Constant. + # This appears to work outside of edge-cases like The Guardian Rich Media Cards, + # keeping behavior same as with URI.encode_query/2 unless otherwise specified via rules. + URI.encode_www_form(Kernel.to_string(key)) <> "=" <> + URI.encode(value, fn byte -> + URI.char_unreserved?(byte) || + Enum.any?( + rules, + fn char -> + char == byte + end) + end) + |> String.replace("%20", "+") + + true -> + URI.encode_www_form(Kernel.to_string(key)) <> "=" <> URI.encode_www_form(Kernel.to_string(value)) + end end end diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 2af0d21c7..fe6afea04 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -37,6 +37,15 @@ defmodule Pleroma.HTTPTest do %{method: :get, url: "https://example.com/media/unicode%20%F0%9F%99%82%20.gif"} -> %Tesla.Env{status: 200, body: "unicode data"} + + %{method: :get, url: "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8"} -> + %Tesla.Env{status: 200, body: "Guardian image quirk"} + + %{method: :get, url: "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz"} -> + %Tesla.Env{status: 200, body: "Space in query with Guardian quirk"} + + %{method: :get, url: "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host"} -> + %Tesla.Env{status: 200, body: "AWS S3 data"} end) :ok @@ -127,4 +136,62 @@ defmodule Pleroma.HTTPTest do assert result == "https://example.com/media/file%2520with%2520space.jpg" end + + test "properly applies Guardian image query quirk" do + clear_config(:test_url_encoding, true) + + url = "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8" + + result = HTTP.encode_url(url) + + assert result == url + + {:ok, result_get} = HTTP.get(result) + + assert result_get.status == 200 + end + + test "properly encodes spaces as \"pluses\" in query when using quirks" do + clear_config(:test_url_encoding, true) + + url = "https://example.com/emoji/Pack 1/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar baz" + + properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" + + result = HTTP.encode_url(url) + + assert result == properly_encoded_url + + {:ok, result_get} = HTTP.get(result) + + assert result_get.status == 200 + end + + test "properly encode AWS S3 queries" do + clear_config(:test_url_encoding, true) + + url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" + unencoded_url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY/20130721/us-east-1/s3/aws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" + + result = HTTP.encode_url(url) + result_unencoded = HTTP.encode_url(unencoded_url) + + assert result == url + assert result == result_unencoded + + {:ok, result_get} = HTTP.get(result) + + assert result_get.status == 200 + + end + + test "preserves query key order" do + clear_config(:test_url_encoding, true) + + url = "https://example.com/foo?hjkl=qwertz&xyz=abc&bar=baz" + + result = HTTP.encode_url(url) + + assert result == url + end end From cfd2c08ef6f090a669dbb646b12d554f816b4c35 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Fri, 10 Oct 2025 22:37:16 +0200 Subject: [PATCH 11/22] lint --- lib/pleroma/http.ex | 33 +++++++++-------- test/pleroma/http_test.exs | 35 ++++++++++++++----- .../mrf/media_proxy_warming_policy_test.exs | 10 +++--- 3 files changed, 50 insertions(+), 28 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 561b015be..5ac7f96f9 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -146,8 +146,10 @@ defmodule Pleroma.HTTP do true -> URI.parse(url) |> then(fn parsed -> - path = encode_path(parsed.path, bypass_decode) - |> maybe_apply_path_encoding_quirks() + path = + encode_path(parsed.path, bypass_decode) + |> maybe_apply_path_encoding_quirks() + query = encode_query(parsed.query) %{parsed | path: path, query: query} @@ -197,7 +199,7 @@ defmodule Pleroma.HTTP do |> Enum.to_list() |> do_encode_query() end - + defp maybe_apply_path_encoding_quirks(path), do: path # Always uses www_form encoding @@ -221,19 +223,22 @@ defmodule Pleroma.HTTP do # URI.encode_query/2 does not appear to follow spec and encodes all parts of our URI path Constant. # This appears to work outside of edge-cases like The Guardian Rich Media Cards, # keeping behavior same as with URI.encode_query/2 unless otherwise specified via rules. - URI.encode_www_form(Kernel.to_string(key)) <> "=" <> - URI.encode(value, fn byte -> - URI.char_unreserved?(byte) || - Enum.any?( - rules, - fn char -> - char == byte - end) - end) - |> String.replace("%20", "+") + (URI.encode_www_form(Kernel.to_string(key)) <> + "=" <> + URI.encode(value, fn byte -> + URI.char_unreserved?(byte) || + Enum.any?( + rules, + fn char -> + char == byte + end + ) + end)) + |> String.replace("%20", "+") true -> - URI.encode_www_form(Kernel.to_string(key)) <> "=" <> URI.encode_www_form(Kernel.to_string(value)) + URI.encode_www_form(Kernel.to_string(key)) <> + "=" <> URI.encode_www_form(Kernel.to_string(value)) end end end diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index fe6afea04..9bdb6ec35 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -38,13 +38,25 @@ defmodule Pleroma.HTTPTest do %{method: :get, url: "https://example.com/media/unicode%20%F0%9F%99%82%20.gif"} -> %Tesla.Env{status: 200, body: "unicode data"} - %{method: :get, url: "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8"} -> + %{ + method: :get, + url: + "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8" + } -> %Tesla.Env{status: 200, body: "Guardian image quirk"} - %{method: :get, url: "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz"} -> + %{ + method: :get, + url: + "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" + } -> %Tesla.Env{status: 200, body: "Space in query with Guardian quirk"} - %{method: :get, url: "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host"} -> + %{ + method: :get, + url: + "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" + } -> %Tesla.Env{status: 200, body: "AWS S3 data"} end) @@ -140,7 +152,8 @@ defmodule Pleroma.HTTPTest do test "properly applies Guardian image query quirk" do clear_config(:test_url_encoding, true) - url = "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8" + url = + "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8" result = HTTP.encode_url(url) @@ -154,9 +167,11 @@ defmodule Pleroma.HTTPTest do test "properly encodes spaces as \"pluses\" in query when using quirks" do clear_config(:test_url_encoding, true) - url = "https://example.com/emoji/Pack 1/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar baz" + url = + "https://example.com/emoji/Pack 1/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar baz" - properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" + properly_encoded_url = + "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" result = HTTP.encode_url(url) @@ -170,8 +185,11 @@ defmodule Pleroma.HTTPTest do test "properly encode AWS S3 queries" do clear_config(:test_url_encoding, true) - url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" - unencoded_url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY/20130721/us-east-1/s3/aws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" + url = + "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY%2F20130721%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" + + unencoded_url = + "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY/20130721/us-east-1/s3/aws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" result = HTTP.encode_url(url) result_unencoded = HTTP.encode_url(unencoded_url) @@ -182,7 +200,6 @@ defmodule Pleroma.HTTPTest do {:ok, result_get} = HTTP.get(result) assert result_get.status == 200 - end test "preserves query key order" do diff --git a/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs b/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs index 13b4d24b8..b589068c7 100644 --- a/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs +++ b/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs @@ -62,9 +62,9 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do get: fn _, _, _ -> {:ok, []} end, encode_url: fn url -> :meck.passthrough([url]) end, encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do - MediaProxyWarmingPolicy.filter(@message) + MediaProxyWarmingPolicy.filter(@message) - assert called(HTTP.get(:_, :_, :_)) + assert called(HTTP.get(:_, :_, :_)) end end @@ -92,9 +92,9 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do get: fn _, _, _ -> {:ok, []} end, encode_url: fn url -> :meck.passthrough([url]) end, encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do - MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history) + MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history) - assert called(HTTP.get(:_, :_, :_)) + assert called(HTTP.get(:_, :_, :_)) end end @@ -107,7 +107,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do get: fn _, _, _ -> {:ok, []} end, encode_url: fn url -> :meck.passthrough([url]) end, encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do - MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history |> Map.put("type", "Update")) + MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history |> Map.put("type", "Update")) assert called(HTTP.get(:_, :_, :_)) end From f36851acbd20904220384ccc8128ccbe55b62282 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Fri, 10 Oct 2025 22:41:24 +0200 Subject: [PATCH 12/22] credo lint --- lib/pleroma/http.ex | 7 ++++--- lib/pleroma/web/media_proxy.ex | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 5ac7f96f9..0d9f28420 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -220,9 +220,10 @@ defmodule Pleroma.HTTP do defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do cond do length(rules) > 0 -> - # URI.encode_query/2 does not appear to follow spec and encodes all parts of our URI path Constant. - # This appears to work outside of edge-cases like The Guardian Rich Media Cards, - # keeping behavior same as with URI.encode_query/2 unless otherwise specified via rules. + # URI.encode_query/2 does not appear to follow spec and encodes all part + # of our URI path Constant. This appears to work outside of edge-cases + # like The Guardian Rich Media Cards, keeping behavior same as with + # URI.encode_query/2 unless otherwise specified via rules. (URI.encode_www_form(Kernel.to_string(key)) <> "=" <> URI.encode(value, fn byte -> diff --git a/lib/pleroma/web/media_proxy.ex b/lib/pleroma/web/media_proxy.ex index a29debbfc..22f37eea8 100644 --- a/lib/pleroma/web/media_proxy.ex +++ b/lib/pleroma/web/media_proxy.ex @@ -101,10 +101,11 @@ defmodule Pleroma.Web.MediaProxy do end # The URL coming into MediaProxy from the outside might have wrong %-encoding - # (like older Pleroma versions) + # (like older Pleroma versions). # This would cause an inconsistency with the encoded URL here and the requested # URL fixed with Pleroma.Tesla.Middleware.EncodeUrl. - # End result is a failing HEAD request in Pleroma.Web.MediaProxy.MediaProxyController.handle_preview/2 + # End result is a failing HEAD request in + # Pleroma.Web.MediaProxy.MediaProxyController.handle_preview/2 def encode_url(url) do url = HTTP.encode_url(url) {base64, sig64} = base64_sig64(url) From 6487c93c476aef40c78ad678e0e8352e747cd9a5 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Fri, 10 Oct 2025 23:11:10 +0200 Subject: [PATCH 13/22] credo lint 2 --- lib/pleroma/http.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 0d9f28420..7d321de62 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -222,7 +222,7 @@ defmodule Pleroma.HTTP do length(rules) > 0 -> # URI.encode_query/2 does not appear to follow spec and encodes all part # of our URI path Constant. This appears to work outside of edge-cases - # like The Guardian Rich Media Cards, keeping behavior same as with + # like The Guardian Rich Media Cards, keeping behavior same as with # URI.encode_query/2 unless otherwise specified via rules. (URI.encode_www_form(Kernel.to_string(key)) <> "=" <> From f290b159875b68a4ee03ac9f9ced80242ee7085a Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 20 Oct 2025 22:10:28 +0200 Subject: [PATCH 14/22] Move custom URI encoding functions to Pleroma.Utils.URIEncoding --- lib/pleroma/http.ex | 111 ---------------- lib/pleroma/reverse_proxy.ex | 8 +- lib/pleroma/tesla/middleware/encode_url.ex | 2 +- lib/pleroma/upload.ex | 6 +- lib/pleroma/utils/uri_encoding.ex | 121 ++++++++++++++++++ lib/pleroma/web/media_proxy.ex | 6 +- test/pleroma/http_test.exs | 17 ++- .../mrf/media_proxy_warming_policy_test.exs | 15 +-- 8 files changed, 146 insertions(+), 140 deletions(-) create mode 100644 lib/pleroma/utils/uri_encoding.ex diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index 7d321de62..bdeb2171e 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -14,7 +14,6 @@ defmodule Pleroma.HTTP do alias Tesla.Env require Logger - require Pleroma.Constants @type t :: __MODULE__ @type method() :: :get | :post | :put | :delete | :head @@ -132,114 +131,4 @@ defmodule Pleroma.HTTP do defp default_middleware, do: [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.EncodeUrl] - - # We don't always want to decode the path first, like is the case in - # Pleroma.Upload.url_from_spec/3. - def encode_url(url, opts \\ []) when is_binary(url) and is_list(opts) do - bypass_parse = Keyword.get(opts, :bypass_parse, false) - bypass_decode = Keyword.get(opts, :bypass_decode, false) - - cond do - bypass_parse -> - encode_path(url, bypass_decode) - - true -> - URI.parse(url) - |> then(fn parsed -> - path = - encode_path(parsed.path, bypass_decode) - |> maybe_apply_path_encoding_quirks() - - query = encode_query(parsed.query) - - %{parsed | path: path, query: query} - end) - |> URI.to_string() - end - end - - defp encode_path(nil, _bypass_decode), do: nil - - # URI.encode/2 deliberately does not encode all chars that are forbidden - # in the path component of a URI. It only encodes chars that are forbidden - # in the whole URI. A predicate in the 2nd argument is used to fix that here. - # URI.encode/2 uses the predicate function to determine whether each byte - # (in an integer representation) should be encoded or not. - defp encode_path(path, bypass_decode) when is_binary(path) do - path = - cond do - bypass_decode -> - path - - true -> - URI.decode(path) - end - - path - |> URI.encode(fn byte -> - URI.char_unreserved?(byte) || - Enum.any?( - Pleroma.Constants.uri_path_allowed_reserved_chars(), - fn char -> - char == byte - end - ) - end) - end - - defp encode_query(nil), do: nil - - # Order of kv pairs in query is not preserved when using URI.decode_query. - # URI.query_decoder/2 returns a stream which so far appears to not change order. - # Immediately switch to a list to prevent breakage for sites that expect - # the order of query keys to be always the same. - defp encode_query(query) when is_binary(query) do - query - |> URI.query_decoder() - |> Enum.to_list() - |> do_encode_query() - end - - defp maybe_apply_path_encoding_quirks(path), do: path - - # Always uses www_form encoding - defp do_encode_query(enumerable) do - Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) - end - - defp maybe_apply_query_quirk({key, value}) do - case key do - "precrop" -> - query_encode_kv_pair({key, value}, ~c":,") - - key -> - query_encode_kv_pair({key, value}) - end - end - - defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do - cond do - length(rules) > 0 -> - # URI.encode_query/2 does not appear to follow spec and encodes all part - # of our URI path Constant. This appears to work outside of edge-cases - # like The Guardian Rich Media Cards, keeping behavior same as with - # URI.encode_query/2 unless otherwise specified via rules. - (URI.encode_www_form(Kernel.to_string(key)) <> - "=" <> - URI.encode(value, fn byte -> - URI.char_unreserved?(byte) || - Enum.any?( - rules, - fn char -> - char == byte - end - ) - end)) - |> String.replace("%20", "+") - - true -> - URI.encode_www_form(Kernel.to_string(key)) <> - "=" <> URI.encode_www_form(Kernel.to_string(value)) - end - end end diff --git a/lib/pleroma/reverse_proxy.ex b/lib/pleroma/reverse_proxy.ex index 6c8163f2a..bb55a4984 100644 --- a/lib/pleroma/reverse_proxy.ex +++ b/lib/pleroma/reverse_proxy.ex @@ -3,6 +3,8 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.ReverseProxy do + alias Pleroma.Utils.URIEncoding + @range_headers ~w(range if-range) @keep_req_headers ~w(accept accept-encoding cache-control if-modified-since) ++ ~w(if-unmodified-since if-none-match) ++ @range_headers @@ -460,9 +462,9 @@ defmodule Pleroma.ReverseProxy do # Also do it for test environment defp maybe_encode_url(url) do case Application.get_env(:tesla, :adapter) do - Tesla.Adapter.Hackney -> Pleroma.HTTP.encode_url(url) - {Tesla.Adapter.Finch, _} -> Pleroma.HTTP.encode_url(url) - Tesla.Mock -> Pleroma.HTTP.encode_url(url) + Tesla.Adapter.Hackney -> URIEncoding.encode_url(url) + {Tesla.Adapter.Finch, _} -> URIEncoding.encode_url(url) + Tesla.Mock -> URIEncoding.encode_url(url) _ -> url end end diff --git a/lib/pleroma/tesla/middleware/encode_url.ex b/lib/pleroma/tesla/middleware/encode_url.ex index 32c559d3b..677577c2f 100644 --- a/lib/pleroma/tesla/middleware/encode_url.ex +++ b/lib/pleroma/tesla/middleware/encode_url.ex @@ -17,7 +17,7 @@ defmodule Pleroma.Tesla.Middleware.EncodeUrl do @impl Tesla.Middleware def call(%Tesla.Env{url: url} = env, next, _) do - url = Pleroma.HTTP.encode_url(url) + url = Pleroma.Utils.URIEncoding.encode_url(url) env = %{env | url: url} diff --git a/lib/pleroma/upload.ex b/lib/pleroma/upload.ex index de8949848..06d8005bc 100644 --- a/lib/pleroma/upload.ex +++ b/lib/pleroma/upload.ex @@ -34,8 +34,8 @@ defmodule Pleroma.Upload do """ alias Ecto.UUID - alias Pleroma.HTTP alias Pleroma.Maps + alias Pleroma.Utils.URIEncoding alias Pleroma.Web.ActivityPub.Utils require Logger @@ -234,12 +234,12 @@ defmodule Pleroma.Upload do # Encoding the whole path here is fine since the path is in a # UUID/ form. # The file at this point isn't %-encoded, so the path shouldn't - # be decoded first like Pleroma.HTTP.encode_url/1 does. + # be decoded first like Pleroma.Utils.URIEncoding.encode_url/1 does. defp url_from_spec(%__MODULE__{name: name}, base_url, {:file, path}) do encode_opts = [bypass_decode: true, bypass_parse: true] path = - HTTP.encode_url(path, encode_opts) <> + URIEncoding.encode_url(path, encode_opts) <> if Pleroma.Config.get([__MODULE__, :link_name], false) do enum = %{name: name} "?#{URI.encode_query(enum)}" diff --git a/lib/pleroma/utils/uri_encoding.ex b/lib/pleroma/utils/uri_encoding.ex new file mode 100644 index 000000000..0a61671c3 --- /dev/null +++ b/lib/pleroma/utils/uri_encoding.ex @@ -0,0 +1,121 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2025 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Utils.URIEncoding do + @moduledoc """ + Utility functions for dealing with URI encoding of paths and queries + with support for query-encoding quirks. + """ + require Pleroma.Constants + + # We don't always want to decode the path first, like is the case in + # Pleroma.Upload.url_from_spec/3. + def encode_url(url, opts \\ []) when is_binary(url) and is_list(opts) do + bypass_parse = Keyword.get(opts, :bypass_parse, false) + bypass_decode = Keyword.get(opts, :bypass_decode, false) + + cond do + bypass_parse -> + encode_path(url, bypass_decode) + + true -> + URI.parse(url) + |> then(fn parsed -> + path = + encode_path(parsed.path, bypass_decode) + |> maybe_apply_path_encoding_quirks() + + query = encode_query(parsed.query) + + %{parsed | path: path, query: query} + end) + |> URI.to_string() + end + end + + defp encode_path(nil, _bypass_decode), do: nil + + # URI.encode/2 deliberately does not encode all chars that are forbidden + # in the path component of a URI. It only encodes chars that are forbidden + # in the whole URI. A predicate in the 2nd argument is used to fix that here. + # URI.encode/2 uses the predicate function to determine whether each byte + # (in an integer representation) should be encoded or not. + defp encode_path(path, bypass_decode) when is_binary(path) do + path = + cond do + bypass_decode -> + path + + true -> + URI.decode(path) + end + + path + |> URI.encode(fn byte -> + URI.char_unreserved?(byte) || + Enum.any?( + Pleroma.Constants.uri_path_allowed_reserved_chars(), + fn char -> + char == byte + end + ) + end) + end + + defp encode_query(nil), do: nil + + # Order of kv pairs in query is not preserved when using URI.decode_query. + # URI.query_decoder/2 returns a stream which so far appears to not change order. + # Immediately switch to a list to prevent breakage for sites that expect + # the order of query keys to be always the same. + defp encode_query(query) when is_binary(query) do + query + |> URI.query_decoder() + |> Enum.to_list() + |> do_encode_query() + end + + defp maybe_apply_path_encoding_quirks(path), do: path + + # Always uses www_form encoding + defp do_encode_query(enumerable) do + Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) + end + + defp maybe_apply_query_quirk({key, value}) do + case key do + "precrop" -> + query_encode_kv_pair({key, value}, ~c":,") + + key -> + query_encode_kv_pair({key, value}) + end + end + + defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do + cond do + length(rules) > 0 -> + # URI.encode_query/2 does not appear to follow spec and encodes all parts + # of our URI path Constant. This appears to work outside of edge-cases + # like The Guardian Rich Media Cards, keeping behavior same as with + # URI.encode_query/2 unless otherwise specified via rules. + (URI.encode_www_form(Kernel.to_string(key)) <> + "=" <> + URI.encode(value, fn byte -> + URI.char_unreserved?(byte) || + Enum.any?( + rules, + fn char -> + char == byte + end + ) + end)) + |> String.replace("%20", "+") + + true -> + URI.encode_www_form(Kernel.to_string(key)) <> + "=" <> URI.encode_www_form(Kernel.to_string(value)) + end + end +end diff --git a/lib/pleroma/web/media_proxy.ex b/lib/pleroma/web/media_proxy.ex index 22f37eea8..f9376b508 100644 --- a/lib/pleroma/web/media_proxy.ex +++ b/lib/pleroma/web/media_proxy.ex @@ -5,8 +5,8 @@ defmodule Pleroma.Web.MediaProxy do alias Pleroma.Config alias Pleroma.Helpers.UriHelper - alias Pleroma.HTTP alias Pleroma.Upload + alias Pleroma.Utils.URIEncoding alias Pleroma.Web.Endpoint alias Pleroma.Web.MediaProxy.Invalidation @@ -107,14 +107,14 @@ defmodule Pleroma.Web.MediaProxy do # End result is a failing HEAD request in # Pleroma.Web.MediaProxy.MediaProxyController.handle_preview/2 def encode_url(url) do - url = HTTP.encode_url(url) + url = URIEncoding.encode_url(url) {base64, sig64} = base64_sig64(url) build_url(sig64, base64, filename(url)) end def encode_preview_url(url, preview_params \\ []) do - url = HTTP.encode_url(url) + url = URIEncoding.encode_url(url) {base64, sig64} = base64_sig64(url) build_preview_url(sig64, base64, filename(url), preview_params) diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 9bdb6ec35..12b040833 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -5,8 +5,11 @@ defmodule Pleroma.HTTPTest do use ExUnit.Case, async: true use Pleroma.Tests.Helpers + import Tesla.Mock + alias Pleroma.HTTP + alias Pleroma.Utils.URIEncoding setup do mock(fn @@ -134,7 +137,7 @@ defmodule Pleroma.HTTPTest do normal_url = "https://example.com/media/file%20with%20space.jpg?name=a+space.jpg" - result = HTTP.encode_url(normal_url) + result = URIEncoding.encode_url(normal_url) assert result == "https://example.com/media/file%20with%20space.jpg?name=a+space.jpg" end @@ -144,7 +147,7 @@ defmodule Pleroma.HTTPTest do normal_url = "https://example.com/media/file%20with%20space.jpg" - result = HTTP.encode_url(normal_url, bypass_decode: true) + result = URIEncoding.encode_url(normal_url, bypass_decode: true) assert result == "https://example.com/media/file%2520with%2520space.jpg" end @@ -155,7 +158,7 @@ defmodule Pleroma.HTTPTest do url = "https://i.guim.co.uk/img/media/1069ef13c447908272c4de94174cec2b6352cb2f/0_91_2000_1201/master/2000.jpg?width=1200&height=630&quality=85&auto=format&fit=crop&precrop=40:21,offset-x50,offset-y0&overlay-align=bottom%2Cleft&overlay-width=100p&overlay-base64=L2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctb3BpbmlvbnMtYWdlLTIwMTkucG5n&enable=upscale&s=cba21427a73512fdc9863c486c03fdd8" - result = HTTP.encode_url(url) + result = URIEncoding.encode_url(url) assert result == url @@ -173,7 +176,7 @@ defmodule Pleroma.HTTPTest do properly_encoded_url = "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" - result = HTTP.encode_url(url) + result = URIEncoding.encode_url(url) assert result == properly_encoded_url @@ -191,8 +194,8 @@ defmodule Pleroma.HTTPTest do unencoded_url = "https://examplebucket.s3.amazonaws.com/test.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=accessKEY/20130721/us-east-1/s3/aws4_request&X-Amz-Date=20130721T201207Z&X-Amz-Expires=86400&X-Amz-Signature=SIGNATURE&X-Amz-SignedHeaders=host" - result = HTTP.encode_url(url) - result_unencoded = HTTP.encode_url(unencoded_url) + result = URIEncoding.encode_url(url) + result_unencoded = URIEncoding.encode_url(unencoded_url) assert result == url assert result == result_unencoded @@ -207,7 +210,7 @@ defmodule Pleroma.HTTPTest do url = "https://example.com/foo?hjkl=qwertz&xyz=abc&bar=baz" - result = HTTP.encode_url(url) + result = URIEncoding.encode_url(url) assert result == url end diff --git a/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs b/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs index b589068c7..0da3afa3b 100644 --- a/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs +++ b/test/pleroma/web/activity_pub/mrf/media_proxy_warming_policy_test.exs @@ -58,10 +58,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, - get: fn _, _, _ -> {:ok, []} end, - encode_url: fn url -> :meck.passthrough([url]) end, - encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do MediaProxyWarmingPolicy.filter(@message) assert called(HTTP.get(:_, :_, :_)) @@ -88,10 +85,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, - get: fn _, _, _ -> {:ok, []} end, - encode_url: fn url -> :meck.passthrough([url]) end, - encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history) assert called(HTTP.get(:_, :_, :_)) @@ -103,10 +97,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicyTest do {:ok, %Tesla.Env{status: 200, body: ""}} end) - with_mock HTTP, - get: fn _, _, _ -> {:ok, []} end, - encode_url: fn url -> :meck.passthrough([url]) end, - encode_url: fn url, opts -> :meck.passthrough([url, opts]) end do + with_mock HTTP, get: fn _, _, _ -> {:ok, []} end do MRF.filter_one(MediaProxyWarmingPolicy, @message_with_history |> Map.put("type", "Update")) assert called(HTTP.get(:_, :_, :_)) From c31454fac1cf15916a406f9e7cc5c3deaf7514a5 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 20 Oct 2025 22:44:42 +0200 Subject: [PATCH 15/22] Fix unicode URL encoding test --- test/pleroma/web/media_proxy_test.exs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/pleroma/web/media_proxy_test.exs b/test/pleroma/web/media_proxy_test.exs index 7a845b7a7..ee9c01a08 100644 --- a/test/pleroma/web/media_proxy_test.exs +++ b/test/pleroma/web/media_proxy_test.exs @@ -182,11 +182,13 @@ defmodule Pleroma.Web.MediaProxyTest do assert decode_result(encoded) == url end - test "preserve unicode characters" do + # Improperly encoded URLs should not happen even when input was wrong. + test "does not preserve unicode characters" do url = "https://ko.wikipedia.org/wiki/위키백과:대문" + encoded_url = "https://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8" encoded = MediaProxy.url(url) - assert decode_result(encoded) == url + assert decode_result(encoded) == encoded_url end end From bfe8372ad2de4d827eac89458f93620be5b05542 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 20 Oct 2025 22:46:01 +0200 Subject: [PATCH 16/22] Remove "preserve ASCII encoding" test in MediaProxy issue 580: Should not happen again, tested in HTTPTest issue 1055: Fixed with quirk support in query encoding, tested in HTTPTest --- test/pleroma/web/media_proxy_test.exs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/test/pleroma/web/media_proxy_test.exs b/test/pleroma/web/media_proxy_test.exs index ee9c01a08..f227b9ac1 100644 --- a/test/pleroma/web/media_proxy_test.exs +++ b/test/pleroma/web/media_proxy_test.exs @@ -159,18 +159,6 @@ defmodule Pleroma.Web.MediaProxyTest do assert String.starts_with?(encoded, base_url) end - # Some sites expect ASCII encoded characters in the URL to be preserved even if - # unnecessary. - # Issues: https://git.pleroma.social/pleroma/pleroma/issues/580 - # https://git.pleroma.social/pleroma/pleroma/issues/1055 - test "preserve ASCII encoding" do - url = - "https://pleroma.com/%20/%21/%22/%23/%24/%25/%26/%27/%28/%29/%2A/%2B/%2C/%2D/%2E/%2F/%30/%31/%32/%33/%34/%35/%36/%37/%38/%39/%3A/%3B/%3C/%3D/%3E/%3F/%40/%41/%42/%43/%44/%45/%46/%47/%48/%49/%4A/%4B/%4C/%4D/%4E/%4F/%50/%51/%52/%53/%54/%55/%56/%57/%58/%59/%5A/%5B/%5C/%5D/%5E/%5F/%60/%61/%62/%63/%64/%65/%66/%67/%68/%69/%6A/%6B/%6C/%6D/%6E/%6F/%70/%71/%72/%73/%74/%75/%76/%77/%78/%79/%7A/%7B/%7C/%7D/%7E/%7F/%80/%81/%82/%83/%84/%85/%86/%87/%88/%89/%8A/%8B/%8C/%8D/%8E/%8F/%90/%91/%92/%93/%94/%95/%96/%97/%98/%99/%9A/%9B/%9C/%9D/%9E/%9F/%C2%A0/%A1/%A2/%A3/%A4/%A5/%A6/%A7/%A8/%A9/%AA/%AB/%AC/%C2%AD/%AE/%AF/%B0/%B1/%B2/%B3/%B4/%B5/%B6/%B7/%B8/%B9/%BA/%BB/%BC/%BD/%BE/%BF/%C0/%C1/%C2/%C3/%C4/%C5/%C6/%C7/%C8/%C9/%CA/%CB/%CC/%CD/%CE/%CF/%D0/%D1/%D2/%D3/%D4/%D5/%D6/%D7/%D8/%D9/%DA/%DB/%DC/%DD/%DE/%DF/%E0/%E1/%E2/%E3/%E4/%E5/%E6/%E7/%E8/%E9/%EA/%EB/%EC/%ED/%EE/%EF/%F0/%F1/%F2/%F3/%F4/%F5/%F6/%F7/%F8/%F9/%FA/%FB/%FC/%FD/%FE/%FF" - - encoded = MediaProxy.url(url) - assert decode_result(encoded) == url - end - # This includes unsafe/reserved characters which are not interpreted as part of the URL # and would otherwise have to be ASCII encoded. It is our role to ensure the proxied URL # is unmodified, so we are testing these characters anyway. From 0935823be98822d06a35c454bf58ab996e6f700b Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 20 Oct 2025 23:30:38 +0200 Subject: [PATCH 17/22] Add test for mangling incorrect URL in MediaProxy link generation --- test/pleroma/web/media_proxy_test.exs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/test/pleroma/web/media_proxy_test.exs b/test/pleroma/web/media_proxy_test.exs index f227b9ac1..86d30c04c 100644 --- a/test/pleroma/web/media_proxy_test.exs +++ b/test/pleroma/web/media_proxy_test.exs @@ -173,11 +173,28 @@ defmodule Pleroma.Web.MediaProxyTest do # Improperly encoded URLs should not happen even when input was wrong. test "does not preserve unicode characters" do url = "https://ko.wikipedia.org/wiki/위키백과:대문" - encoded_url = "https://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8" + + encoded_url = + "https://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8" encoded = MediaProxy.url(url) assert decode_result(encoded) == encoded_url end + + # If we preserve wrongly encoded URLs in MediaProxy, it will get fixed + # when we GET these URLs and will result in 424 when MediaProxy previews are enabled. + test "does not preserve incorrect URLs when making MediaProxy link" do + incorrect_original_url = "https://example.com/media/cofe%20%28with%20milk%29.png" + corrected_original_url = "https://example.com/media/cofe%20(with%20milk).png" + + unpreserved_encoded_original_url = + "http://localhost:4001/proxy/Sv6tt6xjA72_i4d8gXbuMAOXQSs/aHR0cHM6Ly9leGFtcGxlLmNvbS9tZWRpYS9jb2ZlJTIwKHdpdGglMjBtaWxrKS5wbmc/cofe%20(with%20milk).png" + + encoded = MediaProxy.url(incorrect_original_url) + + assert encoded == unpreserved_encoded_original_url + assert decode_result(encoded) == corrected_original_url + end end describe "when disabled" do From bcdd78fba513146a297fa5ece73f059f95f29561 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 20 Oct 2025 23:34:19 +0200 Subject: [PATCH 18/22] Add changelog --- changelog.d/url-encoding-pt2.fix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/url-encoding-pt2.fix diff --git a/changelog.d/url-encoding-pt2.fix b/changelog.d/url-encoding-pt2.fix new file mode 100644 index 000000000..bc6857e02 --- /dev/null +++ b/changelog.d/url-encoding-pt2.fix @@ -0,0 +1 @@ +Fix sometimes incorrect URI percent encoding From 07ba3bb829f8f501b3e98dd48c72e85fe543bc1e Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 20 Oct 2025 23:36:38 +0200 Subject: [PATCH 19/22] Remove "support" for path encoding quirks Currently there isn't any known quirk that would be needed and this is just dead code that does nothing. --- lib/pleroma/utils/uri_encoding.ex | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/pleroma/utils/uri_encoding.ex b/lib/pleroma/utils/uri_encoding.ex index 0a61671c3..f8a7ba49c 100644 --- a/lib/pleroma/utils/uri_encoding.ex +++ b/lib/pleroma/utils/uri_encoding.ex @@ -22,9 +22,7 @@ defmodule Pleroma.Utils.URIEncoding do true -> URI.parse(url) |> then(fn parsed -> - path = - encode_path(parsed.path, bypass_decode) - |> maybe_apply_path_encoding_quirks() + path = encode_path(parsed.path, bypass_decode) query = encode_query(parsed.query) @@ -76,8 +74,6 @@ defmodule Pleroma.Utils.URIEncoding do |> do_encode_query() end - defp maybe_apply_path_encoding_quirks(path), do: path - # Always uses www_form encoding defp do_encode_query(enumerable) do Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) From 0f7ad318d354c82a5727a62da377084edeeb37d9 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Tue, 21 Oct 2025 00:27:12 +0200 Subject: [PATCH 20/22] Add encode_url @spec and docs, and a check whether opts are booleans --- lib/pleroma/utils/uri_encoding.ex | 42 +++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/lib/pleroma/utils/uri_encoding.ex b/lib/pleroma/utils/uri_encoding.ex index f8a7ba49c..e37ade0b6 100644 --- a/lib/pleroma/utils/uri_encoding.ex +++ b/lib/pleroma/utils/uri_encoding.ex @@ -11,24 +11,44 @@ defmodule Pleroma.Utils.URIEncoding do # We don't always want to decode the path first, like is the case in # Pleroma.Upload.url_from_spec/3. + @doc """ + Wraps URI encoding/decoding functions from Elixir's standard library to fix usually unintended side-effects. + + Supports two URL processing options in the optional 2nd argument with the default being `false`: + + * `bypass_parse` - Bypasses `URI.parse` stage, useful when it's not desirable to parse to URL first + before encoding it. Supports only encoding as the Path segment of a URI. + * `bypass_decode` - Bypasses `URI.decode` stage for the Path segment of a URI. Used when a URL + has to be double %-encoded for internal reasons. + + Options must be specified as a Keyword with tuples with booleans, otherwise + `{:error, :invalid_opts}` is returned. Example: + `encode_url(url, [bypass_parse: true, bypass_decode: true])` + """ + @spec encode_url(String.t(), Keyword.t()) :: String.t() | {:error, :invalid_opts} def encode_url(url, opts \\ []) when is_binary(url) and is_list(opts) do bypass_parse = Keyword.get(opts, :bypass_parse, false) bypass_decode = Keyword.get(opts, :bypass_decode, false) - cond do - bypass_parse -> - encode_path(url, bypass_decode) + with true <- is_boolean(bypass_parse), + true <- is_boolean(bypass_decode) do + cond do + bypass_parse -> + encode_path(url, bypass_decode) - true -> - URI.parse(url) - |> then(fn parsed -> - path = encode_path(parsed.path, bypass_decode) + true -> + URI.parse(url) + |> then(fn parsed -> + path = encode_path(parsed.path, bypass_decode) - query = encode_query(parsed.query) + query = encode_query(parsed.query) - %{parsed | path: path, query: query} - end) - |> URI.to_string() + %{parsed | path: path, query: query} + end) + |> URI.to_string() + end + else + _ -> {:error, :invalid_opts} end end From 73b337245bcc2b3f3809bcc476021f64943bdbcf Mon Sep 17 00:00:00 2001 From: Phantasm Date: Tue, 21 Oct 2025 00:50:08 +0200 Subject: [PATCH 21/22] Make URI encoding query quirks host-aware --- lib/pleroma/utils/uri_encoding.ex | 23 ++++++++++++++--------- test/pleroma/http_test.exs | 6 +++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/lib/pleroma/utils/uri_encoding.ex b/lib/pleroma/utils/uri_encoding.ex index e37ade0b6..dc6b387fa 100644 --- a/lib/pleroma/utils/uri_encoding.ex +++ b/lib/pleroma/utils/uri_encoding.ex @@ -41,7 +41,7 @@ defmodule Pleroma.Utils.URIEncoding do |> then(fn parsed -> path = encode_path(parsed.path, bypass_decode) - query = encode_query(parsed.query) + query = encode_query(parsed.query, parsed.host) %{parsed | path: path, query: query} end) @@ -81,25 +81,27 @@ defmodule Pleroma.Utils.URIEncoding do end) end - defp encode_query(nil), do: nil - # Order of kv pairs in query is not preserved when using URI.decode_query. # URI.query_decoder/2 returns a stream which so far appears to not change order. # Immediately switch to a list to prevent breakage for sites that expect # the order of query keys to be always the same. - defp encode_query(query) when is_binary(query) do + defp encode_query(query, host) when is_binary(query) do query |> URI.query_decoder() |> Enum.to_list() - |> do_encode_query() + |> do_encode_query(host) end - # Always uses www_form encoding - defp do_encode_query(enumerable) do - Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1)) + defp encode_query(nil, _), do: nil + + # Always uses www_form encoding. + # Taken from Elixir's URI module. + defp do_encode_query(enumerable, host) do + Enum.map_join(enumerable, "&", &maybe_apply_query_quirk(&1, host)) end - defp maybe_apply_query_quirk({key, value}) do + # https://git.pleroma.social/pleroma/pleroma/-/issues/1055 + defp maybe_apply_query_quirk({key, value}, "i.guim.co.uk" = _host) do case key do "precrop" -> query_encode_kv_pair({key, value}, ~c":,") @@ -109,6 +111,9 @@ defmodule Pleroma.Utils.URIEncoding do end end + defp maybe_apply_query_quirk({key, value}, _), do: query_encode_kv_pair({key, value}) + + # Taken from Elixir's URI module and modified to support quirks. defp query_encode_kv_pair({key, value}, rules \\ []) when is_list(rules) do cond do length(rules) > 0 -> diff --git a/test/pleroma/http_test.exs b/test/pleroma/http_test.exs index 12b040833..7b6847cf9 100644 --- a/test/pleroma/http_test.exs +++ b/test/pleroma/http_test.exs @@ -51,7 +51,7 @@ defmodule Pleroma.HTTPTest do %{ method: :get, url: - "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" + "https://i.guim.co.uk/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" } -> %Tesla.Env{status: 200, body: "Space in query with Guardian quirk"} @@ -171,10 +171,10 @@ defmodule Pleroma.HTTPTest do clear_config(:test_url_encoding, true) url = - "https://example.com/emoji/Pack 1/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar baz" + "https://i.guim.co.uk/emoji/Pack 1/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar baz" properly_encoded_url = - "https://example.com/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" + "https://i.guim.co.uk/emoji/Pack%201/koronebless.png?precrop=40:21,overlay-x0,overlay-y0&foo=bar+baz" result = URIEncoding.encode_url(url) From 7d8a18896742f65f2d5d4aeb4383ab7273900b05 Mon Sep 17 00:00:00 2001 From: Phantasm Date: Mon, 27 Oct 2025 18:49:34 +0100 Subject: [PATCH 22/22] Disable Hackney URL encoding function Hackney interferes with out URI encoding and implements older RFC 2396 instead of RFC 3986 which we and Elixir implement. As an example "'" and "!" will get encoded by it and cause problems with our MediaProxy making unexpected 302 redirects. If an admin supplies a different function via *.secret.exs, we don't override it. https://github.com/benoitc/hackney/issues/399 --- lib/pleroma/http/adapter_helper/hackney.ex | 5 +++++ lib/pleroma/reverse_proxy/client/hackney.ex | 5 +++++ test/pleroma/reverse_proxy_test.exs | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/http/adapter_helper/hackney.ex b/lib/pleroma/http/adapter_helper/hackney.ex index f3be1f3d0..f3451cf9c 100644 --- a/lib/pleroma/http/adapter_helper/hackney.ex +++ b/lib/pleroma/http/adapter_helper/hackney.ex @@ -16,7 +16,12 @@ defmodule Pleroma.HTTP.AdapterHelper.Hackney do config_opts = Pleroma.Config.get([:http, :adapter], []) + url_encoding = + Keyword.new() + |> Keyword.put(:path_encode_fun, fn path -> path end) + @defaults + |> Keyword.merge(url_encoding) |> Keyword.merge(config_opts) |> Keyword.merge(connection_opts) |> add_scheme_opts(uri) diff --git a/lib/pleroma/reverse_proxy/client/hackney.ex b/lib/pleroma/reverse_proxy/client/hackney.ex index d3e986912..0aa5f5715 100644 --- a/lib/pleroma/reverse_proxy/client/hackney.ex +++ b/lib/pleroma/reverse_proxy/client/hackney.ex @@ -7,6 +7,11 @@ defmodule Pleroma.ReverseProxy.Client.Hackney do @impl true def request(method, url, headers, body, opts \\ []) do + opts = + Keyword.put_new(opts, :path_encode_fun, fn path -> + path + end) + :hackney.request(method, url, headers, body, opts) end diff --git a/test/pleroma/reverse_proxy_test.exs b/test/pleroma/reverse_proxy_test.exs index 3595568cf..8dbe9c6bf 100644 --- a/test/pleroma/reverse_proxy_test.exs +++ b/test/pleroma/reverse_proxy_test.exs @@ -396,7 +396,7 @@ defmodule Pleroma.ReverseProxyTest do end end - # Hackey is used for Reverse Proxy when Hackney or Finch is the Tesla Adapter + # Hackney is used for Reverse Proxy when Hackney or Finch is the Tesla Adapter # Gun is able to proxy through Tesla, so it does not need testing as the # test cases in the Pleroma.HTTPTest module are sufficient describe "Hackney URL encoding:" do