diff --git a/changelog.d/truncate-rich-media.change b/changelog.d/truncate-rich-media.change
new file mode 100644
index 000000000..1df064be1
--- /dev/null
+++ b/changelog.d/truncate-rich-media.change
@@ -0,0 +1 @@
+Truncate the length of Rich Media title and description fields
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index a3a522d7a..9c8ec7a9f 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -4,6 +4,7 @@
defmodule Pleroma.Web.RichMedia.Parser do
alias Pleroma.Web.RichMedia.Helpers
+ import Pleroma.Web.Metadata.Utils, only: [scrub_html_and_truncate: 2]
require Logger
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
@@ -63,8 +64,20 @@ defmodule Pleroma.Web.RichMedia.Parser do
not match?({:ok, _}, Jason.encode(%{key => val}))
end)
|> Map.new()
+ |> truncate_title()
+ |> truncate_desc()
end
+  # Runs a binary "title" value through scrub_html_and_truncate/2 with a
+  # limit of 120 and stores the result back under the same key. Input that
+  # has no binary "title" entry is handed back untouched.
+  defp truncate_title(data) do
+    case data do
+      %{"title" => text} when is_binary(text) ->
+        Map.replace!(data, "title", scrub_html_and_truncate(text, 120))
+
+      _ ->
+        data
+    end
+  end
+
+  # Runs a binary "description" value through scrub_html_and_truncate/2 with
+  # a limit of 200 and stores the result back under the same key. Input that
+  # has no binary "description" entry is handed back untouched.
+  defp truncate_desc(data) do
+    case data do
+      %{"description" => text} when is_binary(text) ->
+        Map.replace!(data, "description", scrub_html_and_truncate(text, 200))
+
+      _ ->
+        data
+    end
+  end
+
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do
validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])
diff --git a/test/fixtures/rich_media/instagram_longtext.html b/test/fixtures/rich_media/instagram_longtext.html
new file mode 100644
index 000000000..e833f408c
--- /dev/null
+++ b/test/fixtures/rich_media/instagram_longtext.html
@@ -0,0 +1,90 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+CAPTURE THE ATLAS | ✨ A Once-in-a-Lifetime Shot: Total Lunar Eclipse + Aurora Substorm! 🔴💚
+
+Last Thursday night, under the freezing skies of Northern Alaska, I... | Instagram
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs
index 20f61badc..1f01d657a 100644
--- a/test/pleroma/web/rich_media/parser_test.exs
+++ b/test/pleroma/web/rich_media/parser_test.exs
@@ -61,6 +61,13 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
}}
end
+  # The mocked page carries a long title and description; after parsing,
+  # both fields are expected to come out at exactly their limits.
+  test "truncates title and description fields" do
+    {:ok, rich_media} = Parser.parse("https://instagram.com/longtext")
+
+    title_length = String.length(rich_media["title"])
+    assert title_length == 120
+
+    description_length = String.length(rich_media["description"])
+    assert description_length == 200
+  end
+
test "parses OEmbed and filters HTML tags" do
assert Parser.parse("https://example.com/oembed") ==
{:ok,
diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex
index 1c472fca9..a8f954af9 100644
--- a/test/support/http_request_mock.ex
+++ b/test/support/http_request_mock.ex
@@ -1494,6 +1494,11 @@ defmodule HttpRequestMock do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}}
end
+  # Answers the long-text Instagram URL with the contents of the matching
+  # rich media fixture file and a 200 status.
+  def get("https://instagram.com/longtext", _, _, _) do
+    body = File.read!("test/fixtures/rich_media/instagram_longtext.html")
+
+    {:ok, %Tesla.Env{status: 200, body: body}}
+  end
+
def get("https://example.com/non-ogp", _, _, _) do
{:ok,
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}}
@@ -1720,7 +1725,8 @@ defmodule HttpRequestMock do
"https://example.com/twitter-card",
"https://google.com/",
"https://pleroma.local/notice/9kCP7V",
- "https://yahoo.com/"
+ "https://yahoo.com/",
+ "https://instagram.com/longtext"
]
def head(url, _query, _body, _headers) when url in @rich_media_mocks do