Fix Rich Media Previews for updated activities

Because of a cache invalidation issue, Rich Media Previews were not regenerated when a post was updated. They are now cached by the activity id, so they can be evicted together with the other activity cache objects in the :scrubber_cache.
This commit is contained in:
Mark Felder 2024-02-04 19:24:52 -05:00
commit 04fc4eddaa
8 changed files with 96 additions and 23 deletions

12
test/fixtures/rich_media/google.html vendored Normal file
View file

@ -0,0 +1,12 @@
<!-- Rich media test fixture: HttpRequestMock returns this file as the body for GET https://google.com/. -->
<!-- NOTE(review): og:image and twitter:image are empty in this fixture; confirm that is intentional. -->
<meta property="og:url" content="https://google.com">
<meta property="og:type" content="website">
<meta property="og:title" content="Google">
<meta property="og:description" content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for.">
<meta property="og:image" content="">
<meta name="twitter:card" content="summary_large_image">
<meta property="twitter:domain" content="google.com">
<meta property="twitter:url" content="https://google.com">
<meta name="twitter:title" content="Google">
<meta name="twitter:description" content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for.">
<meta name="twitter:image" content="">

12
test/fixtures/rich_media/yahoo.html vendored Normal file
View file

@ -0,0 +1,12 @@
<!-- Rich media test fixture: HttpRequestMock returns this file as the body for GET https://yahoo.com/. -->
<meta property="og:url" content="https://yahoo.com">
<meta property="og:type" content="website">
<meta property="og:title" content="Yahoo | Mail, Weather, Search, Politics, News, Finance, Sports & Videos">
<meta property="og:description" content="Latest news coverage, email, free stock quotes, live scores and video are just the beginning. Discover more every day at Yahoo!">
<meta property="og:image" content="https://s.yimg.com/cv/apiv2/social/images/yahoo_default_logo.png">
<meta name="twitter:card" content="summary_large_image">
<meta property="twitter:domain" content="yahoo.com">
<meta property="twitter:url" content="https://yahoo.com">
<meta name="twitter:title" content="Yahoo | Mail, Weather, Search, Politics, News, Finance, Sports & Videos">
<meta name="twitter:description" content="Latest news coverage, email, free stock quotes, live scores and video are just the beginning. Discover more every day at Yahoo!">
<meta name="twitter:image" content="https://s.yimg.com/cv/apiv2/social/images/yahoo_default_logo.png">

View file

@ -83,6 +83,34 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end
test "recrawls URLs on updates" do
  original_url = "https://google.com/"
  updated_url = "https://yahoo.com/"

  # Turn rich media on through the stubbed static config; every other config
  # path is delegated to the regular static test config.
  Pleroma.StaticStubbedConfigMock
  |> stub(:get, fn
    [:rich_media, :enabled] -> true
    path -> Pleroma.Test.StaticConfig.get(path)
  end)

  user = insert(:user)
  {:ok, activity} = CommonAPI.post(user, %{status: "I like this site #{original_url}"})

  # Pattern-match assertions (instead of `assert match?(...)`) so that a
  # failure reports the value actually returned rather than a bare `false`.
  assert %{page_url: ^original_url, rich_media: _} =
           Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)

  {:ok, _} = CommonAPI.update(user, activity, %{status: "I like this site #{updated_url}"})

  # Re-read the activity by id before asking for its rich media again.
  # NOTE(review): presumably needed so the helper sees the edited status
  # rather than the struct bound before the update; confirm.
  activity = Pleroma.Activity.get_by_id(activity.id)

  assert %{page_url: ^updated_url, rich_media: _} =
           Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end
# This test does not seem to work: the URLs are still being fetched.
@tag skip: true
test "refuses to crawl URLs of private network from posts" do

View file

@ -1464,6 +1464,14 @@ defmodule HttpRequestMock do
}}
end
# Rich media mock: answers GET https://google.com/ with the stored HTML fixture.
def get("https://google.com/", _, _, _) do
  fixture = File.read!("test/fixtures/rich_media/google.html")
  {:ok, %Tesla.Env{status: 200, body: fixture}}
end
# Rich media mock: answers GET https://yahoo.com/ with the stored HTML fixture.
def get("https://yahoo.com/", _, _, _) do
  fixture = File.read!("test/fixtures/rich_media/yahoo.html")
  {:ok, %Tesla.Env{status: 200, body: fixture}}
end
def get(url, query, body, headers) do
{:error,
"Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
@ -1539,7 +1547,9 @@ defmodule HttpRequestMock do
# URLs matched by the guard on the `head/4` clause that follows, which answers
# them with an empty 404 response.
@rich_media_mocks [
  "https://example.com/ogp",
  "https://example.com/ogp-missing-data",
  "https://example.com/twitter-card",
  "https://google.com/",
  "https://yahoo.com/"
]
def head(url, _query, _body, _headers) when url in @rich_media_mocks do
{:ok, %Tesla.Env{status: 404, body: ""}}