diff --git a/changelog.d/search-indexing.change b/changelog.d/search-indexing.change new file mode 100644 index 000000000..766934f3f --- /dev/null +++ b/changelog.d/search-indexing.change @@ -0,0 +1 @@ +Filter indexable activities before inserting indexing jobs into the queue. diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex index 30b3ba958..8477873f9 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search.ex @@ -1,11 +1,28 @@ defmodule Pleroma.Search do + alias Pleroma.Activity + alias Pleroma.Object + alias Pleroma.Web.ActivityPub.Visibility alias Pleroma.Workers.SearchIndexingWorker - def add_to_index(%Pleroma.Activity{id: activity_id}) do - SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => activity_id}) - |> Oban.insert() + @spec add_to_index(Activity.t()) :: {:ok, Oban.Job.t() | :noop} | {:error, Oban.Job.changeset()} + def add_to_index(%Activity{id: activity_id, object: %Object{} = object} = activity) do + with {_, true} <- {:indexable, indexable?(activity)}, + {_, "public"} <- {:visibility, Visibility.get_visibility(object)} do + SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => activity_id}) + |> Oban.insert() + else + _ -> {:ok, :noop} + end end + def add_to_index(%Activity{id: activity_id}) do + case Activity.get_by_id_with_object(activity_id) do + %Activity{} = preloaded -> add_to_index(preloaded) + _ -> :ok + end + end + + @spec remove_from_index(Object.t()) :: {:ok, Oban.Job.t()} | {:error, Oban.Job.changeset()} def remove_from_index(%Pleroma.Object{id: object_id}) do SearchIndexingWorker.new(%{"op" => "remove_from_index", "object" => object_id}) |> Oban.insert() @@ -20,4 +37,7 @@ defmodule Pleroma.Search do search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.healthcheck_endpoints() end + + defp indexable?(%Activity{data: %{"type" => "Create"}}), do: true + defp indexable?(_), do: false end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index cafae8099..4541ef14a 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -4,6 +4,7 @@ defmodule Pleroma.Search.Meilisearch do alias Pleroma.Activity alias Pleroma.Config.Getting, as: Config + alias Pleroma.Object import Pleroma.Search.DatabaseSearch import Ecto.Query @@ -156,28 +157,23 @@ defmodule Pleroma.Search.Meilisearch do end @impl true - def add_to_index(activity) do - maybe_search_data = object_to_search_data(activity.object) + def add_to_index(%Activity{object: %Object{} = object} = activity) do + search_data = object_to_search_data(object) - if activity.data["type"] == "Create" and maybe_search_data do - result = - meili_put( - "/indexes/objects/documents", - [maybe_search_data] - ) + result = + meili_put( + "/indexes/objects/documents", + [search_data] + ) - with {:ok, %{"status" => "enqueued"}} <- result do - # Added successfully - :ok - else - _ -> - # There was an error, report it - Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") - {:error, result} - end - else - # The post isn't something we can search, that's ok + with {:ok, %{"status" => "enqueued"}} <- result do + # Added successfully :ok + else + _ -> + # There was an error, report it + Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") + {:error, result} end end diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex index 5142a273f..06f5b1983 100644 --- a/lib/pleroma/search/qdrant_search.ex +++ b/lib/pleroma/search/qdrant_search.ex @@ -4,6 +4,7 @@ defmodule Pleroma.Search.QdrantSearch do alias Pleroma.Activity alias Pleroma.Config.Getting, as: Config + alias Pleroma.Object alias __MODULE__.OpenAIClient alias __MODULE__.QdrantClient @@ -82,23 +83,18 @@ defmodule Pleroma.Search.QdrantSearch do end @impl true - def add_to_index(activity) do - # This will only index public or unlisted notes - maybe_search_data = object_to_search_data(activity.object) + def add_to_index(%Activity{object: %Object{} = object} = activity) do + search_data = object_to_search_data(object) - if activity.data["type"] == "Create" and maybe_search_data do - with {:ok, embedding} <- get_embedding(maybe_search_data.content), - {:ok, %{status: 200}} <- - QdrantClient.put( - "/collections/posts/points", - build_index_payload(activity, embedding) - ) do - :ok - else - e -> {:error, e} - end - else + with {:ok, embedding} <- get_embedding(search_data.content), + {:ok, %{status: 200}} <- + QdrantClient.put( + "/collections/posts/points", + build_index_payload(activity, embedding) + ) do :ok + else + e -> {:error, e} end end diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs index eea454323..ff32491c5 100644 --- a/test/pleroma/search/meilisearch_test.exs +++ b/test/pleroma/search/meilisearch_test.exs @@ -74,29 +74,6 @@ defmodule Pleroma.Search.MeilisearchTest do assert_received("posted_to_meilisearch") end - test "doesn't index posts that are not public" do - user = insert(:user) - - Enum.each(["private", "direct"], fn visibility -> - {:ok, activity} = - CommonAPI.post(user, %{ - status: "guys i just don't wanna leave the swamp", - visibility: visibility - }) - - args = %{"op" => "add_to_index", "activity" => activity.id} - - Config - |> expect(:get, fn - [Pleroma.Search, :module], nil -> - Meilisearch - end) - - assert_enqueued(worker: SearchIndexingWorker, args: args) - assert :ok = perform_job(SearchIndexingWorker, args) - end) - end - test "deletes posts from index when deleted locally" do user = insert(:user) diff --git a/test/pleroma/search_test.exs b/test/pleroma/search_test.exs new file mode 100644 index 000000000..d777bcda2 --- /dev/null +++ b/test/pleroma/search_test.exs @@ -0,0 +1,69 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.SearchTest do + use Pleroma.DataCase, async: true + use Oban.Testing, repo: Pleroma.Repo + + import Pleroma.Factory + + alias Pleroma.Web.CommonAPI + alias Pleroma.Workers.SearchIndexingWorker + + test "indexes posts that are public" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "Well this is a story all about how my life got flipped turned upside down", + visibility: "public" + }) + + args = %{"op" => "add_to_index", "activity" => activity.id} + + assert_enqueued(worker: SearchIndexingWorker, args: args) + end + + test "doesn't index posts that are not public" do + user = insert(:user) + + Enum.each(["private", "direct"], fn visibility -> + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: visibility + }) + + args = %{"op" => "add_to_index", "activity" => activity.id} + + refute_enqueued(worker: SearchIndexingWorker, args: args) + end) + end + + test "Indexes appropriate activity types" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "I'm my own hype man", + visibility: "public" + }) + + args = %{"op" => "add_to_index", "activity" => activity.id} + + assert_enqueued(worker: SearchIndexingWorker, args: args) + + {:ok, fav_activity} = CommonAPI.favorite(activity.id, user) + + args = %{"op" => "add_to_index", "activity" => fav_activity.id} + + refute_enqueued(worker: SearchIndexingWorker, args: args) + + {:ok, repeat_activity} = CommonAPI.repeat(activity.id, user) + + args = %{"op" => "add_to_index", "activity" => repeat_activity.id} + + refute_enqueued(worker: SearchIndexingWorker, args: args) + end +end