From f06a0eab50b9ed3e6d4ed4db714d5632225ff9f0 Mon Sep 17 00:00:00 2001
From: Mark Felder
Date: Wed, 25 Mar 2026 14:47:39 -0700
Subject: [PATCH] Move object_to_search_data/1 to Pleroma.Search

This standardizes this functionality within the Search module so it doesn't need to be imported by other search backends from Meilisearch

Also integrate its filtering rules into Search.indexable?/1 for consistency
---
 changelog.d/search-indexing.skip            |  0
 lib/mix/tasks/pleroma/search/meilisearch.ex |  2 +-
 lib/pleroma/search.ex                       | 54 ++++++++++++++++++++-
 lib/pleroma/search/meilisearch.ex           | 40 +--------------
 lib/pleroma/search/qdrant_search.ex         |  4 +-
 5 files changed, 58 insertions(+), 42 deletions(-)
 create mode 100644 changelog.d/search-indexing.skip

diff --git a/changelog.d/search-indexing.skip b/changelog.d/search-indexing.skip
new file mode 100644
index 000000000..e69de29bb
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex
index edce9e871..facc38815 100644
--- a/lib/mix/tasks/pleroma/search/meilisearch.ex
+++ b/lib/mix/tasks/pleroma/search/meilisearch.ex
@@ -72,7 +72,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
           query,
           timeout: :infinity
         )
-        |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
+        |> Stream.map(&Pleroma.Search.object_to_search_data/1)
         |> Stream.filter(fn o -> not is_nil(o) end)
         |> Stream.chunk_every(chunk_size)
         |> Stream.transform(0, fn objects, acc ->
diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex
index f78495c8d..9cd2768c4 100644
--- a/lib/pleroma/search.ex
+++ b/lib/pleroma/search.ex
@@ -38,6 +38,58 @@ defmodule Pleroma.Search do
     search_module.healthcheck_endpoints()
   end
 
-  defp indexable?(%Activity{data: %{"type" => "Create"}}), do: true
+  @doc """
+  Builds the document sent to external search backends for `object`.
+
+  Returns a map with `:id`, `:content`, `:ap` and `:published` (Unix seconds), or
+  `nil` when the object must not be indexed: it is not a public or unlisted Note,
+  it has no content or publication date, or its content is empty after scrubbing.
+  """
+  def object_to_search_data(%Object{data: data} = object) do
+    if indexable_object?(data) do
+      content_str =
+        case data["content"] do
+          [nil | rest] -> to_string(rest)
+          str -> str
+        end
+
+      content =
+        with {:ok, scrubbed} <-
+               FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
+             trimmed <- String.trim(scrubbed) do
+          trimmed
+        end
+
+      # Make sure we have a non-empty string
+      if content != "" do
+        {:ok, published, _} = DateTime.from_iso8601(data["published"])
+
+        %{
+          id: object.id,
+          content: content,
+          ap: data["id"],
+          published: published |> DateTime.to_unix()
+        }
+      end
+    end
+  end
+
+  def object_to_search_data(_), do: nil
+
+  # Filter shared by indexable?/1 and object_to_search_data/1: only public or
+  # unlisted Notes that have content and a publication date are indexed.
+  defp indexable_object?(
+         %{"type" => "Note", "content" => content, "published" => published} = data
+       )
+       when not is_nil(content) and content not in ["", "."] and not is_nil(published) do
+    public = Pleroma.Constants.as_public()
+    public in List.wrap(data["to"]) or public in List.wrap(data["cc"])
+  end
+
+  defp indexable_object?(_), do: false
+
+  defp indexable?(%Activity{data: %{"type" => "Create"}, object: %Object{data: data}}),
+    do: indexable_object?(data)
+
   defp indexable?(_), do: false
 end
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
index 4541ef14a..dc10076e1 100644
--- a/lib/pleroma/search/meilisearch.ex
+++ b/lib/pleroma/search/meilisearch.ex
@@ -5,6 +5,7 @@ defmodule Pleroma.Search.Meilisearch do
   alias Pleroma.Activity
   alias Pleroma.Config.Getting, as: Config
   alias Pleroma.Object
+  alias Pleroma.Search
 
   import Pleroma.Search.DatabaseSearch
   import Ecto.Query
@@ -119,46 +120,9 @@ defmodule Pleroma.Search.Meilisearch do
     end
   end
 
-  def object_to_search_data(object) do
-    # Only index public or unlisted Notes
-    if not is_nil(object) and object.data["type"] == "Note" and
-         not is_nil(object.data["content"]) and
-         not is_nil(object.data["published"]) and
-         (Pleroma.Constants.as_public() in object.data["to"] or
-            Pleroma.Constants.as_public() in object.data["cc"]) and
-         object.data["content"] not in ["", "."] do
-      data = object.data
-
-      content_str =
-        case data["content"] do
-          [nil | rest] -> to_string(rest)
-          str -> str
-        end
-
-      content =
-        with {:ok, scrubbed} <-
-               FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
-             trimmed <- String.trim(scrubbed) do
-          trimmed
-        end
-
-      # Make sure we have a non-empty string
-      if content != "" do
-        {:ok, published, _} = DateTime.from_iso8601(data["published"])
-
-        %{
-          id: object.id,
-          content: content,
-          ap: data["id"],
-          published: published |> DateTime.to_unix()
-        }
-      end
-    end
-  end
-
   @impl true
   def add_to_index(%Activity{object: %Object{} = object} = activity) do
-    search_data = object_to_search_data(object)
+    search_data = Search.object_to_search_data(object)
 
     result =
       meili_put(
diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex
index 06f5b1983..4d57cfa88 100644
--- a/lib/pleroma/search/qdrant_search.ex
+++ b/lib/pleroma/search/qdrant_search.ex
@@ -5,11 +5,11 @@ defmodule Pleroma.Search.QdrantSearch do
   alias Pleroma.Activity
   alias Pleroma.Config.Getting, as: Config
   alias Pleroma.Object
+  alias Pleroma.Search
 
   alias __MODULE__.OpenAIClient
   alias __MODULE__.QdrantClient
 
-  import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
   import Pleroma.Search.DatabaseSearch, only: [maybe_fetch: 3]
 
   @impl true
@@ -84,7 +84,7 @@ defmodule Pleroma.Search.QdrantSearch do
 
   @impl true
   def add_to_index(%Activity{object: %Object{} = object} = activity) do
-    search_data = object_to_search_data(object)
+    search_data = Search.object_to_search_data(object)
 
     with {:ok, embedding} <- get_embedding(search_data.content),
          {:ok, %{status: 200}} <-