Move object_to_search_data/1 to Pleroma.Search

This standardizes this functionality within the Search module so
it doesn't need to be imported by other search backends from Meilisearch

Also integrate its filtering rules into Search.indexable?/1 for consistency
This commit is contained in:
Mark Felder 2026-03-25 14:47:39 -07:00
commit f06a0eab50
5 changed files with 43 additions and 42 deletions

View file

@ -72,7 +72,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
query,
timeout: :infinity
)
|> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
|> Stream.map(&Pleroma.Search.object_to_search_data/1)
|> Stream.filter(fn o -> not is_nil(o) end)
|> Stream.chunk_every(chunk_size)
|> Stream.transform(0, fn objects, acc ->

View file

@ -38,6 +38,43 @@ defmodule Pleroma.Search do
search_module.healthcheck_endpoints()
end
defp indexable?(%Activity{data: %{"type" => "Create"}}), do: true
def object_to_search_data(%Object{} = object) do
data = object.data
content_str =
case data["content"] do
[nil | rest] -> to_string(rest)
str -> str
end
content =
with {:ok, scrubbed} <-
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
trimmed <- String.trim(scrubbed) do
trimmed
end
# Make sure we have a non-empty string
if content != "" do
{:ok, published, _} = DateTime.from_iso8601(data["published"])
%{
id: object.id,
content: content,
ap: data["id"],
published: published |> DateTime.to_unix()
}
end
end
defp indexable?(%Activity{
data: %{"type" => "Create"},
object: %Object{
data: %{"content" => content, "published" => published, "type" => "Note"}
}
})
when not is_nil(content) and content not in ["", "."] and not is_nil(published),
do: true
defp indexable?(_), do: false
end

View file

@ -5,6 +5,7 @@ defmodule Pleroma.Search.Meilisearch do
alias Pleroma.Activity
alias Pleroma.Config.Getting, as: Config
alias Pleroma.Object
alias Pleroma.Search
import Pleroma.Search.DatabaseSearch
import Ecto.Query
@ -119,46 +120,9 @@ defmodule Pleroma.Search.Meilisearch do
end
end
def object_to_search_data(object) do
# Only index public or unlisted Notes
if not is_nil(object) and object.data["type"] == "Note" and
not is_nil(object.data["content"]) and
not is_nil(object.data["published"]) and
(Pleroma.Constants.as_public() in object.data["to"] or
Pleroma.Constants.as_public() in object.data["cc"]) and
object.data["content"] not in ["", "."] do
data = object.data
content_str =
case data["content"] do
[nil | rest] -> to_string(rest)
str -> str
end
content =
with {:ok, scrubbed} <-
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
trimmed <- String.trim(scrubbed) do
trimmed
end
# Make sure we have a non-empty string
if content != "" do
{:ok, published, _} = DateTime.from_iso8601(data["published"])
%{
id: object.id,
content: content,
ap: data["id"],
published: published |> DateTime.to_unix()
}
end
end
end
@impl true
def add_to_index(%Activity{object: %Object{} = object} = activity) do
search_data = object_to_search_data(object)
search_data = Search.object_to_search_data(object)
result =
meili_put(

View file

@ -5,11 +5,11 @@ defmodule Pleroma.Search.QdrantSearch do
alias Pleroma.Activity
alias Pleroma.Config.Getting, as: Config
alias Pleroma.Object
alias Pleroma.Search
alias __MODULE__.OpenAIClient
alias __MODULE__.QdrantClient
import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
import Pleroma.Search.DatabaseSearch, only: [maybe_fetch: 3]
@impl true
@ -84,7 +84,7 @@ defmodule Pleroma.Search.QdrantSearch do
@impl true
def add_to_index(%Activity{object: %Object{} = object} = activity) do
search_data = object_to_search_data(object)
search_data = Search.object_to_search_data(object)
with {:ok, embedding} <- get_embedding(search_data.content),
{:ok, %{status: 200}} <-