Merge pull request 'Additional Search Indexing cleanup' (#7864) from search-indexing into develop
Reviewed-on: https://git.pleroma.social/pleroma/pleroma/pulls/7864
This commit is contained in:
commit
9af26e5fb5
5 changed files with 44 additions and 43 deletions
0
changelog.d/search-indexing.skip
Normal file
0
changelog.d/search-indexing.skip
Normal file
|
|
@@ -72,7 +72,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
|
|||
query,
|
||||
timeout: :infinity
|
||||
)
|
||||
|> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
|
||||
|> Stream.map(&Pleroma.Search.object_to_search_data/1)
|
||||
|> Stream.filter(fn o -> not is_nil(o) end)
|
||||
|> Stream.chunk_every(chunk_size)
|
||||
|> Stream.transform(0, fn objects, acc ->
|
||||
|
|
|
|||
|
|
@@ -18,7 +18,7 @@ defmodule Pleroma.Search do
|
|||
def add_to_index(%Activity{id: activity_id}) do
|
||||
case Activity.get_by_id_with_object(activity_id) do
|
||||
%Activity{} = preloaded -> add_to_index(preloaded)
|
||||
_ -> :ok
|
||||
_ -> {:ok, :noop}
|
||||
end
|
||||
end
|
||||
|
||||
|
|
@@ -38,6 +38,43 @@ defmodule Pleroma.Search do
|
|||
search_module.healthcheck_endpoints()
|
||||
end
|
||||
|
||||
defp indexable?(%Activity{data: %{"type" => "Create"}}), do: true
|
||||
def object_to_search_data(%Object{} = object) do
|
||||
data = object.data
|
||||
|
||||
content_str =
|
||||
case data["content"] do
|
||||
[nil | rest] -> to_string(rest)
|
||||
str -> str
|
||||
end
|
||||
|
||||
content =
|
||||
with {:ok, scrubbed} <-
|
||||
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
|
||||
trimmed <- String.trim(scrubbed) do
|
||||
trimmed
|
||||
end
|
||||
|
||||
# Make sure we have a non-empty string
|
||||
if content != "" do
|
||||
{:ok, published, _} = DateTime.from_iso8601(data["published"])
|
||||
|
||||
%{
|
||||
id: object.id,
|
||||
content: content,
|
||||
ap: data["id"],
|
||||
published: published |> DateTime.to_unix()
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
defp indexable?(%Activity{
|
||||
data: %{"type" => "Create"},
|
||||
object: %Object{
|
||||
data: %{"content" => content, "published" => published, "type" => "Note"}
|
||||
}
|
||||
})
|
||||
when not is_nil(content) and content not in ["", "."] and not is_nil(published),
|
||||
do: true
|
||||
|
||||
defp indexable?(_), do: false
|
||||
end
|
||||
|
|
|
|||
|
|
@@ -5,6 +5,7 @@ defmodule Pleroma.Search.Meilisearch do
|
|||
alias Pleroma.Activity
|
||||
alias Pleroma.Config.Getting, as: Config
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Search
|
||||
|
||||
import Pleroma.Search.DatabaseSearch
|
||||
import Ecto.Query
|
||||
|
|
@@ -119,46 +120,9 @@ defmodule Pleroma.Search.Meilisearch do
|
|||
end
|
||||
end
|
||||
|
||||
def object_to_search_data(object) do
|
||||
# Only index public or unlisted Notes
|
||||
if not is_nil(object) and object.data["type"] == "Note" and
|
||||
not is_nil(object.data["content"]) and
|
||||
not is_nil(object.data["published"]) and
|
||||
(Pleroma.Constants.as_public() in object.data["to"] or
|
||||
Pleroma.Constants.as_public() in object.data["cc"]) and
|
||||
object.data["content"] not in ["", "."] do
|
||||
data = object.data
|
||||
|
||||
content_str =
|
||||
case data["content"] do
|
||||
[nil | rest] -> to_string(rest)
|
||||
str -> str
|
||||
end
|
||||
|
||||
content =
|
||||
with {:ok, scrubbed} <-
|
||||
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
|
||||
trimmed <- String.trim(scrubbed) do
|
||||
trimmed
|
||||
end
|
||||
|
||||
# Make sure we have a non-empty string
|
||||
if content != "" do
|
||||
{:ok, published, _} = DateTime.from_iso8601(data["published"])
|
||||
|
||||
%{
|
||||
id: object.id,
|
||||
content: content,
|
||||
ap: data["id"],
|
||||
published: published |> DateTime.to_unix()
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@impl true
|
||||
def add_to_index(%Activity{object: %Object{} = object} = activity) do
|
||||
search_data = object_to_search_data(object)
|
||||
search_data = Search.object_to_search_data(object)
|
||||
|
||||
result =
|
||||
meili_put(
|
||||
|
|
|
|||
|
|
@@ -5,11 +5,11 @@ defmodule Pleroma.Search.QdrantSearch do
|
|||
alias Pleroma.Activity
|
||||
alias Pleroma.Config.Getting, as: Config
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Search
|
||||
|
||||
alias __MODULE__.OpenAIClient
|
||||
alias __MODULE__.QdrantClient
|
||||
|
||||
import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
|
||||
import Pleroma.Search.DatabaseSearch, only: [maybe_fetch: 3]
|
||||
|
||||
@impl true
|
||||
|
|
@@ -84,7 +84,7 @@ defmodule Pleroma.Search.QdrantSearch do
|
|||
|
||||
@impl true
|
||||
def add_to_index(%Activity{object: %Object{} = object} = activity) do
|
||||
search_data = object_to_search_data(object)
|
||||
search_data = Search.object_to_search_data(object)
|
||||
|
||||
with {:ok, embedding} <- get_embedding(search_data.content),
|
||||
{:ok, %{status: 200}} <-
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue