Use ranking to improve order of results

This commit is contained in:
Mark Felder 2025-07-31 18:02:33 -07:00
commit b1acc9281a
2 changed files with 65 additions and 7 deletions

View file

@ -149,9 +149,39 @@ defmodule Pleroma.Hashtag do
# This is much more efficient than multiple OR clauses # This is much more efficient than multiple OR clauses
search_patterns = Enum.map(search_terms, &"%#{&1}%") search_patterns = Enum.map(search_terms, &"%#{&1}%")
from(ht in Hashtag, # Create ranking query that prioritizes exact matches and closer matches
where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns), # Use a subquery to properly handle computed columns in ORDER BY
order_by: [asc: ht.name], base_query =
from(ht in Hashtag,
where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
select: %{
name: ht.name,
# Ranking: exact matches get highest priority (0), then prefix matches (1), then contains (2)
match_rank:
fragment(
"""
CASE
WHEN LOWER(?) = ANY(?) THEN 0
WHEN LOWER(?) LIKE ANY(?) THEN 1
ELSE 2
END
""",
ht.name,
^search_terms,
ht.name,
^Enum.map(search_terms, &"#{&1}%")
),
# Secondary sort by name length (shorter names first)
name_length: fragment("LENGTH(?)", ht.name)
}
)
from(result in subquery(base_query),
order_by: [
asc: result.match_rank,
asc: result.name_length,
asc: result.name
],
limit: ^limit, limit: ^limit,
offset: ^offset offset: ^offset
) )

View file

@ -39,7 +39,6 @@ defmodule Pleroma.HashtagTest do
end end
test "searches hashtags by multiple words in query" do test "searches hashtags by multiple words in query" do
# Create some hashtags
{:ok, _} = Hashtag.get_or_create_by_name("computer") {:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop") {:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("desktop") {:ok, _} = Hashtag.get_or_create_by_name("desktop")
@ -80,19 +79,48 @@ defmodule Pleroma.HashtagTest do
assert length(results) == 2 assert length(results) == 2
end end
test "handles many search terms efficiently" do test "handles matching many search terms" do
# Create hashtags
{:ok, _} = Hashtag.get_or_create_by_name("computer") {:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop") {:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("phone") {:ok, _} = Hashtag.get_or_create_by_name("phone")
{:ok, _} = Hashtag.get_or_create_by_name("tablet") {:ok, _} = Hashtag.get_or_create_by_name("tablet")
# Search with many terms - should be efficient with PostgreSQL ANY operator
results = Hashtag.search("new fast computer laptop phone tablet device") results = Hashtag.search("new fast computer laptop phone tablet device")
assert "computer" in results assert "computer" in results
assert "laptop" in results assert "laptop" in results
assert "phone" in results assert "phone" in results
assert "tablet" in results assert "tablet" in results
end end
test "ranks results by match quality" do
  # "apple_computer" sorts alphabetically before every other fixture, so a
  # purely alphabetical ordering would return it first. It only ends up last
  # when results are ordered by match quality, which is what this test pins.
  {:ok, _} = Hashtag.get_or_create_by_name("apple_computer")
  {:ok, _} = Hashtag.get_or_create_by_name("my_computer")
  {:ok, _} = Hashtag.get_or_create_by_name("computer_science")
  {:ok, _} = Hashtag.get_or_create_by_name("computer")

  results = Hashtag.search("computer")

  # Expected order:
  #   1. "computer"          - exact match
  #   2. "computer_science"  - prefix match
  #   3. "my_computer"       - contains match, shorter name
  #   4. "apple_computer"    - contains match, longer name
  assert results == ["computer", "computer_science", "my_computer", "apple_computer"]
end
test "prioritizes shorter names when ranking is equal" do
  # "scar", "nascar" and "racecar" all merely contain "car", so they share the
  # same match rank. "scar" is the shortest of them but sorts alphabetically
  # last, so it can only come first among them if name length is used as the
  # tie-breaker ahead of the name itself.
  {:ok, _} = Hashtag.get_or_create_by_name("car")
  {:ok, _} = Hashtag.get_or_create_by_name("racecar")
  {:ok, _} = Hashtag.get_or_create_by_name("nascar")
  {:ok, _} = Hashtag.get_or_create_by_name("scar")

  results = Hashtag.search("car")

  # The exact match leads; the remaining equal-rank matches follow by
  # ascending name length (4, 6, 7 characters).
  assert results == ["car", "scar", "nascar", "racecar"]
end
end end
end end