Use ranking to improve order of results

This commit is contained in:
Mark Felder 2025-07-31 18:02:33 -07:00
commit b1acc9281a
2 changed files with 65 additions and 7 deletions

View file

@ -149,9 +149,39 @@ defmodule Pleroma.Hashtag do
# This is much more efficient than multiple OR clauses
search_patterns = Enum.map(search_terms, &"%#{&1}%")
from(ht in Hashtag,
where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
order_by: [asc: ht.name],
# Create ranking query that prioritizes exact matches and closer matches
# Use a subquery to properly handle computed columns in ORDER BY
base_query =
from(ht in Hashtag,
where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
select: %{
name: ht.name,
# Ranking: exact matches get highest priority (0), then prefix matches (1), then contains (2)
match_rank:
fragment(
"""
CASE
WHEN LOWER(?) = ANY(?) THEN 0
WHEN LOWER(?) LIKE ANY(?) THEN 1
ELSE 2
END
""",
ht.name,
^search_terms,
ht.name,
^Enum.map(search_terms, &"#{&1}%")
),
# Secondary sort by name length (shorter names first)
name_length: fragment("LENGTH(?)", ht.name)
}
)
from(result in subquery(base_query),
order_by: [
asc: result.match_rank,
asc: result.name_length,
asc: result.name
],
limit: ^limit,
offset: ^offset
)

View file

@ -39,7 +39,6 @@ defmodule Pleroma.HashtagTest do
end
test "searches hashtags by multiple words in query" do
# Create some hashtags
{:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("desktop")
@ -80,19 +79,48 @@ defmodule Pleroma.HashtagTest do
assert length(results) == 2
end
test "handles many search terms efficiently" do
# Create hashtags
test "handles matching many search terms" do
{:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("phone")
{:ok, _} = Hashtag.get_or_create_by_name("tablet")
# Search with many terms - should be efficient with PostgreSQL ANY operator
results = Hashtag.search("new fast computer laptop phone tablet device")
assert "computer" in results
assert "laptop" in results
assert "phone" in results
assert "tablet" in results
end
test "ranks results by match quality" do
{:ok, _} = Hashtag.get_or_create_by_name("my_computer")
{:ok, _} = Hashtag.get_or_create_by_name("computer_science")
{:ok, _} = Hashtag.get_or_create_by_name("computer")
results = Hashtag.search("computer")
# Exact match first
assert Enum.at(results, 0) == "computer"
# Prefix match would be next
assert Enum.at(results, 1) == "computer_science"
# worst match is last
assert Enum.at(results, 2) == "my_computer"
end
test "prioritizes shorter names when ranking is equal" do
# Create hashtags with same ranking but different lengths
{:ok, _} = Hashtag.get_or_create_by_name("car")
{:ok, _} = Hashtag.get_or_create_by_name("racecar")
{:ok, _} = Hashtag.get_or_create_by_name("nascar")
# Search for "car" - shorter names should come first
results = Hashtag.search("car")
# Shortest exact match first
assert Enum.at(results, 0) == "car"
assert "racecar" in results
assert "nascar" in results
end
end
end