Use ranking to improve order of results
This commit is contained in:
parent
93c144e397
commit
b1acc9281a
2 changed files with 65 additions and 7 deletions
|
|
@ -149,9 +149,39 @@ defmodule Pleroma.Hashtag do
|
||||||
# This is much more efficient than multiple OR clauses
|
# This is much more efficient than multiple OR clauses
|
||||||
search_patterns = Enum.map(search_terms, &"%#{&1}%")
|
search_patterns = Enum.map(search_terms, &"%#{&1}%")
|
||||||
|
|
||||||
from(ht in Hashtag,
|
# Create ranking query that prioritizes exact matches and closer matches
|
||||||
where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
|
# Use a subquery to properly handle computed columns in ORDER BY
|
||||||
order_by: [asc: ht.name],
|
base_query =
|
||||||
|
from(ht in Hashtag,
|
||||||
|
where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
|
||||||
|
select: %{
|
||||||
|
name: ht.name,
|
||||||
|
# Ranking: exact matches get the highest priority (0), then prefix matches (1), then substring matches (2)
|
||||||
|
match_rank:
|
||||||
|
fragment(
|
||||||
|
"""
|
||||||
|
CASE
|
||||||
|
WHEN LOWER(?) = ANY(?) THEN 0
|
||||||
|
WHEN LOWER(?) LIKE ANY(?) THEN 1
|
||||||
|
ELSE 2
|
||||||
|
END
|
||||||
|
""",
|
||||||
|
ht.name,
|
||||||
|
^search_terms,
|
||||||
|
ht.name,
|
||||||
|
^Enum.map(search_terms, &"#{&1}%")
|
||||||
|
),
|
||||||
|
# Secondary sort by name length (shorter names first)
|
||||||
|
name_length: fragment("LENGTH(?)", ht.name)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
from(result in subquery(base_query),
|
||||||
|
order_by: [
|
||||||
|
asc: result.match_rank,
|
||||||
|
asc: result.name_length,
|
||||||
|
asc: result.name
|
||||||
|
],
|
||||||
limit: ^limit,
|
limit: ^limit,
|
||||||
offset: ^offset
|
offset: ^offset
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,6 @@ defmodule Pleroma.HashtagTest do
|
||||||
end
|
end
|
||||||
|
|
||||||
test "searches hashtags by multiple words in query" do
|
test "searches hashtags by multiple words in query" do
|
||||||
# Create some hashtags
|
|
||||||
{:ok, _} = Hashtag.get_or_create_by_name("computer")
|
{:ok, _} = Hashtag.get_or_create_by_name("computer")
|
||||||
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
|
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
|
||||||
{:ok, _} = Hashtag.get_or_create_by_name("desktop")
|
{:ok, _} = Hashtag.get_or_create_by_name("desktop")
|
||||||
|
|
@ -80,19 +79,48 @@ defmodule Pleroma.HashtagTest do
|
||||||
assert length(results) == 2
|
assert length(results) == 2
|
||||||
end
|
end
|
||||||
|
|
||||||
test "handles many search terms efficiently" do
|
test "handles matching many search terms" do
|
||||||
# Create hashtags
|
|
||||||
{:ok, _} = Hashtag.get_or_create_by_name("computer")
|
{:ok, _} = Hashtag.get_or_create_by_name("computer")
|
||||||
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
|
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
|
||||||
{:ok, _} = Hashtag.get_or_create_by_name("phone")
|
{:ok, _} = Hashtag.get_or_create_by_name("phone")
|
||||||
{:ok, _} = Hashtag.get_or_create_by_name("tablet")
|
{:ok, _} = Hashtag.get_or_create_by_name("tablet")
|
||||||
|
|
||||||
# Search with many terms - should be efficient with PostgreSQL ANY operator
|
|
||||||
results = Hashtag.search("new fast computer laptop phone tablet device")
|
results = Hashtag.search("new fast computer laptop phone tablet device")
|
||||||
assert "computer" in results
|
assert "computer" in results
|
||||||
assert "laptop" in results
|
assert "laptop" in results
|
||||||
assert "phone" in results
|
assert "phone" in results
|
||||||
assert "tablet" in results
|
assert "tablet" in results
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "ranks results by match quality" do
|
||||||
|
{:ok, _} = Hashtag.get_or_create_by_name("my_computer")
|
||||||
|
{:ok, _} = Hashtag.get_or_create_by_name("computer_science")
|
||||||
|
{:ok, _} = Hashtag.get_or_create_by_name("computer")
|
||||||
|
|
||||||
|
results = Hashtag.search("computer")
|
||||||
|
|
||||||
|
# Exact match first
|
||||||
|
assert Enum.at(results, 0) == "computer"
|
||||||
|
|
||||||
|
# Prefix match comes next
|
||||||
|
assert Enum.at(results, 1) == "computer_science"
|
||||||
|
|
||||||
|
# Substring-only match (the worst rank) comes last
|
||||||
|
assert Enum.at(results, 2) == "my_computer"
|
||||||
|
end
|
||||||
|
|
||||||
|
test "prioritizes shorter names when ranking is equal" do
|
||||||
|
# Create hashtags of different lengths: "car" is an exact match, while "racecar" and "nascar" are substring-only matches that share the same rank
|
||||||
|
{:ok, _} = Hashtag.get_or_create_by_name("car")
|
||||||
|
{:ok, _} = Hashtag.get_or_create_by_name("racecar")
|
||||||
|
{:ok, _} = Hashtag.get_or_create_by_name("nascar")
|
||||||
|
|
||||||
|
# Search for "car" - shorter names should come first
|
||||||
|
results = Hashtag.search("car")
|
||||||
|
# The exact match (also the shortest name) comes first
|
||||||
|
assert Enum.at(results, 0) == "car"
|
||||||
|
assert "racecar" in results
|
||||||
|
assert "nascar" in results
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue