Merge branch 'hashtag-search' into 'develop'

Fix Hashtag search

See merge request pleroma/pleroma!4389
This commit is contained in:
feld 2025-08-02 18:49:57 +00:00
commit 62993871e4
5 changed files with 268 additions and 122 deletions

View file

@ -0,0 +1 @@
Hashtag searches return real results based on words in your query

View file

@ -130,4 +130,66 @@ defmodule Pleroma.Hashtag do
end end
def get_recipients_for_activity(_activity), do: [] def get_recipients_for_activity(_activity), do: []
def search(query, options \\ []) do
limit = Keyword.get(options, :limit, 20)
offset = Keyword.get(options, :offset, 0)
search_terms =
query
|> String.downcase()
|> String.trim()
|> String.split(~r/\s+/)
|> Enum.filter(&(&1 != ""))
|> Enum.map(&String.trim_leading(&1, "#"))
|> Enum.filter(&(&1 != ""))
if Enum.empty?(search_terms) do
[]
else
# Use PostgreSQL's ANY operator with array for efficient multi-term search
# This is much more efficient than multiple OR clauses
search_patterns = Enum.map(search_terms, &"%#{&1}%")
# Create ranking query that prioritizes exact matches and closer matches
# Use a subquery to properly handle computed columns in ORDER BY
base_query =
from(ht in Hashtag,
where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns),
select: %{
name: ht.name,
# Ranking: exact matches get highest priority (0)
# then prefix matches (1), then contains (2)
match_rank:
fragment(
"""
CASE
WHEN LOWER(?) = ANY(?) THEN 0
WHEN LOWER(?) LIKE ANY(?) THEN 1
ELSE 2
END
""",
ht.name,
^search_terms,
ht.name,
^Enum.map(search_terms, &"#{&1}%")
),
# Secondary sort by name length (shorter names first)
name_length: fragment("LENGTH(?)", ht.name)
}
)
from(result in subquery(base_query),
order_by: [
asc: result.match_rank,
asc: result.name_length,
asc: result.name
],
limit: ^limit,
offset: ^offset
)
|> Repo.all()
|> Enum.map(& &1.name)
end
end
end end

View file

@ -5,6 +5,7 @@
defmodule Pleroma.Web.MastodonAPI.SearchController do defmodule Pleroma.Web.MastodonAPI.SearchController do
use Pleroma.Web, :controller use Pleroma.Web, :controller
alias Pleroma.Hashtag
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.User alias Pleroma.User
alias Pleroma.Web.ControllerHelper alias Pleroma.Web.ControllerHelper
@ -120,69 +121,14 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do
defp resource_search(:v2, "hashtags", query, options) do defp resource_search(:v2, "hashtags", query, options) do
tags_path = Endpoint.url() <> "/tag/" tags_path = Endpoint.url() <> "/tag/"
query Hashtag.search(query, options)
|> prepare_tags(options)
|> Enum.map(fn tag -> |> Enum.map(fn tag ->
%{name: tag, url: tags_path <> tag} %{name: tag, url: tags_path <> tag}
end) end)
end end
defp resource_search(:v1, "hashtags", query, options) do defp resource_search(:v1, "hashtags", query, options) do
prepare_tags(query, options) Hashtag.search(query, options)
end
defp prepare_tags(query, options) do
tags =
query
|> preprocess_uri_query()
|> String.split(~r/[^#\w]+/u, trim: true)
|> Enum.uniq_by(&String.downcase/1)
explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end)
tags =
if Enum.any?(explicit_tags) do
explicit_tags
else
tags
end
tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end)
tags =
if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do
add_joined_tag(tags)
else
tags
end
Pleroma.Pagination.paginate_list(tags, options)
end
defp add_joined_tag(tags) do
tags
|> Kernel.++([joined_tag(tags)])
|> Enum.uniq_by(&String.downcase/1)
end
# If `query` is a URI, returns last component of its path, otherwise returns `query`
defp preprocess_uri_query(query) do
if query =~ ~r/https?:\/\// do
query
|> String.trim_trailing("/")
|> URI.parse()
|> Map.get(:path)
|> String.split("/")
|> Enum.at(-1)
else
query
end
end
defp joined_tag(tags) do
tags
|> Enum.map(fn tag -> String.capitalize(tag) end)
|> Enum.join()
end end
defp with_fallback(f, fallback \\ []) do defp with_fallback(f, fallback \\ []) do

View file

@ -14,4 +14,133 @@ defmodule Pleroma.HashtagTest do
assert {:name, {"can't be blank", [validation: :required]}} in changeset.errors assert {:name, {"can't be blank", [validation: :required]}} in changeset.errors
end end
end end
describe "search_hashtags" do
test "searches hashtags by partial match" do
{:ok, _} = Hashtag.get_or_create_by_name("car")
{:ok, _} = Hashtag.get_or_create_by_name("racecar")
{:ok, _} = Hashtag.get_or_create_by_name("nascar")
{:ok, _} = Hashtag.get_or_create_by_name("bicycle")
results = Hashtag.search("car")
assert "car" in results
assert "racecar" in results
assert "nascar" in results
refute "bicycle" in results
results = Hashtag.search("race")
assert "racecar" in results
refute "car" in results
refute "nascar" in results
refute "bicycle" in results
results = Hashtag.search("nonexistent")
assert results == []
end
test "searches hashtags by multiple words in query" do
{:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("desktop")
{:ok, _} = Hashtag.get_or_create_by_name("phone")
# Search for "new computer" - should return "computer"
results = Hashtag.search("new computer")
assert "computer" in results
refute "laptop" in results
refute "desktop" in results
refute "phone" in results
# Search for "computer laptop" - should return both
results = Hashtag.search("computer laptop")
assert "computer" in results
assert "laptop" in results
refute "desktop" in results
refute "phone" in results
# Search for "new phone" - should return "phone"
results = Hashtag.search("new phone")
assert "phone" in results
refute "computer" in results
refute "laptop" in results
refute "desktop" in results
end
test "supports pagination" do
{:ok, _} = Hashtag.get_or_create_by_name("alpha")
{:ok, _} = Hashtag.get_or_create_by_name("beta")
{:ok, _} = Hashtag.get_or_create_by_name("gamma")
{:ok, _} = Hashtag.get_or_create_by_name("delta")
results = Hashtag.search("a", limit: 2)
assert length(results) == 2
results = Hashtag.search("a", limit: 2, offset: 1)
assert length(results) == 2
end
test "handles matching many search terms" do
{:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("phone")
{:ok, _} = Hashtag.get_or_create_by_name("tablet")
results = Hashtag.search("new fast computer laptop phone tablet device")
assert "computer" in results
assert "laptop" in results
assert "phone" in results
assert "tablet" in results
end
test "ranks results by match quality" do
{:ok, _} = Hashtag.get_or_create_by_name("my_computer")
{:ok, _} = Hashtag.get_or_create_by_name("computer_science")
{:ok, _} = Hashtag.get_or_create_by_name("computer")
results = Hashtag.search("computer")
# Exact match first
assert Enum.at(results, 0) == "computer"
# Prefix match would be next
assert Enum.at(results, 1) == "computer_science"
# worst match is last
assert Enum.at(results, 2) == "my_computer"
end
test "prioritizes shorter names when ranking is equal" do
# Create hashtags with same ranking but different lengths
{:ok, _} = Hashtag.get_or_create_by_name("car")
{:ok, _} = Hashtag.get_or_create_by_name("racecar")
{:ok, _} = Hashtag.get_or_create_by_name("nascar")
# Search for "car" - shorter names should come first
results = Hashtag.search("car")
# Shortest exact match first
assert Enum.at(results, 0) == "car"
assert "racecar" in results
assert "nascar" in results
end
test "handles hashtag symbols in search query" do
{:ok, _} = Hashtag.get_or_create_by_name("computer")
{:ok, _} = Hashtag.get_or_create_by_name("laptop")
{:ok, _} = Hashtag.get_or_create_by_name("phone")
results_with_hash = Hashtag.search("#computer #laptop")
results_without_hash = Hashtag.search("computer laptop")
assert results_with_hash == results_without_hash
results_mixed = Hashtag.search("#computer laptop #phone")
assert "computer" in results_mixed
assert "laptop" in results_mixed
assert "phone" in results_mixed
results_only_hash = Hashtag.search("#computer")
results_no_hash = Hashtag.search("computer")
assert results_only_hash == results_no_hash
end
end
end end

View file

@ -7,7 +7,6 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Web.CommonAPI alias Pleroma.Web.CommonAPI
alias Pleroma.Web.Endpoint
import Pleroma.Factory import Pleroma.Factory
import ExUnit.CaptureLog import ExUnit.CaptureLog
import Tesla.Mock import Tesla.Mock
@ -66,9 +65,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
[account | _] = results["accounts"] [account | _] = results["accounts"]
assert account["id"] == to_string(user_three.id) assert account["id"] == to_string(user_three.id)
assert results["hashtags"] == [ assert results["hashtags"] == []
%{"name" => "private", "url" => "#{Endpoint.url()}/tag/private"}
]
[status] = results["statuses"] [status] = results["statuses"]
assert status["id"] == to_string(activity.id) assert status["id"] == to_string(activity.id)
@ -77,9 +74,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
get(conn, "/api/v2/search?q=天子") get(conn, "/api/v2/search?q=天子")
|> json_response_and_validate_schema(200) |> json_response_and_validate_schema(200)
assert results["hashtags"] == [ assert results["hashtags"] == []
%{"name" => "天子", "url" => "#{Endpoint.url()}/tag/天子"}
]
[status] = results["statuses"] [status] = results["statuses"]
assert status["id"] == to_string(activity.id) assert status["id"] == to_string(activity.id)
@ -130,84 +125,97 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
assert [] = results["statuses"] assert [] = results["statuses"]
end end
test "constructs hashtags from search query", %{conn: conn} do test "returns empty results when no hashtags match", %{conn: conn} do
results = results =
conn conn
|> get("/api/v2/search?#{URI.encode_query(%{q: "some text with #explicit #hashtags"})}") |> get("/api/v2/search?#{URI.encode_query(%{q: "nonexistent"})}")
|> json_response_and_validate_schema(200) |> json_response_and_validate_schema(200)
assert results["hashtags"] == [ assert results["hashtags"] == []
%{"name" => "explicit", "url" => "#{Endpoint.url()}/tag/explicit"}, end
%{"name" => "hashtags", "url" => "#{Endpoint.url()}/tag/hashtags"}
] test "searches hashtags by multiple words in query", %{conn: conn} do
user = insert(:user)
{:ok, _activity1} = CommonAPI.post(user, %{status: "This is my new #computer"})
{:ok, _activity2} = CommonAPI.post(user, %{status: "Check out this #laptop"})
{:ok, _activity3} = CommonAPI.post(user, %{status: "My #desktop setup"})
{:ok, _activity4} = CommonAPI.post(user, %{status: "New #phone arrived"})
results = results =
conn conn
|> get("/api/v2/search?#{URI.encode_query(%{q: "john doe JOHN DOE"})}") |> get("/api/v2/search?#{URI.encode_query(%{q: "new computer"})}")
|> json_response_and_validate_schema(200) |> json_response_and_validate_schema(200)
assert results["hashtags"] == [ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
%{"name" => "john", "url" => "#{Endpoint.url()}/tag/john"}, assert "computer" in hashtag_names
%{"name" => "doe", "url" => "#{Endpoint.url()}/tag/doe"}, refute "laptop" in hashtag_names
%{"name" => "JohnDoe", "url" => "#{Endpoint.url()}/tag/JohnDoe"} refute "desktop" in hashtag_names
] refute "phone" in hashtag_names
results = results =
conn conn
|> get("/api/v2/search?#{URI.encode_query(%{q: "accident-prone"})}") |> get("/api/v2/search?#{URI.encode_query(%{q: "computer laptop"})}")
|> json_response_and_validate_schema(200) |> json_response_and_validate_schema(200)
assert results["hashtags"] == [ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
%{"name" => "accident", "url" => "#{Endpoint.url()}/tag/accident"}, assert "computer" in hashtag_names
%{"name" => "prone", "url" => "#{Endpoint.url()}/tag/prone"}, assert "laptop" in hashtag_names
%{"name" => "AccidentProne", "url" => "#{Endpoint.url()}/tag/AccidentProne"} refute "desktop" in hashtag_names
] refute "phone" in hashtag_names
results =
conn
|> get("/api/v2/search?#{URI.encode_query(%{q: "https://shpposter.club/users/shpuld"})}")
|> json_response_and_validate_schema(200)
assert results["hashtags"] == [
%{"name" => "shpuld", "url" => "#{Endpoint.url()}/tag/shpuld"}
]
results =
conn
|> get(
"/api/v2/search?#{URI.encode_query(%{q: "https://www.washingtonpost.com/sports/2020/06/10/" <> "nascar-ban-display-confederate-flag-all-events-properties/"})}"
)
|> json_response_and_validate_schema(200)
assert results["hashtags"] == [
%{"name" => "nascar", "url" => "#{Endpoint.url()}/tag/nascar"},
%{"name" => "ban", "url" => "#{Endpoint.url()}/tag/ban"},
%{"name" => "display", "url" => "#{Endpoint.url()}/tag/display"},
%{"name" => "confederate", "url" => "#{Endpoint.url()}/tag/confederate"},
%{"name" => "flag", "url" => "#{Endpoint.url()}/tag/flag"},
%{"name" => "all", "url" => "#{Endpoint.url()}/tag/all"},
%{"name" => "events", "url" => "#{Endpoint.url()}/tag/events"},
%{"name" => "properties", "url" => "#{Endpoint.url()}/tag/properties"},
%{
"name" => "NascarBanDisplayConfederateFlagAllEventsProperties",
"url" =>
"#{Endpoint.url()}/tag/NascarBanDisplayConfederateFlagAllEventsProperties"
}
]
end end
test "supports pagination of hashtags search results", %{conn: conn} do test "supports pagination of hashtags search results", %{conn: conn} do
user = insert(:user)
{:ok, _activity1} = CommonAPI.post(user, %{status: "First #alpha hashtag"})
{:ok, _activity2} = CommonAPI.post(user, %{status: "Second #beta hashtag"})
{:ok, _activity3} = CommonAPI.post(user, %{status: "Third #gamma hashtag"})
{:ok, _activity4} = CommonAPI.post(user, %{status: "Fourth #delta hashtag"})
results = results =
conn conn
|> get( |> get("/api/v2/search?#{URI.encode_query(%{q: "a", limit: 2, offset: 1})}")
"/api/v2/search?#{URI.encode_query(%{q: "#some #text #with #hashtags", limit: 2, offset: 1})}"
)
|> json_response_and_validate_schema(200) |> json_response_and_validate_schema(200)
assert results["hashtags"] == [ hashtag_names = Enum.map(results["hashtags"], & &1["name"])
%{"name" => "text", "url" => "#{Endpoint.url()}/tag/text"},
%{"name" => "with", "url" => "#{Endpoint.url()}/tag/with"} # Should return 2 hashtags (alpha, beta, gamma, delta all contain 'a')
] # With offset 1, we skip the first one, so we get 2 of the remaining 3
assert length(hashtag_names) == 2
assert Enum.all?(hashtag_names, &String.contains?(&1, "a"))
end
test "searches real hashtags from database", %{conn: conn} do
user = insert(:user)
{:ok, _activity1} = CommonAPI.post(user, %{status: "Check out this #car"})
{:ok, _activity2} = CommonAPI.post(user, %{status: "Fast #racecar on the track"})
{:ok, _activity3} = CommonAPI.post(user, %{status: "NASCAR #nascar racing"})
results =
conn
|> get("/api/v2/search?#{URI.encode_query(%{q: "car"})}")
|> json_response_and_validate_schema(200)
hashtag_names = Enum.map(results["hashtags"], & &1["name"])
# Should return car, racecar, and nascar since they all contain "car"
assert "car" in hashtag_names
assert "racecar" in hashtag_names
assert "nascar" in hashtag_names
# Search for "race" - should return racecar
results =
conn
|> get("/api/v2/search?#{URI.encode_query(%{q: "race"})}")
|> json_response_and_validate_schema(200)
hashtag_names = Enum.map(results["hashtags"], & &1["name"])
assert "racecar" in hashtag_names
refute "car" in hashtag_names
refute "nascar" in hashtag_names
end end
test "excludes a blocked users from search results", %{conn: conn} do test "excludes a blocked users from search results", %{conn: conn} do
@ -314,7 +322,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
[account | _] = results["accounts"] [account | _] = results["accounts"]
assert account["id"] == to_string(user_three.id) assert account["id"] == to_string(user_three.id)
assert results["hashtags"] == ["2hu"] assert results["hashtags"] == []
[status] = results["statuses"] [status] = results["statuses"]
assert status["id"] == to_string(activity.id) assert status["id"] == to_string(activity.id)