diff --git a/changelog.d/hashtag-search.change b/changelog.d/hashtag-search.change new file mode 100644 index 000000000..f17e711ce --- /dev/null +++ b/changelog.d/hashtag-search.change @@ -0,0 +1 @@ +Hashtag searches return real results based on words in your query diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex index 3682f0c14..91c30c6e7 100644 --- a/lib/pleroma/hashtag.ex +++ b/lib/pleroma/hashtag.ex @@ -130,4 +130,66 @@ defmodule Pleroma.Hashtag do end def get_recipients_for_activity(_activity), do: [] + + def search(query, options \\ []) do + limit = Keyword.get(options, :limit, 20) + offset = Keyword.get(options, :offset, 0) + + search_terms = + query + |> String.downcase() + |> String.trim() + |> String.split(~r/\s+/) + |> Enum.filter(&(&1 != "")) + |> Enum.map(&String.trim_leading(&1, "#")) + |> Enum.filter(&(&1 != "")) + + if Enum.empty?(search_terms) do + [] + else + # Use PostgreSQL's ANY operator with array for efficient multi-term search + # This is much more efficient than multiple OR clauses + search_patterns = Enum.map(search_terms, &"%#{&1}%") + + # Create ranking query that prioritizes exact matches and closer matches + # Use a subquery to properly handle computed columns in ORDER BY + base_query = + from(ht in Hashtag, + where: fragment("LOWER(?) LIKE ANY(?)", ht.name, ^search_patterns), + select: %{ + name: ht.name, + # Ranking: exact matches get highest priority (0) + # then prefix matches (1), then contains (2) + match_rank: + fragment( + """ + CASE + WHEN LOWER(?) = ANY(?) THEN 0 + WHEN LOWER(?) LIKE ANY(?) THEN 1 + ELSE 2 + END + """, + ht.name, + ^search_terms, + ht.name, + ^Enum.map(search_terms, &"#{&1}%") + ), + # Secondary sort by name length (shorter names first) + name_length: fragment("LENGTH(?)", ht.name) + } + ) + + from(result in subquery(base_query), + order_by: [ + asc: result.match_rank, + asc: result.name_length, + asc: result.name + ], + limit: ^limit, + offset: ^offset + ) + |> Repo.all() + |> Enum.map(& &1.name) + end + end end diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index d9a1ba41e..53f1216fd 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -5,6 +5,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do use Pleroma.Web, :controller + alias Pleroma.Hashtag alias Pleroma.Repo alias Pleroma.User alias Pleroma.Web.ControllerHelper @@ -120,69 +121,14 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do defp resource_search(:v2, "hashtags", query, options) do tags_path = Endpoint.url() <> "/tag/" - query - |> prepare_tags(options) + Hashtag.search(query, options) |> Enum.map(fn tag -> %{name: tag, url: tags_path <> tag} end) end defp resource_search(:v1, "hashtags", query, options) do - prepare_tags(query, options) - end - - defp prepare_tags(query, options) do - tags = - query - |> preprocess_uri_query() - |> String.split(~r/[^#\w]+/u, trim: true) - |> Enum.uniq_by(&String.downcase/1) - - explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end) - - tags = - if Enum.any?(explicit_tags) do - explicit_tags - else - tags - end - - tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end) - - tags = - if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do - add_joined_tag(tags) - else - tags - end - - Pleroma.Pagination.paginate_list(tags, options) - end - - defp add_joined_tag(tags) do - tags - |> Kernel.++([joined_tag(tags)]) - |> Enum.uniq_by(&String.downcase/1) - end - - # If `query` is a URI, returns last component of its path, otherwise returns `query` - defp preprocess_uri_query(query) do - if query =~ ~r/https?:\/\// do - query - |> String.trim_trailing("/") - |> URI.parse() - |> Map.get(:path) - |> String.split("/") - |> Enum.at(-1) - else - query - end - end - - defp joined_tag(tags) do - tags - |> Enum.map(fn tag -> String.capitalize(tag) end) - |> Enum.join() + Hashtag.search(query, options) end defp with_fallback(f, fallback \\ []) do diff --git a/test/pleroma/hashtag_test.exs b/test/pleroma/hashtag_test.exs index 8531b1879..0e16b8155 100644 --- a/test/pleroma/hashtag_test.exs +++ b/test/pleroma/hashtag_test.exs @@ -14,4 +14,133 @@ defmodule Pleroma.HashtagTest do assert {:name, {"can't be blank", [validation: :required]}} in changeset.errors end end + + describe "search_hashtags" do + test "searches hashtags by partial match" do + {:ok, _} = Hashtag.get_or_create_by_name("car") + {:ok, _} = Hashtag.get_or_create_by_name("racecar") + {:ok, _} = Hashtag.get_or_create_by_name("nascar") + {:ok, _} = Hashtag.get_or_create_by_name("bicycle") + + results = Hashtag.search("car") + assert "car" in results + assert "racecar" in results + assert "nascar" in results + refute "bicycle" in results + + results = Hashtag.search("race") + assert "racecar" in results + refute "car" in results + refute "nascar" in results + refute "bicycle" in results + + results = Hashtag.search("nonexistent") + assert results == [] + end + + test "searches hashtags by multiple words in query" do + {:ok, _} = Hashtag.get_or_create_by_name("computer") + {:ok, _} = Hashtag.get_or_create_by_name("laptop") + {:ok, _} = Hashtag.get_or_create_by_name("desktop") + {:ok, _} = Hashtag.get_or_create_by_name("phone") + + # Search for "new computer" - should return "computer" + results = Hashtag.search("new computer") + assert "computer" in results + refute "laptop" in results + refute "desktop" in results + refute "phone" in results + + # Search for "computer laptop" - should return both + results = Hashtag.search("computer laptop") + assert "computer" in results + assert "laptop" in results + refute "desktop" in results + refute "phone" in results + + # Search for "new phone" - should return "phone" + results = Hashtag.search("new phone") + assert "phone" in results + refute "computer" in results + refute "laptop" in results + refute "desktop" in results + end + + test "supports pagination" do + {:ok, _} = Hashtag.get_or_create_by_name("alpha") + {:ok, _} = Hashtag.get_or_create_by_name("beta") + {:ok, _} = Hashtag.get_or_create_by_name("gamma") + {:ok, _} = Hashtag.get_or_create_by_name("delta") + + results = Hashtag.search("a", limit: 2) + assert length(results) == 2 + + results = Hashtag.search("a", limit: 2, offset: 1) + assert length(results) == 2 + end + + test "handles matching many search terms" do + {:ok, _} = Hashtag.get_or_create_by_name("computer") + {:ok, _} = Hashtag.get_or_create_by_name("laptop") + {:ok, _} = Hashtag.get_or_create_by_name("phone") + {:ok, _} = Hashtag.get_or_create_by_name("tablet") + + results = Hashtag.search("new fast computer laptop phone tablet device") + assert "computer" in results + assert "laptop" in results + assert "phone" in results + assert "tablet" in results + end + + test "ranks results by match quality" do + {:ok, _} = Hashtag.get_or_create_by_name("my_computer") + {:ok, _} = Hashtag.get_or_create_by_name("computer_science") + {:ok, _} = Hashtag.get_or_create_by_name("computer") + + results = Hashtag.search("computer") + + # Exact match first + assert Enum.at(results, 0) == "computer" + + # Prefix match would be next + assert Enum.at(results, 1) == "computer_science" + + # worst match is last + assert Enum.at(results, 2) == "my_computer" + end + + test "prioritizes shorter names when ranking is equal" do + # Create hashtags with same ranking but different lengths + {:ok, _} = Hashtag.get_or_create_by_name("car") + {:ok, _} = Hashtag.get_or_create_by_name("racecar") + {:ok, _} = Hashtag.get_or_create_by_name("nascar") + + # Search for "car" - shorter names should come first + results = Hashtag.search("car") + # Shortest exact match first + assert Enum.at(results, 0) == "car" + assert "racecar" in results + assert "nascar" in results + end + + test "handles hashtag symbols in search query" do + {:ok, _} = Hashtag.get_or_create_by_name("computer") + {:ok, _} = Hashtag.get_or_create_by_name("laptop") + {:ok, _} = Hashtag.get_or_create_by_name("phone") + + results_with_hash = Hashtag.search("#computer #laptop") + results_without_hash = Hashtag.search("computer laptop") + + assert results_with_hash == results_without_hash + + results_mixed = Hashtag.search("#computer laptop #phone") + assert "computer" in results_mixed + assert "laptop" in results_mixed + assert "phone" in results_mixed + + results_only_hash = Hashtag.search("#computer") + results_no_hash = Hashtag.search("computer") + assert results_only_hash == results_no_hash + end + end end diff --git a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs index d8263dfad..f0c9c1901 100644 --- a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs @@ -7,7 +7,6 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do alias Pleroma.Object alias Pleroma.Web.CommonAPI - alias Pleroma.Web.Endpoint import Pleroma.Factory import ExUnit.CaptureLog import Tesla.Mock @@ -66,9 +65,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do [account | _] = results["accounts"] assert account["id"] == to_string(user_three.id) - assert results["hashtags"] == [ - %{"name" => "private", "url" => "#{Endpoint.url()}/tag/private"} - ] + assert results["hashtags"] == [] [status] = results["statuses"] assert status["id"] == to_string(activity.id) @@ -77,9 +74,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do get(conn, "/api/v2/search?q=天子") |> json_response_and_validate_schema(200) - assert results["hashtags"] == [ - %{"name" => "天子", "url" => "#{Endpoint.url()}/tag/天子"} - ] + assert results["hashtags"] == [] [status] = results["statuses"] assert status["id"] == to_string(activity.id) @@ -130,84 +125,97 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do assert [] = results["statuses"] end - test "constructs hashtags from search query", %{conn: conn} do + test "returns empty results when no hashtags match", %{conn: conn} do results = conn - |> get("/api/v2/search?#{URI.encode_query(%{q: "some text with #explicit #hashtags"})}") + |> get("/api/v2/search?#{URI.encode_query(%{q: "nonexistent"})}") |> json_response_and_validate_schema(200) - assert results["hashtags"] == [ - %{"name" => "explicit", "url" => "#{Endpoint.url()}/tag/explicit"}, - %{"name" => "hashtags", "url" => "#{Endpoint.url()}/tag/hashtags"} - ] + assert results["hashtags"] == [] + end + + test "searches hashtags by multiple words in query", %{conn: conn} do + user = insert(:user) + + {:ok, _activity1} = CommonAPI.post(user, %{status: "This is my new #computer"}) + {:ok, _activity2} = CommonAPI.post(user, %{status: "Check out this #laptop"}) + {:ok, _activity3} = CommonAPI.post(user, %{status: "My #desktop setup"}) + {:ok, _activity4} = CommonAPI.post(user, %{status: "New #phone arrived"}) results = conn - |> get("/api/v2/search?#{URI.encode_query(%{q: "john doe JOHN DOE"})}") + |> get("/api/v2/search?#{URI.encode_query(%{q: "new computer"})}") |> json_response_and_validate_schema(200) - assert results["hashtags"] == [ - %{"name" => "john", "url" => "#{Endpoint.url()}/tag/john"}, - %{"name" => "doe", "url" => "#{Endpoint.url()}/tag/doe"}, - %{"name" => "JohnDoe", "url" => "#{Endpoint.url()}/tag/JohnDoe"} - ] + hashtag_names = Enum.map(results["hashtags"], & &1["name"]) + assert "computer" in hashtag_names + refute "laptop" in hashtag_names + refute "desktop" in hashtag_names + refute "phone" in hashtag_names results = conn - |> get("/api/v2/search?#{URI.encode_query(%{q: "accident-prone"})}") + |> get("/api/v2/search?#{URI.encode_query(%{q: "computer laptop"})}") |> json_response_and_validate_schema(200) - assert results["hashtags"] == [ - %{"name" => "accident", "url" => "#{Endpoint.url()}/tag/accident"}, - %{"name" => "prone", "url" => "#{Endpoint.url()}/tag/prone"}, - %{"name" => "AccidentProne", "url" => "#{Endpoint.url()}/tag/AccidentProne"} - ] - - results = - conn - |> get("/api/v2/search?#{URI.encode_query(%{q: "https://shpposter.club/users/shpuld"})}") - |> json_response_and_validate_schema(200) - - assert results["hashtags"] == [ - %{"name" => "shpuld", "url" => "#{Endpoint.url()}/tag/shpuld"} - ] - - results = - conn - |> get( - "/api/v2/search?#{URI.encode_query(%{q: "https://www.washingtonpost.com/sports/2020/06/10/" <> "nascar-ban-display-confederate-flag-all-events-properties/"})}" - ) - |> json_response_and_validate_schema(200) - - assert results["hashtags"] == [ - %{"name" => "nascar", "url" => "#{Endpoint.url()}/tag/nascar"}, - %{"name" => "ban", "url" => "#{Endpoint.url()}/tag/ban"}, - %{"name" => "display", "url" => "#{Endpoint.url()}/tag/display"}, - %{"name" => "confederate", "url" => "#{Endpoint.url()}/tag/confederate"}, - %{"name" => "flag", "url" => "#{Endpoint.url()}/tag/flag"}, - %{"name" => "all", "url" => "#{Endpoint.url()}/tag/all"}, - %{"name" => "events", "url" => "#{Endpoint.url()}/tag/events"}, - %{"name" => "properties", "url" => "#{Endpoint.url()}/tag/properties"}, - %{ - "name" => "NascarBanDisplayConfederateFlagAllEventsProperties", - "url" => - "#{Endpoint.url()}/tag/NascarBanDisplayConfederateFlagAllEventsProperties" - } - ] + hashtag_names = Enum.map(results["hashtags"], & &1["name"]) + assert "computer" in hashtag_names + assert "laptop" in hashtag_names + refute "desktop" in hashtag_names + refute "phone" in hashtag_names end test "supports pagination of hashtags search results", %{conn: conn} do + user = insert(:user) + + {:ok, _activity1} = CommonAPI.post(user, %{status: "First #alpha hashtag"}) + {:ok, _activity2} = CommonAPI.post(user, %{status: "Second #beta hashtag"}) + {:ok, _activity3} = CommonAPI.post(user, %{status: "Third #gamma hashtag"}) + {:ok, _activity4} = CommonAPI.post(user, %{status: "Fourth #delta hashtag"}) + results = conn - |> get( - "/api/v2/search?#{URI.encode_query(%{q: "#some #text #with #hashtags", limit: 2, offset: 1})}" - ) + |> get("/api/v2/search?#{URI.encode_query(%{q: "a", limit: 2, offset: 1})}") |> json_response_and_validate_schema(200) - assert results["hashtags"] == [ - %{"name" => "text", "url" => "#{Endpoint.url()}/tag/text"}, - %{"name" => "with", "url" => "#{Endpoint.url()}/tag/with"} - ] + hashtag_names = Enum.map(results["hashtags"], & &1["name"]) + + # Should return 2 hashtags (alpha, beta, gamma, delta all contain 'a') + # With offset 1, we skip the first one, so we get 2 of the remaining 3 + assert length(hashtag_names) == 2 + assert Enum.all?(hashtag_names, &String.contains?(&1, "a")) + end + + test "searches real hashtags from database", %{conn: conn} do + user = insert(:user) + + {:ok, _activity1} = CommonAPI.post(user, %{status: "Check out this #car"}) + {:ok, _activity2} = CommonAPI.post(user, %{status: "Fast #racecar on the track"}) + {:ok, _activity3} = CommonAPI.post(user, %{status: "NASCAR #nascar racing"}) + + results = + conn + |> get("/api/v2/search?#{URI.encode_query(%{q: "car"})}") + |> json_response_and_validate_schema(200) + + hashtag_names = Enum.map(results["hashtags"], & &1["name"]) + + # Should return car, racecar, and nascar since they all contain "car" + assert "car" in hashtag_names + assert "racecar" in hashtag_names + assert "nascar" in hashtag_names + + # Search for "race" - should return racecar + results = + conn + |> get("/api/v2/search?#{URI.encode_query(%{q: "race"})}") + |> json_response_and_validate_schema(200) + + hashtag_names = Enum.map(results["hashtags"], & &1["name"]) + + assert "racecar" in hashtag_names + refute "car" in hashtag_names + refute "nascar" in hashtag_names end test "excludes a blocked users from search results", %{conn: conn} do @@ -314,7 +322,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do [account | _] = results["accounts"] assert account["id"] == to_string(user_three.id) - assert results["hashtags"] == ["2hu"] + assert results["hashtags"] == [] [status] = results["statuses"] assert status["id"] == to_string(activity.id)