Refactor ReachabilityWorker to use a 5-phase reachability testing approach

It will check reachability for an instance deemed unreachable at the following intervals:

4 attempts, once a minute
4 attempts, once every 15 minutes
4 attempts, once every 60 minutes
4 attempts, once every 8 hours
4 attempts, once every 24 hours

This should be effective and respectful of the resources of instances on the fediverse.

We have the Oban Pruner plugin enabled to keep the Oban Jobs table from growing indefinitely. It prunes every 15 minutes, but this will interfere with our ability to enforce uniqueness on the ReachabilityWorker jobs for a time period longer than 15 minutes. The solution is to exclude the ReachabilityWorker from the pruning operation and instead schedule a custom job that will prune the table for us once a day. The ReachabilityPruner cron task will clean up the history of the ReachabilityWorker jobs older than 6 days.
This commit is contained in:
Mark Felder 2025-06-27 16:35:10 -07:00
commit 77dca7c3e5
5 changed files with 296 additions and 60 deletions

View file

@ -0,0 +1,26 @@
defmodule Pleroma.Workers.Cron.ReachabilityPruner do
  @moduledoc """
  Cron worker that prunes old `Pleroma.Workers.ReachabilityWorker` jobs.

  Each run deletes every row of the Oban jobs table whose worker is the
  reachability worker and whose `inserted_at` is more than `@prune_days` days
  in the past.
  """

  use Oban.Worker, queue: :background, max_attempts: 1

  import Ecto.Query

  require Logger

  # Oban stores the worker name WITHOUT the "Elixir." prefix (see
  # `Oban.Worker.to_string/1`), so the stored value for the reachability worker
  # is the plain module name. Matching on "Elixir.Pleroma..." never hits a row.
  @reachability_worker "Pleroma.Workers.ReachabilityWorker"
  @prune_days 6

  @impl true
  def perform(_job) do
    # Anything inserted before this instant is considered stale.
    cutoff = DateTime.utc_now() |> DateTime.add(-@prune_days * 24 * 60 * 60, :second)

    {count, _} =
      from(j in Oban.Job,
        where: j.worker == @reachability_worker and j.inserted_at < ^cutoff
      )
      |> Pleroma.Repo.delete_all()

    # Only log when something was actually removed.
    if count > 0 do
      Logger.debug(fn -> "Pruned #{count} old ReachabilityWorker jobs." end)
    end

    :ok
  end
end

View file

@ -5,17 +5,31 @@
defmodule Pleroma.Workers.ReachabilityWorker do
use Oban.Worker,
queue: :background,
max_attempts: 3,
unique: [period: :infinity, states: [:available, :scheduled]]
max_attempts: 1,
unique: [period: :infinity, states: [:available, :scheduled], keys: [:domain]]
alias Pleroma.HTTP
alias Pleroma.Instances
@impl true
def perform(%Oban.Job{args: %{"domain" => domain}}) do
def perform(%Oban.Job{args: %{"domain" => domain, "phase" => phase, "attempt" => attempt}}) do
  # Probe the instance once; on success mark it reachable, on failure let
  # handle_failed_attempt/3 decide what gets scheduled next and hand the error
  # tuple back to Oban as the job result.
  outcome = check_reachability(domain)

  case outcome do
    {:error, _} ->
      handle_failed_attempt(domain, phase, attempt)
      outcome

    :ok ->
      Instances.set_reachable("https://#{domain}")
      :ok
  end
end
@impl true
# Oban.Worker callback: execution time limit for a job, in milliseconds (5 seconds).
def timeout(_job) do
  :timer.seconds(5)
end
defp check_reachability(domain) do
case HTTP.get("https://#{domain}/") do
{:ok, %{status: status}} when status in 200..299 ->
Instances.set_reachable("https://#{domain}")
:ok
{:ok, %{status: _status}} ->
@ -26,6 +40,51 @@ defmodule Pleroma.Workers.ReachabilityWorker do
end
end
# Oban.Worker callback: execution time limit for a job, in milliseconds
# (:timer.seconds(5) is 5000).
@impl true
def timeout(_job), do: :timer.seconds(5)
# Decides what to schedule after a failed reachability check.
#
# A failure in the "final" phase schedules nothing. Otherwise, once the
# attempt counter has reached the phase's limit the domain moves on to the
# next phase; below the limit the same phase is retried with the counter
# incremented, after that phase's interval.
defp handle_failed_attempt(_domain, "final", _attempt), do: :ok

defp handle_failed_attempt(domain, phase, attempt) do
  case get_phase_config(phase) do
    {_interval, attempt_limit, following_phase} when attempt >= attempt_limit ->
      schedule_next_phase(domain, following_phase)

    {interval, _attempt_limit, _following_phase} ->
      schedule_retry(domain, phase, attempt + 1, interval)
  end
end
# Phase table: phase name => {interval in minutes, attempts in the phase, next phase}.
@phase_configs %{
  "phase_1min" => {1, 4, "phase_15min"},
  "phase_15min" => {15, 4, "phase_1hour"},
  "phase_1hour" => {60, 4, "phase_8hour"},
  "phase_8hour" => {480, 4, "phase_24hour"},
  "phase_24hour" => {1440, 4, "final"},
  "final" => {nil, 0, nil}
}

# Looks up the configuration tuple for a known phase. The guard keeps the
# function undefined for unknown phase names, so those still raise a
# FunctionClauseError.
defp get_phase_config(phase) when is_map_key(@phase_configs, phase) do
  Map.fetch!(@phase_configs, phase)
end
# Enqueues the first attempt of the given phase, delayed by that phase's
# interval. Nothing is scheduled once the "final" phase is reached.
defp schedule_next_phase(_domain, "final"), do: :ok

defp schedule_next_phase(domain, next_phase) do
  {delay_minutes, _attempt_limit, _following_phase} = get_phase_config(next_phase)

  # Entering a phase is the same as scheduling its attempt number 1.
  schedule_retry(domain, next_phase, 1, delay_minutes)
end
@doc """
Inserts a reachability job for `domain` carrying the given `phase` and
`attempt`, scheduled `interval_minutes` minutes from now.

Returns the result of `Oban.insert/1`.
"""
def schedule_retry(domain, phase, attempt, interval_minutes) do
  delay_seconds = interval_minutes * 60
  run_at = DateTime.utc_now() |> DateTime.add(delay_seconds, :second)

  job_args = %{"domain" => domain, "phase" => phase, "attempt" => attempt}

  # NOTE(review): Oban documents :replace as a list of fields (optionally per
  # state); confirm that `replace: true` has the intended effect on a unique
  # conflict.
  job_args
  |> new(scheduled_at: run_at, replace: true)
  |> Oban.insert()
end
end