Refactor ReachabilityWorker to use a 5-phase reachability testing approach
It will check reachability for an instance deemed unreachable at the following intervals: 4 attempts, once a minute; 4 attempts, once every 15 minutes; 4 attempts, once every 60 minutes; 4 attempts, once every 8 hours; 4 attempts, once every 24 hours. This should be effective and respectful of the resources of instances on the fediverse. We have the Oban Pruner plugin enabled to keep the Oban Jobs table from growing indefinitely. It prunes every 15 minutes, but this would interfere with our ability to enforce uniqueness on the ReachabilityWorker jobs for a time period longer than 15 minutes. The solution is to exclude the ReachabilityWorker from the pruning operation and instead schedule a custom job that prunes the table for us once a day. The ReachabilityPruner cron task cleans up the history of ReachabilityWorker jobs older than 6 days.
This commit is contained in:
parent
59bfa83c9c
commit
77dca7c3e5
5 changed files with 296 additions and 60 deletions
26
lib/pleroma/workers/cron/reachability_pruner.ex
Normal file
26
lib/pleroma/workers/cron/reachability_pruner.ex
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
defmodule Pleroma.Workers.Cron.ReachabilityPruner do
  @moduledoc """
  Oban worker that deletes old `Pleroma.Workers.ReachabilityWorker` job rows.

  Every row of that worker whose `inserted_at` is more than `@prune_days` days in
  the past is removed from the Oban jobs table, whatever its state. The schedule
  on which this worker runs is configured outside this module.
  """

  use Oban.Worker, queue: :background, max_attempts: 1

  import Ecto.Query

  require Logger

  # Oban stores worker names without the "Elixir." prefix (see
  # `Oban.Worker.to_string/1`), so the stored value is the bare module name.
  # Matching on "Elixir.Pleroma.Workers.ReachabilityWorker" selects no rows.
  @reachability_worker "Pleroma.Workers.ReachabilityWorker"

  # Age, in days, after which a ReachabilityWorker job row is deleted.
  @prune_days 6

  @impl true
  def perform(_job) do
    cutoff = DateTime.add(DateTime.utc_now(), -@prune_days * 24 * 60 * 60, :second)

    {count, _} =
      Oban.Job
      |> where([j], j.worker == @reachability_worker and j.inserted_at < ^cutoff)
      |> Pleroma.Repo.delete_all()

    # Only log when something was actually removed.
    if count > 0 do
      Logger.debug(fn -> "Pruned #{count} old ReachabilityWorker jobs." end)
    end

    :ok
  end
end
|
||||
|
|
@ -5,17 +5,31 @@
|
|||
defmodule Pleroma.Workers.ReachabilityWorker do
|
||||
use Oban.Worker,
|
||||
queue: :background,
|
||||
max_attempts: 3,
|
||||
unique: [period: :infinity, states: [:available, :scheduled]]
|
||||
max_attempts: 1,
|
||||
unique: [period: :infinity, states: [:available, :scheduled], keys: [:domain]]
|
||||
|
||||
alias Pleroma.HTTP
|
||||
alias Pleroma.Instances
|
||||
|
||||
# Runs one reachability check for `domain`.
#
# On success the instance is marked reachable and the job returns `:ok`.
# On failure the next attempt (same phase or next phase) is scheduled through
# `handle_failed_attempt/3` and the error tuple is returned, so this job is
# recorded as failed.
#
# Jobs whose args contain only "domain" (the shape used before phases were
# introduced) are treated as the first attempt of the first phase rather than
# raising a FunctionClauseError.
@impl true
def perform(%Oban.Job{args: %{"domain" => domain} = args}) do
  phase = Map.get(args, "phase", "phase_1min")
  attempt = Map.get(args, "attempt", 1)

  case check_reachability(domain) do
    :ok ->
      Instances.set_reachable("https://#{domain}")
      :ok

    {:error, _} = error ->
      handle_failed_attempt(domain, phase, attempt)
      error
  end
end
|
||||
|
||||
# Execution time limit for a single reachability job: five seconds.
# NOTE(review): if Oban stops the job on timeout before `perform/1` reaches its
# error branch, no follow-up attempt is scheduled; confirm the HTTP request
# itself gives up in under five seconds.
@impl true
def timeout(_job) do
  :timer.seconds(5)
end
|
||||
|
||||
defp check_reachability(domain) do
|
||||
case HTTP.get("https://#{domain}/") do
|
||||
{:ok, %{status: status}} when status in 200..299 ->
|
||||
Instances.set_reachable("https://#{domain}")
|
||||
:ok
|
||||
|
||||
{:ok, %{status: _status}} ->
|
||||
|
|
@ -26,6 +40,51 @@ defmodule Pleroma.Workers.ReachabilityWorker do
|
|||
end
|
||||
end
|
||||
|
||||
# Execution time limit for a single reachability job: five seconds.
@impl true
def timeout(_job) do
  :timer.seconds(5)
end
|
||||
# Decides what to do after a failed check.
#
# In the "final" phase nothing more is scheduled. Otherwise the phase's
# configuration is looked up: once `attempt` has reached the phase's attempt
# limit the domain moves to the following phase, else the same phase is retried
# with the attempt counter incremented.
defp handle_failed_attempt(_domain, "final", _attempt), do: :ok

defp handle_failed_attempt(domain, phase, attempt) do
  case get_phase_config(phase) do
    {_interval, attempt_limit, following_phase} when attempt >= attempt_limit ->
      schedule_next_phase(domain, following_phase)

    {interval, _attempt_limit, _following_phase} ->
      schedule_retry(domain, phase, attempt + 1, interval)
  end
end
|
||||
|
||||
# Phase table. Each clause returns
# `{interval_minutes, max_attempts, next_phase}` for the given phase name.
# "final" is the terminal marker: it has no interval and no successor.
defp get_phase_config("phase_1min") do
  {1, 4, "phase_15min"}
end

defp get_phase_config("phase_15min") do
  {15, 4, "phase_1hour"}
end

defp get_phase_config("phase_1hour") do
  {1 * 60, 4, "phase_8hour"}
end

defp get_phase_config("phase_8hour") do
  {8 * 60, 4, "phase_24hour"}
end

defp get_phase_config("phase_24hour") do
  {24 * 60, 4, "final"}
end

defp get_phase_config("final") do
  {nil, 0, nil}
end
|
||||
|
||||
# Starts the given phase for `domain` at attempt 1.
#
# Reaching "final" means every phase has been used up, so nothing is enqueued.
# For any other phase the first attempt is scheduled one interval of that phase
# from now; the job is built and inserted by `schedule_retry/4`.
defp schedule_next_phase(_domain, "final"), do: :ok

defp schedule_next_phase(domain, next_phase) do
  {interval_minutes, _max_attempts, _next_phase} = get_phase_config(next_phase)

  schedule_retry(domain, next_phase, 1, interval_minutes)
end
|
||||
|
||||
@doc """
Enqueues a reachability check for `domain` that runs `interval_minutes` minutes from now.

The job args carry `phase` and `attempt` so the worker knows where in the
schedule this check belongs. Returns the result of `Oban.insert/1`.
"""
def schedule_retry(domain, phase, attempt, interval_minutes) do
  delay_seconds = interval_minutes * 60
  run_at = DateTime.utc_now() |> DateTime.add(delay_seconds, :second)

  job_args = %{
    "domain" => domain,
    "phase" => phase,
    "attempt" => attempt
  }

  # NOTE(review): Oban documents `:replace` as a list of fields to overwrite on
  # a unique conflict; confirm that `replace: true` has the intended effect.
  job_args
  |> new(scheduled_at: run_at, replace: true)
  |> Oban.insert()
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue