RichMedia: Do a HEAD request to check content type/length

This shouldn't be too expensive, since the connections are pooled, but it should save us some bandwidth since we won't fetch non-html files and files that are too large for us to process (especially since you can't cancel a request without closing the connection with HTTP1).
2020-09-14 14:45:58 +03:00 · 2020-09-14 14:45:58 +03:00 · f70335002d
commit f70335002d
parent f66a15c4a5
3 changed files with 91 additions and 1 deletions
--- a/test/web/rich_media/parser_test.exs
+++ b/test/web/rich_media/parser_test.exs
@@ -56,6 +56,27 @@ defmodule Pleroma.Web.RichMedia.ParserTest do

      %{method: :get, url: "http://example.com/error"} ->
        {:error, :overload}
+
+      %{
+        method: :head,
+        url: "http://example.com/huge-page"
+      } ->
+        %Tesla.Env{
+          status: 200,
+          headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
+        }
+
+      %{
+        method: :head,
+        url: "http://example.com/pdf-file"
+      } ->
+        %Tesla.Env{
+          status: 200,
+          headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
+        }
+
+      %{method: :head} ->
+        %Tesla.Env{status: 404, body: "", headers: []}
    end)

    :ok
@@ -144,4 +165,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
  test "returns error if getting page was not successful" do
    assert {:error, :overload} = Parser.parse("http://example.com/error")
  end
+
+  test "does a HEAD request to check if the body is too large" do
+    assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page")
+  end
+
+  test "does a HEAD request to check if the body is html" do
+    assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file")
+  end
 end