Merge remote-tracking branch 'origin/develop' into merge-ogp-twitter-parsers

This commit is contained in:
Egor Kislitsyn 2020-06-15 16:03:40 +04:00
commit 58e4e3db8b
No known key found for this signature in database
GPG key ID: 1B49CB15B71E7805
27 changed files with 371 additions and 182 deletions

View file

@ -9,7 +9,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser
@spec validate_page_url(any()) :: :ok | :error
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do
validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
@ -18,8 +18,8 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|> parse_uri(page_url)
end
defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority})
when scheme == "https" and not is_nil(authority) do
defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
when is_binary(authority) do
cond do
host in Config.get([:rich_media, :ignore_hosts], []) ->
:error

View file

@ -91,7 +91,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
html
|> parse_html()
|> maybe_parse()
|> Map.put(:url, url)
|> Map.put("url", url)
|> clean_parsed_data()
|> check_parsed_data()
rescue
@ -111,8 +111,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
end)
end
defp check_parsed_data(%{title: title} = data)
when is_binary(title) and byte_size(title) > 0 do
defp check_parsed_data(%{"title" => title} = data)
when is_binary(title) and title != "" do
{:ok, data}
end
@ -123,11 +123,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp clean_parsed_data(data) do
data
|> Enum.reject(fn {key, val} ->
with {:ok, _} <- Jason.encode(%{key => val}) do
false
else
_ -> true
end
not match?({:ok, _}, Jason.encode(%{key => val}))
end)
|> Map.new()
end

View file

@ -22,19 +22,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
{_tag, attributes, _children} = html_node
data =
Enum.into(attributes, %{}, fn {name, value} ->
Map.new(attributes, fn {name, value} ->
{name, String.trim_leading(value, "#{prefix}:")}
end)
%{String.to_atom(data[key_name]) => data[value_name]}
%{data[key_name] => data[value_name]}
end
defp maybe_put_title(%{title: _} = meta, _), do: meta
defp maybe_put_title(%{"title" => _} = meta, _), do: meta
defp maybe_put_title(meta, html) when meta != %{} do
case get_page_title(html) do
"" -> meta
title -> Map.put_new(meta, :title, title)
title -> Map.put_new(meta, "title", title)
end
end

View file

@ -5,7 +5,7 @@
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
def parse(html, _data) do
with elements = [_ | _] <- get_discovery_data(html),
{:ok, oembed_url} <- get_oembed_url(elements),
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
oembed_data
else
@ -17,19 +17,13 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
html |> Floki.find("link[type='application/json+oembed']")
end
defp get_oembed_url(nodes) do
{"link", attributes, _children} = nodes |> hd()
{:ok, Enum.into(attributes, %{})["href"]}
defp get_oembed_url([{"link", attributes, _children} | _]) do
Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end)
end
defp get_oembed_data(url) do
{:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
{:ok, data} = Jason.decode(json)
data = data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
{:ok, data}
with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do
Jason.decode(json)
end
end
end