Merge branch 'dev-lanodan-url-regex' into 'develop'

lib/pleroma/formatter.ex: Fix URL regex

Closes #127

See merge request pleroma/pleroma!69
This commit is contained in:
kaniini 2018-08-16 15:41:16 +00:00
commit 183ccd1812
3 changed files with 42 additions and 6 deletions

View file

@ -165,8 +165,29 @@ defmodule Pleroma.Formatter do
@emoji
end
@link_regex ~r/https?:\/\/[\w\.\/?=\-#\+%&@~'\(\):]+[\w\/]/u
@link_regex ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui
# IANA got a list https://www.iana.org/assignments/uri-schemes/ but
# Stuff like ipfs isnt in it
# There is very niche stuff
@uri_schemes [
"https://",
"http://",
"dat://",
"dweb://",
"gopher://",
"ipfs://",
"ipns://",
"irc:",
"ircs:",
"magnet:",
"mailto:",
"mumble:",
"ssb://",
"xmpp:"
]
# TODO: make it use something other than @link_regex
def html_escape(text) do
Regex.split(@link_regex, text, include_captures: true)
|> Enum.map_every(2, fn chunk ->
@ -176,11 +197,18 @@ defmodule Pleroma.Formatter do
|> Enum.join("")
end
@doc "changes http:... links to html links"
@doc "changes scheme:... urls to html links"
def add_links({subs, text}) do
additionnal_schemes =
Application.get_env(:pleroma, :uri_schemes, [])
|> Keyword.get(:additionnal_schemes, [])
links =
Regex.scan(@link_regex, text)
|> Enum.map(fn [url] -> {Ecto.UUID.generate(), url} end)
text
|> String.split([" ", "\t", "<br>"])
|> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes ++ additionnal_schemes) end)
|> Enum.filter(fn word -> Regex.match?(@link_regex, word) end)
|> Enum.map(fn url -> {Ecto.UUID.generate(), url} end)
|> Enum.sort_by(fn {_, url} -> -String.length(url) end)
uuid_text =