Remove test code and add hostname blacklist to Linkbot

Linkbot will now block addresses that attempt to use localhost, *.local,
*.home, *.localdomain, hosts that don't have a dot in them, and IP
addresses. This is to avoid exposing the bot to local addresses on the
host computer.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-06-14 16:41:40 -04:00
parent 6b44e21e53
commit f12bc13e13

View File

@@ -4,8 +4,16 @@ defmodule Omnibot.Contrib.Linkbot do
# Default configuration values.
# NOTE(review): `timeout` is presumably in milliseconds — confirm where this config is consumed.
@default_config timeout: 30_000
# Hostnames rejected by `blacklisted?/1`. Case-insensitive; a host matches when it is:
#   - exactly "localhost"
#   - a name ending in ".local", ".localdomain" or ".home"
#   - a name containing no dot at all
#   - four dot-separated groups of digits (an IPv4-style literal)
@hostname_blacklist ~r/(^localhost$|\.local$|\.localdomain$|\.home$|^[^.]+$|^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$)/i
@doc """
Returns `true` when the host of `url` must not be contacted.

The host is extracted with `URI.parse/1` and checked against
`@hostname_blacklist`. A URL with no usable host (for example a bare word or
a scheme-only string, where `URI.parse/1` yields a `nil` or empty host) is
treated as blacklisted, since there is nothing safe to connect to.
"""
def blacklisted?(url) do
  case URI.parse(url).host do
    host when is_binary(host) and host != "" ->
      Regex.match?(@hostname_blacklist, host)

    # No host could be parsed; `Regex.match?/2` would raise on `nil`, so refuse the URL.
    _ ->
      true
  end
end
defmodule Client do
use Tesla
alias Omnibot.Contrib.Linkbot
plug Tesla.Middleware.Headers, [{"user-agent", "Tesla/Omnibot"}]
plug Tesla.Middleware.FollowRedirects, max_redirects: 10
@@ -15,7 +23,6 @@ defmodule Omnibot.Contrib.Linkbot do
def get_title(url) do
if should_get?(url) do
Process.sleep(11_000)
resp = get!(url)
%{"title" => title} = Regex.named_captures(@title_regex, resp.body)
title
@@ -23,12 +30,16 @@ defmodule Omnibot.Contrib.Linkbot do
end
# Decides whether the body of `url` should be downloaded.
#
# Returns `false` for blacklisted hosts without issuing any request. Otherwise
# it sends a HEAD request and returns `true` only when the response's
# content-type header mentions "html" or "text" (case-insensitive).
# A response with no content-type header yields `false`.
defp should_get?(url) do
  if Linkbot.blacklisted?(url) do
    false
  else
    case Tesla.get_header(head!(url), "content-type") do
      # Header absent: `String.downcase/1` would raise on `nil`, so skip the URL.
      nil ->
        false

      content_type ->
        content_type
        |> String.downcase()
        |> String.contains?(["html", "text"])
    end
  end
end
end
# Matches an http:// or https:// URL that starts at a word boundary and runs up to the next whitespace character.
@url_regex ~r"\bhttps?://[^\s]+"