Remove test code and add hostname blacklist to Linkbot

Linkbot will now block addresses that attempt to use localhost, *.local,
*.home, *.localdomain, hosts that don't have a dot in them, and IP
addresses. This is to avoid exposing the bot to local addresses on the
host computer.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-06-14 16:41:40 -04:00
parent 6b44e21e53
commit f12bc13e13

View File

@@ -4,8 +4,16 @@ defmodule Omnibot.Contrib.Linkbot do
@default_config timeout: 30_000 @default_config timeout: 30_000
@hostname_blacklist ~r/(^localhost$|\.local$|\.localdomain$|\.home$|^[^.]+$|^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$)/i
@doc """
Returns `true` when the host of `url` must not be fetched.

The host is extracted with `URI.parse/1` and tested against
`@hostname_blacklist`. A URL with no usable host (the parsed host is `nil`
or an empty string) is also reported as blacklisted, so callers fail closed
instead of crashing or requesting an address that could not be vetted.
"""
@spec blacklisted?(String.t()) :: boolean()
def blacklisted?(url) do
  case URI.parse(url).host do
    # Regex.match?/2 only accepts binaries; guard so a nil host cannot raise.
    host when is_binary(host) and host != "" ->
      Regex.match?(@hostname_blacklist, host)

    # No host could be determined: refuse rather than guess.
    _ ->
      true
  end
end
defmodule Client do defmodule Client do
use Tesla use Tesla
alias Omnibot.Contrib.Linkbot
plug Tesla.Middleware.Headers, [{"user-agent", "Tesla/Omnibot"}] plug Tesla.Middleware.Headers, [{"user-agent", "Tesla/Omnibot"}]
plug Tesla.Middleware.FollowRedirects, max_redirects: 10 plug Tesla.Middleware.FollowRedirects, max_redirects: 10
@@ -15,7 +23,6 @@ defmodule Omnibot.Contrib.Linkbot do
def get_title(url) do def get_title(url) do
if should_get?(url) do if should_get?(url) do
Process.sleep(11_000)
resp = get!(url) resp = get!(url)
%{"title" => title} = Regex.named_captures(@title_regex, resp.body) %{"title" => title} = Regex.named_captures(@title_regex, resp.body)
title title
@@ -23,12 +30,16 @@ defmodule Omnibot.Contrib.Linkbot do
end end
defp should_get?(url) do defp should_get?(url) do
if Linkbot.blacklisted?(url) do
false
else
resp = head!(url) resp = head!(url)
Tesla.get_header(resp, "content-type") Tesla.get_header(resp, "content-type")
|> String.downcase() |> String.downcase()
|> String.contains?(["html", "text"]) |> String.contains?(["html", "text"])
end end
end end
end
@url_regex ~r"\bhttps?://[^\s]+" @url_regex ~r"\bhttps?://[^\s]+"