Remove test code and add hostname blacklist to Linkbot
Linkbot now refuses to fetch URLs whose host is localhost, ends in .local, .home, or .localdomain, contains no dot at all, or is a literal IP address. This avoids exposing the bot to local addresses on the host computer.
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -4,8 +4,16 @@ defmodule Omnibot.Contrib.Linkbot do
|
|||||||
|
|
||||||
@default_config timeout: 30_000
|
@default_config timeout: 30_000
|
||||||
|
|
||||||
|
@hostname_blacklist ~r/(^localhost$|\.local$|\.localdomain$|\.home$|^[^.]+$|^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$)/i

@doc """
Returns `true` when the host of `url` must not be contacted.

A host is blacklisted when it matches `@hostname_blacklist`: `localhost`,
names ending in `.local`, `.localdomain` or `.home`, names without any dot,
and dotted-quad IPv4 literals. URLs with a missing or empty host are
blacklisted as well, since there is nothing safe to connect to.
"""
@spec blacklisted?(String.t()) :: boolean()
def blacklisted?(url) do
  case URI.parse(url).host do
    host when is_binary(host) ->
      # A trailing dot ("localhost.") names the same host but would slip past
      # the anchored patterns, so strip it before matching.
      normalized = String.trim_trailing(host, ".")
      normalized == "" or Regex.match?(@hostname_blacklist, normalized)

    _no_host ->
      # URI.parse/1 yields a nil host for strings without an authority part;
      # Regex.match?/2 would raise on nil, so reject such URLs outright.
      true
  end
end
|
||||||
|
|
||||||
defmodule Client do
|
defmodule Client do
|
||||||
use Tesla
|
use Tesla
|
||||||
|
alias Omnibot.Contrib.Linkbot
|
||||||
|
|
||||||
plug Tesla.Middleware.Headers, [{"user-agent", "Tesla/Omnibot"}]
|
plug Tesla.Middleware.Headers, [{"user-agent", "Tesla/Omnibot"}]
|
||||||
plug Tesla.Middleware.FollowRedirects, max_redirects: 10
|
plug Tesla.Middleware.FollowRedirects, max_redirects: 10
|
||||||
@@ -15,7 +23,6 @@ defmodule Omnibot.Contrib.Linkbot do
|
|||||||
|
|
||||||
def get_title(url) do
|
def get_title(url) do
|
||||||
if should_get?(url) do
|
if should_get?(url) do
|
||||||
Process.sleep(11_000)
|
|
||||||
resp = get!(url)
|
resp = get!(url)
|
||||||
%{"title" => title} = Regex.named_captures(@title_regex, resp.body)
|
%{"title" => title} = Regex.named_captures(@title_regex, resp.body)
|
||||||
title
|
title
|
||||||
@@ -23,12 +30,16 @@ defmodule Omnibot.Contrib.Linkbot do
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Decides whether `url` should be fetched with a full GET.
#
# Returns `false` for blacklisted hosts without contacting them. Otherwise
# issues a HEAD request and returns `true` only when the response's
# content-type header contains "html" or "text" (case-insensitive).
# Raises if the HEAD request fails, because `head!/1` is used.
defp should_get?(url) do
  if Linkbot.blacklisted?(url) do
    false
  else
    case Tesla.get_header(head!(url), "content-type") do
      # No content-type header: String.downcase/1 would raise on nil, so
      # treat the resource as not fetchable.
      nil ->
        false

      content_type ->
        content_type
        |> String.downcase()
        |> String.contains?(["html", "text"])
    end
  end
end
|
||||||
|
end
|
||||||
|
|
||||||
@url_regex ~r"\bhttps?://[^\s]+"
|
@url_regex ~r"\bhttps?://[^\s]+"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user