diff --git a/lib/contrib/linkbot.ex b/lib/contrib/linkbot.ex
index 8e0b989..38bb605 100644
--- a/lib/contrib/linkbot.ex
+++ b/lib/contrib/linkbot.ex
@@ -13,6 +13,7 @@ defmodule Omnibot.Contrib.Linkbot do
defmodule Client do
use Tesla
alias Omnibot.Contrib.Linkbot
+ import Meeseeks.CSS
plug Tesla.Middleware.Headers, [{"user-agent", "Tesla/Omnibot"}]
plug Tesla.Middleware.FollowRedirects, max_redirects: 10
@@ -26,11 +27,42 @@ defmodule Omnibot.Contrib.Linkbot do
     @title_regex ~r"<title>(?<title>.+)</title>"i
 
+    @doc """
+    Fetches `url` and returns the page title, or `nil` when none is available.
+
+    Candidates are the `<meta>` tags (in the order returned by `Meeseeks.all/2`)
+    followed by the `<title>` element. The first candidate that is an `og:title`
+    or `name="title"` meta tag with content, or a `<title>` element with text, wins.
+    `nil` is returned when `get_url/1` yields no body (`should_get?/1` rejected the
+    URL) or when no candidate carries a title, so callers can filter the result.
+    """
     def get_title(url) do
+      case get_url(url) do
+        html when is_binary(html) -> title_from_html(html)
+        # `get_url/1` returns nil when the URL is skipped; do not hand that to the parser.
+        _ -> nil
+      end
+    end
+
+    # Parses `html` and returns the first usable title candidate, or nil.
+    defp title_from_html(html) do
+      document = Meeseeks.parse(html)
+      candidates = Meeseeks.all(document, css("meta")) ++ [Meeseeks.one(document, css("title"))]
+
+      # find_value/2 yields nil when nothing matches instead of raising MatchError.
+      Enum.find_value(candidates, &candidate_title/1)
+    end
+
+    # Returns the title carried by a `<meta>`/`<title>` node, or a falsy value.
+    defp candidate_title(node) do
+      (Meeseeks.attr(node, "property") == "og:title" && Meeseeks.attr(node, "content")) ||
+        (Meeseeks.attr(node, "name") == "title" && Meeseeks.attr(node, "content")) ||
+        (Meeseeks.tag(node) == "title" && Meeseeks.text(node))
+    end
+
+    # Returns the response body for `url`, or nil when `should_get?/1` is false.
+    defp get_url(url) do
       if should_get?(url) do
         Logger.info("Fetching #{url}")
-        resp = get!(url)
-        %{"title" => title} = Regex.named_captures(@title_regex, resp.body)
-        title
+        get!(url).body
       end
     end
 
@@ -53,6 +66,7 @@ defmodule Omnibot.Contrib.Linkbot do
Regex.scan(@url_regex, line)
|> Enum.flat_map(& &1)
|> Enum.map(fn url -> Client.get_title(url) end)
+ |> Enum.filter(& &1)
|> Enum.each(fn title -> Irc.send_to(irc, channel, title) end)
end
diff --git a/mix.exs b/mix.exs
index c17db13..ae9b2a3 100644
--- a/mix.exs
+++ b/mix.exs
@@ -31,8 +31,9 @@ defmodule Omnibot.MixProject do
defp deps do
# TODO : figure out how to make contrib modules optional (umbrella project?) and enable specific requirements
[
- {:tesla, "~> 1.3.0"}, # Used by Omnibot.Contrib.Linkbot
- {:sqlitex, "~> 1.7"}, # Used by Omnibot.Contrib.Wordbot
+ {:tesla, "~> 1.3.0"}, # Used by Omnibot.Contrib.Linkbot (HTTP client that fetches linked pages)
+ {:meeseeks, "~> 0.15.1"}, # Used by Omnibot.Contrib.Linkbot (HTML parsing to extract page titles)
+ {:sqlitex, "~> 1.7"}, # Used by Omnibot.Contrib.Wordbot
]
end
end
diff --git a/mix.lock b/mix.lock
index b1db375..e8bfbe3 100644
--- a/mix.lock
+++ b/mix.lock
@@ -4,13 +4,17 @@
"esqlite": {:hex, :esqlite, "0.4.1", "ba5d0bab6b9c8432ffe1bf12fee8e154a50f1c3c40eadc3a9c870c23ca94d961", [:rebar3], [], "hexpm"},
"hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
"idna": {:hex, :idna, "6.0.1", "1d038fb2e7668ce41fbf681d2c45902e52b3cb9e9c77b55334353b222c2ee50c", [:rebar3], [{:unicode_util_compat, "0.5.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
+ "meeseeks": {:hex, :meeseeks, "0.15.1", "148d5d9ea879cdb415b8bc4162ac5528f9a2fe42fbfe1802c681a2842cb1c0a4", [:mix], [{:meeseeks_html5ever, "~> 0.12.1", [hex: :meeseeks_html5ever, repo: "hexpm", optional: false]}], "hexpm"},
+ "meeseeks_html5ever": {:hex, :meeseeks_html5ever, "0.12.1", "718fab10d05b83204524a518b2b88caa37ba6a6e02f82e80d6a7bc47552fb54a", [:mix], [{:rustler, "~> 0.21.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"},
"mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"},
"parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"},
+ "rustler": {:hex, :rustler, "0.21.1", "5299980be32da997c54382e945bacaa015ed97a60745e1e639beaf6a7b278c65", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm"},
"sqlitex": {:hex, :sqlitex, "1.7.1", "022d477aab2ae999c43ae6fbd1782ff1457e0e95c251c7b5fa6f7b7b102040ff", [:mix], [{:decimal, "~> 1.7", [hex: :decimal, repo: "hexpm", optional: false]}, {:esqlite, "~> 0.4", [hex: :esqlite, repo: "hexpm", optional: false]}], "hexpm"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm"},
"tesla": {:hex, :tesla, "1.3.3", "26ae98627af5c406584aa6755ab5fc96315d70d69a24dd7f8369cfcb75094a45", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, "~> 1.3", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "~> 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
+ "toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm"},
"tzdata": {:hex, :tzdata, "1.0.3", "73470ad29dde46e350c60a66e6b360d3b99d2d18b74c4c349dbebbc27a09a3eb", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.5.0", "8516502659002cec19e244ebd90d312183064be95025a319a6c7e89f4bccd65b", [:rebar3], [], "hexpm"},
}