Fix linkbot title bug

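If an HTML title was parsed with surrounding whitespace, that whitespace was not stripped. This commit fixes that by stripping the title whenever it is set. It also adds some new debug log messages to linkbot and sends an explicit "Omnibot" User-Agent header when fetching URLs. Hooray! Signed-off-by: Alek Ratzloff <alekratz@gmail.com>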
2022-06-23 09:58:12 -07:00
parent d7dd0277ca
commit cac2cc20fb
1 changed file with 18 additions and 2 deletions
--- a/plugins/linkbot.py
+++ b/plugins/linkbot.py
@@ -22,7 +22,17 @@ class TitleParser(HTMLParser):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.parsing_title = False
-        self.parsed_title: str | None = None
+        self.__parsed_title: str | None = None
+
+    @property
+    def parsed_title(self) -> str | None:
+        return self.__parsed_title
+
+    @parsed_title.setter
+    def parsed_title(self, value: str | None):
+        if value:
+            value = value.strip()
+        self.__parsed_title = value

    def reset(self) -> None:
        super().reset()
@@ -107,7 +117,10 @@ class Linkbot(Plugin):
        return False

    async def fetch(self, url: str) -> Tuple[int, str, str] | None:
-        async with aiohttp.ClientSession() as session:
+        headers = {
+            "User-Agent": "Omnibot",
+        }
+        async with aiohttp.ClientSession(headers=headers) as session:
            async with session.get(url) as response:
                status = response.status
                content_type = response.headers["content-type"].lower()
@@ -124,12 +137,14 @@ class Linkbot(Plugin):
        if not matches:
            return
        for url in matches:
+            log.debug("trying URL %s", url)
            if await self.is_blocked(url):
                # Skip this URL if it's blocked
                log.debug("skipping URL %s because it is blocked", url)
                continue
            # Fetch the HTML at the URL
            result = await self.fetch(url)
+            log.debug("got %s characters back", len(result))
            if not result:
                # Could not fetch this URL
                log.debug("skipping URL %s because it couldn't be fetched", url)
@@ -142,6 +157,7 @@ class Linkbot(Plugin):
            if not (200 <= status <= 299):
                message = f"{who.nick}: (status {status})"
            else:
+                log.debug("got title %r", title_parser.parsed_title)
                message = title_parser.parsed_title

            if message: