From cac2cc20fb0dff0e034cf51af6f15afef99180b4 Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Thu, 23 Jun 2022 09:58:12 -0700 Subject: [PATCH] Fix linkbot title bug If an HTML title was parsed with surrounding whitespace, linkbot did not strip that whitespace from the title. This fixes that. Also, there are some new debug log messages in linkbot. Hooray! Signed-off-by: Alek Ratzloff --- plugins/linkbot.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/plugins/linkbot.py b/plugins/linkbot.py index 6703a76..d0470c2 100644 --- a/plugins/linkbot.py +++ b/plugins/linkbot.py @@ -22,7 +22,17 @@ class TitleParser(HTMLParser): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.parsing_title = False - self.parsed_title: str | None = None + self.__parsed_title: str | None = None + + @property + def parsed_title(self) -> str | None: + return self.__parsed_title + + @parsed_title.setter + def parsed_title(self, value: str | None): + if value: + value = value.strip() + self.__parsed_title = value def reset(self) -> None: super().reset() @@ -107,7 +117,10 @@ class Linkbot(Plugin): return False async def fetch(self, url: str) -> Tuple[int, str, str] | None: - async with aiohttp.ClientSession() as session: + headers = { + "User-Agent": "Omnibot", + } + async with aiohttp.ClientSession(headers=headers) as session: async with session.get(url) as response: status = response.status content_type = response.headers["content-type"].lower() @@ -124,12 +137,14 @@ class Linkbot(Plugin): if not matches: return for url in matches: + log.debug("trying URL %s", url) if await self.is_blocked(url): # Skip this URL if it's blocked log.debug("skipping URL %s because it is blocked", url) continue # Fetch the HTML at the URL result = await self.fetch(url) + log.debug("got %s characters back", len(result)) if not result: # Could not fetch this URL log.debug("skipping URL %s because it couldn't be fetched", url) @@ -142,6 +157,7 @@ class 
Linkbot(Plugin): if not (200 <= status <= 299): message = f"{who.nick}: (status {status})" else: + log.debug("got title %r", title_parser.parsed_title) message = title_parser.parsed_title if message: