Fix linkbot title bug

If a parsed HTML title had leading or trailing whitespace, that
whitespace was not stripped. This commit strips it.

Also, there are some new debug log messages in linkbot, and fetch requests now send an "Omnibot" User-Agent header. Hooray!

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2022-06-23 09:58:12 -07:00
parent d7dd0277ca
commit cac2cc20fb

View File

@@ -22,7 +22,17 @@ class TitleParser(HTMLParser):
# Constructor: forwards every positional and keyword argument to the parent
# class, then initialises the title-tracking state.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Initialised to False; where this flag is toggled is not visible here.
self.parsing_title = False
# NOTE(review): this excerpt looks like a diff rendered without +/- markers,
# so the two assignments below are presumably the old and the new version of
# the same statement rather than two live lines -- confirm against the file.
self.parsed_title: str | None = None
self.__parsed_title: str | None = None
@property
def parsed_title(self) -> str | None:
return self.__parsed_title
@parsed_title.setter
def parsed_title(self, value: str | None):
if value:
value = value.strip()
self.__parsed_title = value
def reset(self) -> None:
super().reset()
@@ -107,7 +117,10 @@ class Linkbot(Plugin):
return False
async def fetch(self, url: str) -> Tuple[int, str, str] | None:
async with aiohttp.ClientSession() as session:
headers = {
"User-Agent": "Omnibot",
}
async with aiohttp.ClientSession(headers=headers) as session:
async with session.get(url) as response:
status = response.status
content_type = response.headers["content-type"].lower()
@@ -124,12 +137,14 @@ class Linkbot(Plugin):
if not matches:
return
for url in matches:
log.debug("trying URL %s", url)
if await self.is_blocked(url):
# Skip this URL if it's blocked
log.debug("skipping URL %s because it is blocked", url)
continue
# Fetch the HTML at the URL
result = await self.fetch(url)
log.debug("got %s characters back", len(result))
if not result:
# Could not fetch this URL
log.debug("skipping URL %s because it couldn't be fetched", url)
@@ -142,6 +157,7 @@ class Linkbot(Plugin):
if not (200 <= status <= 299):
message = f"{who.nick}: (status {status})"
else:
log.debug("got title %r", title_parser.parsed_title)
message = title_parser.parsed_title
if message: