Fix linkbot title bug

If a parsed HTML title had leading or trailing whitespace, linkbot did
not strip it. This fixes that.

Also, linkbot now sends a "User-Agent: Omnibot" header when fetching URLs, and there are some new debug log messages. Hooray!

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2022-06-23 09:58:12 -07:00
parent d7dd0277ca
commit cac2cc20fb

View File

@@ -22,7 +22,17 @@ class TitleParser(HTMLParser):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.parsing_title = False self.parsing_title = False
self.parsed_title: str | None = None self.__parsed_title: str | None = None
@property
def parsed_title(self) -> str | None:
return self.__parsed_title
@parsed_title.setter
def parsed_title(self, value: str | None):
if value:
value = value.strip()
self.__parsed_title = value
def reset(self) -> None: def reset(self) -> None:
super().reset() super().reset()
@@ -107,7 +117,10 @@ class Linkbot(Plugin):
return False return False
async def fetch(self, url: str) -> Tuple[int, str, str] | None: async def fetch(self, url: str) -> Tuple[int, str, str] | None:
async with aiohttp.ClientSession() as session: headers = {
"User-Agent": "Omnibot",
}
async with aiohttp.ClientSession(headers=headers) as session:
async with session.get(url) as response: async with session.get(url) as response:
status = response.status status = response.status
content_type = response.headers["content-type"].lower() content_type = response.headers["content-type"].lower()
@@ -124,12 +137,14 @@ class Linkbot(Plugin):
if not matches: if not matches:
return return
for url in matches: for url in matches:
log.debug("trying URL %s", url)
if await self.is_blocked(url): if await self.is_blocked(url):
# Skip this URL if it's blocked # Skip this URL if it's blocked
log.debug("skipping URL %s because it is blocked", url) log.debug("skipping URL %s because it is blocked", url)
continue continue
# Fetch the HTML at the URL # Fetch the HTML at the URL
result = await self.fetch(url) result = await self.fetch(url)
log.debug("got %s characters back", len(result))
if not result: if not result:
# Could not fetch this URL # Could not fetch this URL
log.debug("skipping URL %s because it couldn't be fetched", url) log.debug("skipping URL %s because it couldn't be fetched", url)
@@ -142,6 +157,7 @@ class Linkbot(Plugin):
if not (200 <= status <= 299): if not (200 <= status <= 299):
message = f"{who.nick}: (status {status})" message = f"{who.nick}: (status {status})"
else: else:
log.debug("got title %r", title_parser.parsed_title)
message = title_parser.parsed_title message = title_parser.parsed_title
if message: if message: