Minor fixups in linkbot on the most recent changes for HTML decoding

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2022-06-01 21:50:22 -07:00
parent 57e1d211a3
commit 10ffeaf63a

View File

@@ -37,11 +37,13 @@ class TitleParser(HTMLParser):
         if tag == "title":
             self.parsing_title = True
         elif tag == "meta":
-            attrs = dict(attrs_list)
-            if attrs.get("property", None) == "og:title":
-                self.parsed_title = attrs.get("content", None)
-            elif attrs.get("name", None) == "title":
-                self.parsed_title = attrs.get("content", None)
+            # filter None values
+            attrs = {name: value for name, value in attrs_list if value}
+            # where property="og:title" or name="title", use the "content" attribute
+            if attrs.get("property", None) == "og:title" and "content" in attrs:
+                self.parsed_title = html.unescape(attrs["content"])
+            elif attrs.get("name", None) == "title" and "content" in attrs:
+                self.parsed_title = html.unescape(attrs["content"])
 
     def handle_endtag(self, tag: str) -> None:
         match tag.lower():