Add filesystem hashtable for saved thumbnails

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2023-07-30 22:10:44 -07:00
parent 8213fa8508
commit 3db45d63d2

View File

@@ -44,7 +44,7 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes]
log.info("Downloading %s", url) log.info("Downloading %s", url)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.get(url) as resp: async with session.get(url) as resp:
THUMBS_DIR.mkdir(parents=True, exist_ok=True) thumb_path.parent.mkdir(parents=True, exist_ok=True)
content = await resp.read() content = await resp.read()
thumb_path.write_bytes(content) thumb_path.write_bytes(content)
return content return content
@@ -78,7 +78,9 @@ async def pull():
post = defaultdict(lambda: None, posts[cols["post"]["data-pid"]]) post = defaultdict(lambda: None, posts[cols["post"]["data-pid"]])
if "thumb" in post: if "thumb" in post:
thumb_path = Path(THUMBS_DIR, f"{post['thumb']}s.jpg") thumb = post['thumb']
thumb_name = Path(thumb[:2], f"{post['thumb']}s.jpg")
thumb_path = Path(THUMBS_DIR, thumb_name)
download_jobs += [get_thumb(thumb_path, post)] download_jobs += [get_thumb(thumb_path, post)]
else: else:
thumb_path = "" thumb_path = ""
@@ -86,6 +88,9 @@ async def pull():
# Try to create post in database # Try to create post in database
try: try:
with db: with db:
# TODO - this would probably be faster if we selected the top 25 items and did the
# comparison, especially once the database gets huge.
# Check the last N bans for the given board. If the following columns are equal: # Check the last N bans for the given board. If the following columns are equal:
# * now # * now
# * time # * time