diff --git a/chanbans/pull.py b/chanbans/pull.py index 8cde9bb..511b5e5 100644 --- a/chanbans/pull.py +++ b/chanbans/pull.py @@ -44,7 +44,7 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes] log.info("Downloading %s", url) async with aiohttp.ClientSession() as session: async with session.get(url) as resp: - THUMBS_DIR.mkdir(parents=True, exist_ok=True) + thumb_path.parent.mkdir(parents=True, exist_ok=True) content = await resp.read() thumb_path.write_bytes(content) return content @@ -78,7 +78,9 @@ async def pull(): post = defaultdict(lambda: None, posts[cols["post"]["data-pid"]]) if "thumb" in post: - thumb_path = Path(THUMBS_DIR, f"{post['thumb']}s.jpg") + thumb = post['thumb'] + thumb_name = Path(thumb[:2], f"{post['thumb']}s.jpg") + thumb_path = Path(THUMBS_DIR, thumb_name) download_jobs += [get_thumb(thumb_path, post)] else: thumb_path = "" @@ -86,6 +88,9 @@ async def pull(): # Try to create post in database try: with db: + # TODO - this would probably be faster if we selected the top 25 items and did the + # comparison, especially when the database gets huge. + # Check the last N bans for the given board. If the following columns are equal: # * now # * time