diff --git a/chanbans/pull.py b/chanbans/pull.py
index b90f4f3..7253011 100644
--- a/chanbans/pull.py
+++ b/chanbans/pull.py
@@ -1,10 +1,10 @@
 import asyncio
-from collections import defaultdict
 import json
 import logging
-from pathlib import Path
 import re
-from typing import Optional, Union
+from collections import defaultdict
+from pathlib import Path
+from typing import Any, Optional, Union
 
 import aiohttp
 from bs4 import BeautifulSoup as Soup
@@ -14,7 +14,6 @@ from .db import get_db
 from .files import file_cache
 from .hist import generate_histogram_svg
 
-
 log = logging.getLogger(__name__)
 
 
@@ -22,10 +21,43 @@ BANS_URL = "https://4chan.org/bans"
 PREVIEW_RE = re.compile(r"var postPreviews = (.+)")
 
 
+def get_pull_config() -> dict[str, Any]:
+    """Load optional request settings from ``./config.json``.
+
+    The path is relative to the current working directory, and the file is
+    read again on every call.
+
+    Returns:
+        The decoded JSON document, or an empty dict when the file does not
+        exist.
+
+    Raises:
+        json.JSONDecodeError: If the file exists but is not valid JSON.
+    """
+    path = Path("./config.json")
+    try:
+        # JSON is UTF-8 by specification; do not rely on the locale default.
+        with open(path, encoding="utf-8") as fp:
+            return json.load(fp)
+    except FileNotFoundError:
+        log.debug("no %s, using empty config", path)
+        return {}
+
+
 @file_cache(directory=config.CACHE_DIR, suffix=".html")
 async def get_bans_html() -> str:
-    async with aiohttp.ClientSession() as session:
-        async with session.get(BANS_URL) as resp:
+    """Fetch ``BANS_URL`` and return the response body as text.
+
+    Request headers and session cookies come from the ``"headers"`` and
+    ``"cookies"`` entries of :func:`get_pull_config`; each defaults to empty.
+    """
+    # Not named ``config``: that would shadow the module-level ``config`` name
+    # (used by the decorator above) for the whole function body.
+    pull_config = get_pull_config()
+    headers: dict[str, str] = pull_config.get("headers", {})
+    cookies: dict[str, str] = pull_config.get("cookies", {})
+    async with aiohttp.ClientSession(cookies=cookies) as session:
+        async with session.get(BANS_URL, headers=headers) as resp:
             return await resp.text()
 
 
@@ -33,14 +65,19 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes]
     if "thumb" not in post:
         return None
 
     thumb_path = Path(thumb_path)
     if thumb_path.exists():
         return thumb_path.read_bytes()
     else:
         url = f"https://i.4cdn.org/bans/thumb/{post['board']}/{post['thumb']}s.jpg"
         log.info("Downloading %s", url)
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url) as resp:
+        # Read the request settings only when a download is actually needed;
+        # ``pull_config`` avoids shadowing the module-level ``config`` name.
+        pull_config = get_pull_config()
+        headers: dict[str, str] = pull_config.get("headers", {})
+        cookies: dict[str, str] = pull_config.get("cookies", {})
+        async with aiohttp.ClientSession(cookies=cookies) as session:
+            async with session.get(url, headers=headers) as resp:
                 thumb_path.parent.mkdir(parents=True, exist_ok=True)
                 content = await resp.read()
                 thumb_path.write_bytes(content)