Add cookies and headers config to pull.py
Hopefully this can work around that Cloudflare CAPTCHA. Ugh.
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -1,10 +1,10 @@
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import re
|
||||
from typing import Optional, Union
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup as Soup
|
||||
@@ -14,7 +14,6 @@ from .db import get_db
|
||||
from .files import file_cache
|
||||
from .hist import generate_histogram_svg
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -22,10 +21,23 @@ BANS_URL = "https://4chan.org/bans"
|
||||
PREVIEW_RE = re.compile(r"var postPreviews = (.+)")
|
||||
|
||||
|
||||
def get_pull_config() -> dict[str, Any]:
    """Load optional request settings from ``./config.json``.

    The path is relative to the current working directory. Callers in this
    module read the ``"headers"`` and ``"cookies"`` keys from the result.

    Returns:
        The decoded JSON document, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    path = Path("./config.json")
    try:
        # Open directly rather than testing exists() first: one filesystem
        # hit, and no window in which the file can vanish between the check
        # and the open. JSON is UTF-8, so do not depend on the locale's
        # default encoding.
        with open(path, encoding="utf-8") as fp:
            return json.load(fp)
    except FileNotFoundError:
        log.debug("no %s, using empty config", path)
        return {}
|
||||
|
||||
|
||||
@file_cache(directory=config.CACHE_DIR, suffix=".html")
async def get_bans_html() -> str:
    """Fetch the page at ``BANS_URL`` and return its body as text.

    The ``"headers"`` and ``"cookies"`` entries from ``get_pull_config()``
    (both default to empty) are sent with the request. Only this single
    configured request is made.

    NOTE(review): the ``file_cache`` decorator presumably stores the result
    under ``config.CACHE_DIR``; confirm its behavior in ``.files``.

    Returns:
        The response body decoded as text.
    """
    # Named ``pull_config`` so the module-level ``config`` referenced by the
    # decorator above is not shadowed inside the function.
    pull_config = get_pull_config()
    headers: dict[str, str] = pull_config.get("headers", {})
    cookies: dict[str, str] = pull_config.get("cookies", {})
    # Cookies are set on the session; headers are passed per request.
    async with aiohttp.ClientSession(cookies=cookies) as session:
        async with session.get(BANS_URL, headers=headers) as resp:
            return await resp.text()
|
||||
|
||||
|
||||
@@ -33,14 +45,18 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes]
|
||||
if "thumb" not in post:
|
||||
return None
|
||||
|
||||
config = get_pull_config()
|
||||
headers: dict[str, str] = config.get("headers", {})
|
||||
cookies: dict[str, str] = config.get("cookies", {})
|
||||
|
||||
thumb_path = Path(thumb_path)
|
||||
if thumb_path.exists():
|
||||
return thumb_path.read_bytes()
|
||||
else:
|
||||
url = f"https://i.4cdn.org/bans/thumb/{post['board']}/{post['thumb']}s.jpg"
|
||||
log.info("Downloading %s", url)
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url) as resp:
|
||||
async with aiohttp.ClientSession(cookies=cookies) as session:
|
||||
async with session.get(url, headers=headers) as resp:
|
||||
thumb_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
content = await resp.read()
|
||||
thumb_path.write_bytes(content)
|
||||
|
||||
Reference in New Issue
Block a user