Add cookies and headers config to pull.py
Hopefully this can work around that Cloudflare captcha. Ugh.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -1,10 +1,10 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from collections import defaultdict
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
|
||||||
import re
|
import re
|
||||||
from typing import Optional, Union
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Optional, Union
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from bs4 import BeautifulSoup as Soup
|
from bs4 import BeautifulSoup as Soup
|
||||||
@@ -14,7 +14,6 @@ from .db import get_db
|
|||||||
from .files import file_cache
|
from .files import file_cache
|
||||||
from .hist import generate_histogram_svg
|
from .hist import generate_histogram_svg
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -22,10 +21,23 @@ BANS_URL = "https://4chan.org/bans"
|
|||||||
PREVIEW_RE = re.compile(r"var postPreviews = (.+)")
|
PREVIEW_RE = re.compile(r"var postPreviews = (.+)")
|
||||||
|
|
||||||
|
|
||||||
|
def get_pull_config() -> dict[str, Any]:
    """Load the optional pull settings from ``./config.json``.

    The path is relative to the current working directory. Callers in this
    module read the ``"headers"`` and ``"cookies"`` keys of the result and
    pass them to their HTTP requests.

    Returns:
        The parsed JSON document, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    path = Path("./config.json")
    try:
        # Read directly rather than checking exists() first: the file could
        # disappear between the check and the open. JSON is decoded as UTF-8
        # explicitly so the result does not depend on the platform's locale.
        raw = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        log.debug("no %s, using empty config", path)
        return {}
    return json.loads(raw)
|
||||||
|
|
||||||
|
|
||||||
@file_cache(directory=config.CACHE_DIR, suffix=".html")
|
@file_cache(directory=config.CACHE_DIR, suffix=".html")
|
||||||
async def get_bans_html() -> str:
|
async def get_bans_html() -> str:
|
||||||
async with aiohttp.ClientSession() as session:
|
config = get_pull_config()
|
||||||
async with session.get(BANS_URL) as resp:
|
headers: dict[str, str] = config.get("headers", {})
|
||||||
|
cookies: dict[str, str] = config.get("cookies", {})
|
||||||
|
async with aiohttp.ClientSession(cookies=cookies) as session:
|
||||||
|
async with session.get(BANS_URL, headers=headers) as resp:
|
||||||
return await resp.text()
|
return await resp.text()
|
||||||
|
|
||||||
|
|
||||||
@@ -33,14 +45,18 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes]
|
|||||||
if "thumb" not in post:
|
if "thumb" not in post:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
config = get_pull_config()
|
||||||
|
headers: dict[str, str] = config.get("headers", {})
|
||||||
|
cookies: dict[str, str] = config.get("cookies", {})
|
||||||
|
|
||||||
thumb_path = Path(thumb_path)
|
thumb_path = Path(thumb_path)
|
||||||
if thumb_path.exists():
|
if thumb_path.exists():
|
||||||
return thumb_path.read_bytes()
|
return thumb_path.read_bytes()
|
||||||
else:
|
else:
|
||||||
url = f"https://i.4cdn.org/bans/thumb/{post['board']}/{post['thumb']}s.jpg"
|
url = f"https://i.4cdn.org/bans/thumb/{post['board']}/{post['thumb']}s.jpg"
|
||||||
log.info("Downloading %s", url)
|
log.info("Downloading %s", url)
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession(cookies=cookies) as session:
|
||||||
async with session.get(url) as resp:
|
async with session.get(url, headers=headers) as resp:
|
||||||
thumb_path.parent.mkdir(parents=True, exist_ok=True)
|
thumb_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
content = await resp.read()
|
content = await resp.read()
|
||||||
thumb_path.write_bytes(content)
|
thumb_path.write_bytes(content)
|
||||||
|
|||||||
Reference in New Issue
Block a user