From b71615a06da314be69803a3954920c68c18ebb90 Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Sat, 9 Sep 2023 21:51:51 -0700 Subject: [PATCH] stats: Add histogram generation to the `pull` command * --generate-histogram command line option * PULL_GENERATE_HISTOGRAM if you don't want to use the command line Signed-off-by: Alek Ratzloff --- chanbans/__main__.py | 14 +++++++++++--- chanbans/config.py | 14 +++++++++++--- chanbans/hist.py | 8 +++++++- chanbans/pull.py | 11 ++++++++++- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/chanbans/__main__.py b/chanbans/__main__.py index 9661357..84ee29d 100644 --- a/chanbans/__main__.py +++ b/chanbans/__main__.py @@ -27,17 +27,25 @@ def parse_args(): subcommands |= {subcommand} return subparsers.add_parser(subcommand, *args, **kwargs) - _pull_parser = add_subcommand( + pull_parser = add_subcommand( "pull", help="Pull bans from 4chan, save thumbnails, update the database, and exit", ) + pull_parser.add_argument( + "--generate-histogram", + action="store_true", + help="Generate a histogram SVG if any new posts are pulled", + default=config.PULL_GENERATE_HISTOGRAM, + ) + # TODO(args) --histogram-path _histogram_parser = add_subcommand( "hist", help="Generate histogram file.", # This doesn't work as expected for some reason. 
Doesn't get parsed correctly - #aliases=["histogram"], + # aliases=["histogram"], ) + # TODO(args) --histogram-path _serve_parser = add_subcommand("serve", help="Start HTTP server") _help_parser = add_subcommand("help", help="Show this help message") @@ -63,7 +71,7 @@ def main(): ) match args.command: case "pull": - asyncio.run(pull()) + asyncio.run(pull(generate_histogram=args.generate_histogram)) case "serve": run_app() case "hist": diff --git a/chanbans/config.py b/chanbans/config.py index af16db1..555442c 100644 --- a/chanbans/config.py +++ b/chanbans/config.py @@ -32,6 +32,14 @@ def one_of_default(name: str, values: Sequence[str], default_value: str) -> str: return value +def default_bool(name: str, default_value: bool) -> bool: + return one_of_default( + name, + ("yes", "true", "1", "no", "false", "0"), + str(default_value).lower(), + ) in ("yes", "true", "1") + + THUMBS_DIR = Path(default("THUMBS_DIR", "thumbs")) CACHE_DIR = Path(default("CACHE_DIR", "cache")) @@ -43,9 +51,9 @@ HTTP_RESULTS_PER_PAGE = default("HTTP_RESULTS_PER_PAGE", 100) STATIC_HANDLER = one_of_default("STATIC_HANDLER", ("remote", "local"), "local") STATIC_LOCAL_PATH = Path(default("STATIC_LOCAL_PATH", "static")) -STATIC_LOCAL_FOLLOW_SYMLINKS = one_of_default( - "STATIC_LOCAL_FOLLOW_SYMLINKS", ("yes", "true", "1", "no", "false", "0"), "true" -) in ("yes", "true", "1") +STATIC_LOCAL_FOLLOW_SYMLINKS = default_bool("STATIC_LOCAL_FOLLOW_SYMLINKS", True) STATIC_ROOT = default("STATIC_ROOT", "/static") HISTOGRAM_PATH = Path(default("HISTOGRAM_PATH", "static/histogram.svg")) + +PULL_GENERATE_HISTOGRAM = default_bool("PULL_GENERATE_HISTOGRAM", False) diff --git a/chanbans/hist.py b/chanbans/hist.py index 98a526f..0d430dd 100644 --- a/chanbans/hist.py +++ b/chanbans/hist.py @@ -1,6 +1,11 @@ +import logging +from pathlib import Path + from .db import get_db from .config import HISTOGRAM_PATH -from pathlib import Path + + +log = logging.getLogger(__name__) def histogram_svg(): @@ -31,5 +36,6 @@ def 
histogram_svg(): def generate_histogram_svg(path: Path = HISTOGRAM_PATH): + log.info("Generating and writing histogram SVG to %s", path) svg = histogram_svg() path = path.write_text(svg) diff --git a/chanbans/pull.py b/chanbans/pull.py index f2f92cd..a3270ad 100644 --- a/chanbans/pull.py +++ b/chanbans/pull.py @@ -12,6 +12,7 @@ from bs4 import BeautifulSoup as Soup from . import config from .db import get_db from .files import file_cache +from .hist import generate_histogram_svg log = logging.getLogger(__name__) @@ -46,10 +47,12 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes] return content -async def pull(): +async def pull(generate_histogram: bool = config.PULL_GENERATE_HISTOGRAM): # TODO(args) --db-path arg db = get_db() + updated = False + # Get HTML html = await get_bans_html() # Get post JSON @@ -121,6 +124,9 @@ async def pull(): post["now"], ) + # We are definitely updating this entry + updated = True + post["action"] = cols["action"] # post['board'] = cols['board'] post["length"] = cols["length"] @@ -202,5 +208,8 @@ async def pull(): if "UNIQUE" not in msg: log.exception("error inserting data") log.info("Continuing") + if updated and generate_histogram: + generate_histogram_svg() + # Finish off thumbnail jobs await asyncio.gather(*download_jobs)