stats: Add histogram generation to the pull command

* --generate-histogram command line option
* PULL_GENERATE_HISTOGRAM config setting, which sets the default when the command line option is not given

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2023-09-09 21:51:51 -07:00
parent 6eb48ff0d2
commit b71615a06d
4 changed files with 39 additions and 8 deletions

View File

@@ -27,10 +27,17 @@ def parse_args():
subcommands |= {subcommand} subcommands |= {subcommand}
return subparsers.add_parser(subcommand, *args, **kwargs) return subparsers.add_parser(subcommand, *args, **kwargs)
_pull_parser = add_subcommand( pull_parser = add_subcommand(
"pull", "pull",
help="Pull bans from 4chan, save thumbnails, update the database, and exit", help="Pull bans from 4chan, save thumbnails, update the database, and exit",
) )
pull_parser.add_argument(
"--generate-histogram",
action="store_true",
help="Generate a histogram SVG if any new posts are pulled",
default=config.PULL_GENERATE_HISTOGRAM,
)
# TODO(args) --histogram-path
_histogram_parser = add_subcommand( _histogram_parser = add_subcommand(
"hist", "hist",
@@ -38,6 +45,7 @@ def parse_args():
# This doesn't work as expected for some reason. Doesn't get parsed correctly # This doesn't work as expected for some reason. Doesn't get parsed correctly
# aliases=["histogram"], # aliases=["histogram"],
) )
# TODO(args) --histogram-path
_serve_parser = add_subcommand("serve", help="Start HTTP server") _serve_parser = add_subcommand("serve", help="Start HTTP server")
_help_parser = add_subcommand("help", help="Show this help message") _help_parser = add_subcommand("help", help="Show this help message")
@@ -63,7 +71,7 @@ def main():
) )
match args.command: match args.command:
case "pull": case "pull":
asyncio.run(pull()) asyncio.run(pull(generate_histogram=args.generate_histogram))
case "serve": case "serve":
run_app() run_app()
case "hist": case "hist":

View File

@@ -32,6 +32,14 @@ def one_of_default(name: str, values: Sequence[str], default_value: str) -> str:
return value return value
def default_bool(name: str, default_value: bool) -> bool:
    """Resolve the setting ``name`` to a boolean.

    The lookup itself is delegated to ``one_of_default``, which is handed
    the accepted spellings and the lower-cased string form of
    ``default_value`` ("true" or "false") as its fallback.
    NOTE(review): where ``one_of_default`` reads the value from, and how it
    reacts to a spelling outside the accepted set, is defined elsewhere in
    this module -- confirm there.

    Args:
        name: Name of the setting to look up.
        default_value: Boolean to fall back on.

    Returns:
        True when the resolved string is "yes", "true" or "1"; False for
        the other accepted spellings.
    """
    truthy = ("yes", "true", "1")
    falsy = ("no", "false", "0")
    fallback = str(default_value).lower()
    resolved = one_of_default(name, truthy + falsy, fallback)
    return resolved in truthy
THUMBS_DIR = Path(default("THUMBS_DIR", "thumbs")) THUMBS_DIR = Path(default("THUMBS_DIR", "thumbs"))
CACHE_DIR = Path(default("CACHE_DIR", "cache")) CACHE_DIR = Path(default("CACHE_DIR", "cache"))
@@ -43,9 +51,9 @@ HTTP_RESULTS_PER_PAGE = default("HTTP_RESULTS_PER_PAGE", 100)
STATIC_HANDLER = one_of_default("STATIC_HANDLER", ("remote", "local"), "local") STATIC_HANDLER = one_of_default("STATIC_HANDLER", ("remote", "local"), "local")
STATIC_LOCAL_PATH = Path(default("STATIC_LOCAL_PATH", "static")) STATIC_LOCAL_PATH = Path(default("STATIC_LOCAL_PATH", "static"))
STATIC_LOCAL_FOLLOW_SYMLINKS = one_of_default( STATIC_LOCAL_FOLLOW_SYMLINKS = default_bool("STATIC_LOCAL_FOLLOW_SYMLINKS", True)
"STATIC_LOCAL_FOLLOW_SYMLINKS", ("yes", "true", "1", "no", "false", "0"), "true"
) in ("yes", "true", "1")
STATIC_ROOT = default("STATIC_ROOT", "/static") STATIC_ROOT = default("STATIC_ROOT", "/static")
HISTOGRAM_PATH = Path(default("HISTOGRAM_PATH", "static/histogram.svg")) HISTOGRAM_PATH = Path(default("HISTOGRAM_PATH", "static/histogram.svg"))
PULL_GENERATE_HISTOGRAM = default_bool("PULL_GENERATE_HISTOGRAM", False)

View File

@@ -1,6 +1,11 @@
import logging
from pathlib import Path
from .db import get_db from .db import get_db
from .config import HISTOGRAM_PATH from .config import HISTOGRAM_PATH
from pathlib import Path
log = logging.getLogger(__name__)
def histogram_svg(): def histogram_svg():
@@ -31,5 +36,6 @@ def histogram_svg():
def generate_histogram_svg(path: Path = HISTOGRAM_PATH) -> None:
    """Render the histogram and write it to ``path`` as SVG text.

    Args:
        path: Destination file. Defaults to ``HISTOGRAM_PATH`` from the
            config module.
    """
    log.info("Generating and writing histogram SVG to %s", path)
    svg = histogram_svg()
    # Pin the encoding so the file does not depend on the platform locale.
    # write_text() returns the number of characters written, which is not
    # needed here, so ``path`` is no longer rebound to that int.
    path.write_text(svg, encoding="utf-8")

View File

@@ -12,6 +12,7 @@ from bs4 import BeautifulSoup as Soup
from . import config from . import config
from .db import get_db from .db import get_db
from .files import file_cache from .files import file_cache
from .hist import generate_histogram_svg
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -46,10 +47,12 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes]
return content return content
async def pull(): async def pull(generate_histogram: bool = config.PULL_GENERATE_HISTOGRAM):
# TODO(args) --db-path arg # TODO(args) --db-path arg
db = get_db() db = get_db()
updated = False
# Get HTML # Get HTML
html = await get_bans_html() html = await get_bans_html()
# Get post JSON # Get post JSON
@@ -121,6 +124,9 @@ async def pull():
post["now"], post["now"],
) )
# We are definitely updating this entry
updated = True
post["action"] = cols["action"] post["action"] = cols["action"]
# post['board'] = cols['board'] # post['board'] = cols['board']
post["length"] = cols["length"] post["length"] = cols["length"]
@@ -202,5 +208,8 @@ async def pull():
if "UNIQUE" not in msg: if "UNIQUE" not in msg:
log.exception("error inserting data") log.exception("error inserting data")
log.info("Continuing") log.info("Continuing")
if updated and generate_histogram:
generate_histogram_svg()
# Finish off thumbnail jobs # Finish off thumbnail jobs
await asyncio.gather(*download_jobs) await asyncio.gather(*download_jobs)