stats: Add histogram generation to the pull command
* Add the --generate-histogram command line option
* Add the PULL_GENERATE_HISTOGRAM config setting, if you don't want to use the command line

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -27,17 +27,25 @@ def parse_args():
|
|||||||
subcommands |= {subcommand}
|
subcommands |= {subcommand}
|
||||||
return subparsers.add_parser(subcommand, *args, **kwargs)
|
return subparsers.add_parser(subcommand, *args, **kwargs)
|
||||||
|
|
||||||
_pull_parser = add_subcommand(
|
pull_parser = add_subcommand(
|
||||||
"pull",
|
"pull",
|
||||||
help="Pull bans from 4chan, save thumbnails, update the database, and exit",
|
help="Pull bans from 4chan, save thumbnails, update the database, and exit",
|
||||||
)
|
)
|
||||||
|
pull_parser.add_argument(
|
||||||
|
"--generate-histogram",
|
||||||
|
action="store_true",
|
||||||
|
help="Generate a histogram SVG if any new posts are pulled",
|
||||||
|
default=config.PULL_GENERATE_HISTOGRAM,
|
||||||
|
)
|
||||||
|
# TODO(args) --histogram-path
|
||||||
|
|
||||||
_histogram_parser = add_subcommand(
|
_histogram_parser = add_subcommand(
|
||||||
"hist",
|
"hist",
|
||||||
help="Generate histogram file.",
|
help="Generate histogram file.",
|
||||||
# This doesn't work as expected for some reason. Doesn't get parsed correctly
|
# This doesn't work as expected for some reason. Doesn't get parsed correctly
|
||||||
#aliases=["histogram"],
|
# aliases=["histogram"],
|
||||||
)
|
)
|
||||||
|
# TODO(args) --histogram-path
|
||||||
|
|
||||||
_serve_parser = add_subcommand("serve", help="Start HTTP server")
|
_serve_parser = add_subcommand("serve", help="Start HTTP server")
|
||||||
_help_parser = add_subcommand("help", help="Show this help message")
|
_help_parser = add_subcommand("help", help="Show this help message")
|
||||||
@@ -63,7 +71,7 @@ def main():
|
|||||||
)
|
)
|
||||||
match args.command:
|
match args.command:
|
||||||
case "pull":
|
case "pull":
|
||||||
asyncio.run(pull())
|
asyncio.run(pull(generate_histogram=args.generate_histogram))
|
||||||
case "serve":
|
case "serve":
|
||||||
run_app()
|
run_app()
|
||||||
case "hist":
|
case "hist":
|
||||||
|
|||||||
@@ -32,6 +32,14 @@ def one_of_default(name: str, values: Sequence[str], default_value: str) -> str:
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def default_bool(name: str, default_value: bool) -> bool:
    """Resolve the setting ``name`` to a boolean via ``one_of_default``.

    The accepted spellings are "yes"/"true"/"1" and "no"/"false"/"0".
    ``default_value`` is handed to ``one_of_default`` as its lowercase
    string form ("true" or "false").

    Returns:
        True when the resolved string is one of the truthy spellings,
        False otherwise.
    """
    truthy = ("yes", "true", "1")
    falsy = ("no", "false", "0")
    fallback = str(default_value).lower()
    # NOTE(review): one_of_default presumably validates the configured value
    # against the accepted spellings -- confirm against its definition.
    resolved = one_of_default(name, truthy + falsy, fallback)
    return resolved in truthy
|
||||||
|
|
||||||
|
|
||||||
THUMBS_DIR = Path(default("THUMBS_DIR", "thumbs"))
|
THUMBS_DIR = Path(default("THUMBS_DIR", "thumbs"))
|
||||||
CACHE_DIR = Path(default("CACHE_DIR", "cache"))
|
CACHE_DIR = Path(default("CACHE_DIR", "cache"))
|
||||||
|
|
||||||
@@ -43,9 +51,9 @@ HTTP_RESULTS_PER_PAGE = default("HTTP_RESULTS_PER_PAGE", 100)
|
|||||||
|
|
||||||
STATIC_HANDLER = one_of_default("STATIC_HANDLER", ("remote", "local"), "local")
|
STATIC_HANDLER = one_of_default("STATIC_HANDLER", ("remote", "local"), "local")
|
||||||
STATIC_LOCAL_PATH = Path(default("STATIC_LOCAL_PATH", "static"))
|
STATIC_LOCAL_PATH = Path(default("STATIC_LOCAL_PATH", "static"))
|
||||||
STATIC_LOCAL_FOLLOW_SYMLINKS = one_of_default(
|
# default_bool() requires an explicit default value; True keeps the "true"
# default of the one_of_default(...) expression this line replaces.
STATIC_LOCAL_FOLLOW_SYMLINKS = default_bool("STATIC_LOCAL_FOLLOW_SYMLINKS", True)
|
||||||
"STATIC_LOCAL_FOLLOW_SYMLINKS", ("yes", "true", "1", "no", "false", "0"), "true"
|
|
||||||
) in ("yes", "true", "1")
|
|
||||||
STATIC_ROOT = default("STATIC_ROOT", "/static")
|
STATIC_ROOT = default("STATIC_ROOT", "/static")
|
||||||
|
|
||||||
HISTOGRAM_PATH = Path(default("HISTOGRAM_PATH", "static/histogram.svg"))
|
HISTOGRAM_PATH = Path(default("HISTOGRAM_PATH", "static/histogram.svg"))
|
||||||
|
|
||||||
|
PULL_GENERATE_HISTOGRAM = default_bool("PULL_GENERATE_HISTOGRAM", False)
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from .db import get_db
|
from .db import get_db
|
||||||
from .config import HISTOGRAM_PATH
|
from .config import HISTOGRAM_PATH
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def histogram_svg():
|
def histogram_svg():
|
||||||
@@ -31,5 +36,6 @@ def histogram_svg():
|
|||||||
|
|
||||||
|
|
||||||
def generate_histogram_svg(path: Path = HISTOGRAM_PATH) -> None:
    """Build the histogram SVG and write it to ``path``.

    Args:
        path: Destination file for the SVG text; defaults to
            ``HISTOGRAM_PATH`` imported from the config module.
    """
    log.info("Generating and writing histogram SVG to %s", path)
    svg = histogram_svg()
    # Path.write_text returns the number of characters written, so the result
    # is deliberately not assigned back to ``path``.
    path.write_text(svg)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from bs4 import BeautifulSoup as Soup
|
|||||||
from . import config
|
from . import config
|
||||||
from .db import get_db
|
from .db import get_db
|
||||||
from .files import file_cache
|
from .files import file_cache
|
||||||
|
from .hist import generate_histogram_svg
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@@ -46,10 +47,12 @@ async def get_thumb(thumb_path: Union[str, Path], post: dict) -> Optional[bytes]
|
|||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
async def pull():
|
async def pull(generate_histogram: bool = config.GENERATE_HISTOGRAM):
|
||||||
# TODO(args) --db-path arg
|
# TODO(args) --db-path arg
|
||||||
db = get_db()
|
db = get_db()
|
||||||
|
|
||||||
|
updated = False
|
||||||
|
|
||||||
# Get HTML
|
# Get HTML
|
||||||
html = await get_bans_html()
|
html = await get_bans_html()
|
||||||
# Get post JSON
|
# Get post JSON
|
||||||
@@ -121,6 +124,9 @@ async def pull():
|
|||||||
post["now"],
|
post["now"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# We are definitely updating this entry
|
||||||
|
updated = True
|
||||||
|
|
||||||
post["action"] = cols["action"]
|
post["action"] = cols["action"]
|
||||||
# post['board'] = cols['board']
|
# post['board'] = cols['board']
|
||||||
post["length"] = cols["length"]
|
post["length"] = cols["length"]
|
||||||
@@ -202,5 +208,8 @@ async def pull():
|
|||||||
if "UNIQUE" not in msg:
|
if "UNIQUE" not in msg:
|
||||||
log.exception("error inserting data")
|
log.exception("error inserting data")
|
||||||
log.info("Continuing")
|
log.info("Continuing")
|
||||||
|
if updated and generate_histogram:
|
||||||
|
generate_histogram_svg()
|
||||||
|
|
||||||
# Finish off thumbnail jobs
|
# Finish off thumbnail jobs
|
||||||
await asyncio.gather(*download_jobs)
|
await asyncio.gather(*download_jobs)
|
||||||
|
|||||||
Reference in New Issue
Block a user