commit 803fd6a75b75e8e08c838a07ca78e4cf46971201 Author: Alek Ratzloff Date: Sun Feb 19 00:21:11 2023 -0800 Initial commit * Bans are logged into the bans.db file * Cache is created in the bans/ directory Signed-off-by: Alek Ratzloff diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ac90c9e --- /dev/null +++ b/.gitignore @@ -0,0 +1,175 @@ +bans +bans.db +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# End of https://www.toptal.com/developers/gitignore/api/python diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..2beaa4a --- /dev/null +++ b/Pipfile @@ -0,0 +1,15 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +beautifulsoup4 = "*" +httpx = "*" + +[dev-packages] +mypy = "*" +black = "*" + +[requires] +python_version = "3.10" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..ec68bc2 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,222 @@ +{ + "_meta": { + "hash": { + "sha256": "c16a1f907c2fb7b3793e6c17d841288f4d329038a7a952ade758b6163beeffa2" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.10" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "anyio": { + "hashes": [ + "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421", + "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3" + ], + "markers": "python_full_version >= '3.6.2'", + "version": "==3.6.2" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:0e79446b10b3ecb499c1556f7e228a53e64a2bfcebd455f370d8927cb5b59e39", + "sha256:bc4bdda6717de5a2987436fb8d72f45dc90dd856bdfd512a1314ce90349a0106" + ], + "index": "pypi", + "version": "==4.11.2" + }, + "certifi": { + "hashes": [ + "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", + "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.12.7" + }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" + }, + "httpcore": { + "hashes": [ + "sha256:c5d6f04e2fc530f39e0c077e6a30caa53f1451096120f1f38b954afd0b17c0cb", + "sha256:da1fb708784a938aa084bde4feb8317056c55037247c787bd7e19eb2c2949dc0" + ], + "markers": "python_version >= '3.7'", + "version": "==0.16.3" + }, + "httpx": { + "hashes": [ + "sha256:9818458eb565bb54898ccb9b8b251a28785dd4a55afbc23d0eb410754fe7d0f9", + "sha256:a211fcce9b1254ea24f0cd6af9869b3d29aba40154e947d2a07bb499b3e310d6" + ], + "index": "pypi", + "version": "==0.23.3" + }, + "idna": { + "hashes": [ + "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", + "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + ], + "version": "==3.4" + }, + "rfc3986": { + "extras": [ + "idna2008" + ], + "hashes": [ + "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835", + "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97" + ], + "version": "==1.5.0" + }, + "sniffio": { + "hashes": [ + "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", + "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.0" + }, + "soupsieve": { + "hashes": [ + "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955", + "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a" + ], + "markers": "python_version >= '3.7'", + "version": "==2.4" + } + }, + "develop": { + "black": { + "hashes": [ + "sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd", + "sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555", + "sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481", + "sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468", + "sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9", + "sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a", + "sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958", + "sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580", + "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26", + "sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32", + "sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8", + "sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753", + "sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b", + "sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074", + "sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651", + "sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24", + "sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6", + "sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad", + "sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac", + "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221", + "sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06", + "sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27", + "sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648", + "sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739", + "sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104" + ], + "index": "pypi", + "version": "==23.1.0" + }, + "click": { + "hashes": [ + "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", + "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.3" + }, + "mypy": { + "hashes": [ + "sha256:0af4f0e20706aadf4e6f8f8dc5ab739089146b83fd53cb4a7e0e850ef3de0bb6", + "sha256:15b5a824b58c7c822c51bc66308e759243c32631896743f030daf449fe3677f3", + "sha256:17455cda53eeee0a4adb6371a21dd3dbf465897de82843751cf822605d152c8c", + "sha256:2013226d17f20468f34feddd6aae4635a55f79626549099354ce641bc7d40262", + "sha256:24189f23dc66f83b839bd1cce2dfc356020dfc9a8bae03978477b15be61b062e", + "sha256:27a0f74a298769d9fdc8498fcb4f2beb86f0564bcdb1a37b58cbbe78e55cf8c0", + "sha256:28cea5a6392bb43d266782983b5a4216c25544cd7d80be681a155ddcdafd152d", + "sha256:448de661536d270ce04f2d7dddaa49b2fdba6e3bd8a83212164d4174ff43aa65", + "sha256:48525aec92b47baed9b3380371ab8ab6e63a5aab317347dfe9e55e02aaad22e8", + "sha256:5bc8d6bd3b274dd3846597855d96d38d947aedba18776aa998a8d46fabdaed76", + "sha256:5deb252fd42a77add936b463033a59b8e48eb2eaec2976d76b6878d031933fe4", + "sha256:5f546ac34093c6ce33f6278f7c88f0f147a4849386d3bf3ae193702f4fe31407", + "sha256:5fdd63e4f50e3538617887e9aee91855368d9fc1dea30da743837b0df7373bc4", + "sha256:65b122a993d9c81ea0bfde7689b3365318a88bde952e4dfa1b3a8b4ac05d168b", + "sha256:71a808334d3f41ef011faa5a5cd8153606df5fc0b56de5b2e89566c8093a0c9a", + "sha256:920169f0184215eef19294fa86ea49ffd4635dedfdea2b57e45cb4ee85d5ccaf", + "sha256:93a85495fb13dc484251b4c1fd7a5ac370cd0d812bbfc3b39c1bafefe95275d5", + "sha256:a2948c40a7dd46c1c33765718936669dc1f628f134013b02ff5ac6c7ef6942bf", + "sha256:c6c2ccb7af7154673c591189c3687b013122c5a891bb5651eca3db8e6c6c55bd", + "sha256:c96b8a0c019fe29040d520d9257d8c8f122a7343a8307bf8d6d4a43f5c5bfcc8", + "sha256:d42a98e76070a365a1d1c220fcac8aa4ada12ae0db679cb4d910fabefc88b994", + "sha256:dbeb24514c4acbc78d205f85dd0e800f34062efcc1f4a4857c57e4b4b8712bff", + "sha256:e60d0b09f62ae97a94605c3f73fd952395286cf3e3b9e7b97f60b01ddfbbda88", + "sha256:e64f48c6176e243ad015e995de05af7f22bbe370dbb5b32bd6988438ec873919", + "sha256:e831662208055b006eef68392a768ff83596035ffd6d846786578ba1714ba8f6", + "sha256:eda5c8b9949ed411ff752b9a01adda31afe7eae1e53e946dbdf9db23865e66c4" + ], + "index": "pypi", + "version": "==1.0.1" + }, + "mypy-extensions": { + "hashes": [ + "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", + "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.0" + }, + "packaging": { + "hashes": [ + "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", + "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97" + ], + "markers": "python_version >= '3.7'", + "version": "==23.0" + }, + "pathspec": { + "hashes": [ + "sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229", + "sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc" + ], + "markers": "python_version >= '3.7'", + "version": "==0.11.0" + }, + "platformdirs": { + "hashes": [ + "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9", + "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567" + ], + "markers": "python_version >= '3.7'", + "version": "==3.0.0" + }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version < '3.11'", + "version": "==2.0.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", + "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + ], + "markers": "python_version >= '3.7'", + "version": "==4.5.0" + } + } +} diff --git a/chanbans/__init__.py b/chanbans/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/chanbans/__main__.py b/chanbans/__main__.py new file mode 100644 index 0000000..08dfe22 --- /dev/null +++ b/chanbans/__main__.py @@ -0,0 +1,123 @@ +import asyncio +import functools +import json +from pathlib import Path +import re +import sqlite3 +import time +from typing import Optional, Union + +import httpx +from bs4 import BeautifulSoup as Soup + + +BANS_URL = "https://4chan.org/bans" +PREVIEW_RE = re.compile(r"var postPreviews = (.+)") +DB_PATH = "bans.db" + + +def get_db(): + return sqlite3.connect(DB_PATH) + + +def file_cache( + directory: str = ".", format: str = "%Y%m%d%H%M", suffix: str = "", deduplicate=True +): + def newest_in_dir(path: Union[str, Path]) -> Optional[Path]: + d = Path(path) + newest = None + newest_time = 0 + for p in d.glob("*" + suffix): + if not p.is_file(): + continue + stats = p.stat() + if stats.st_mtime > newest_time: + newest = p + newest_time = stats.st_mtime + return newest + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + d = Path(directory) + d.mkdir(parents=True, exist_ok=True) + path = Path(directory, time.strftime(format) + suffix) + if path.exists(): + return path.read_text() + else: + text = func(*args, **kwargs) + if deduplicate: + # find the most recent file in the path + newest_path = newest_in_dir(directory) + if newest_path: + newest_text = newest_path.read_text() + if newest_text == text: + # Move the old cache to the new path + newest_path.rename(path) + return text + path.write_text(text) + return text + # + + return wrapper + + return decorator + + +@file_cache(directory="bans", suffix=".html") +def get_bans_html() -> str: + r = httpx.get(BANS_URL) + return r.text + + +def main(): + # Ensure DB tables + db = get_db() + db.executescript( + """ + create table if not exists bans ( + id integer primary key, + action varchar(5), + board varchar(10), + length varchar(10), + post text unique, + reason varchar(200) + ); + """ + ) + + # Get HTML + html = get_bans_html() + # Get post JSON + m = PREVIEW_RE.search(html) + posts = json.loads(m[1][:-1]) + # Parse HTML + soup = Soup(html, "html.parser") + rows = soup.find_all("tr") + # Get labels + labels = [next(head.children).lower() for head in [c for c in rows[0] if c != "\n"]] + rows = rows[1:] + + cur = db.cursor() + + for i, row in enumerate(rows): + # Labels + # ['board', 'action', 'length', 'post', 'reason', 'time'] + cols = { + key: next(value.children) + for key, value in zip(labels, [c for c in row if c != "\n"]) + if key != "time" + } + cols["post"] = posts[cols["post"]["data-pid"]] + # Try to create post in database + try: + with db: + curs = db.execute( + "insert into bans (action, board, length, post, reason) values(?, ?, ?, ?, ?)", + (cols['action'], cols['board'], cols['length'], json.dumps(cols['post']), cols['reason']), + ) + except: + pass + + +main() diff --git a/chanbans/__{main}__.py b/chanbans/__{main}__.py new file mode 100644 index 0000000..e69de29