Initial commit

* Bans are logged into the bans.db file
* Cache is created in the bans/ directory

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2023-02-19 00:21:11 -08:00
commit 803fd6a75b
6 changed files with 535 additions and 0 deletions

175
.gitignore vendored Normal file
View File

@@ -0,0 +1,175 @@
bans
bans.db
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# End of https://www.toptal.com/developers/gitignore/api/python

15
Pipfile Normal file
View File

@@ -0,0 +1,15 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
beautifulsoup4 = "*"
httpx = "*"
[dev-packages]
mypy = "*"
black = "*"
[requires]
python_version = "3.10"

222
Pipfile.lock generated Normal file
View File

@@ -0,0 +1,222 @@
{
"_meta": {
"hash": {
"sha256": "c16a1f907c2fb7b3793e6c17d841288f4d329038a7a952ade758b6163beeffa2"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.10"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"anyio": {
"hashes": [
"sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421",
"sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"
],
"markers": "python_full_version >= '3.6.2'",
"version": "==3.6.2"
},
"beautifulsoup4": {
"hashes": [
"sha256:0e79446b10b3ecb499c1556f7e228a53e64a2bfcebd455f370d8927cb5b59e39",
"sha256:bc4bdda6717de5a2987436fb8d72f45dc90dd856bdfd512a1314ce90349a0106"
],
"index": "pypi",
"version": "==4.11.2"
},
"certifi": {
"hashes": [
"sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3",
"sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"
],
"markers": "python_version >= '3.6'",
"version": "==2022.12.7"
},
"h11": {
"hashes": [
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
"sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"
],
"markers": "python_version >= '3.7'",
"version": "==0.14.0"
},
"httpcore": {
"hashes": [
"sha256:c5d6f04e2fc530f39e0c077e6a30caa53f1451096120f1f38b954afd0b17c0cb",
"sha256:da1fb708784a938aa084bde4feb8317056c55037247c787bd7e19eb2c2949dc0"
],
"markers": "python_version >= '3.7'",
"version": "==0.16.3"
},
"httpx": {
"hashes": [
"sha256:9818458eb565bb54898ccb9b8b251a28785dd4a55afbc23d0eb410754fe7d0f9",
"sha256:a211fcce9b1254ea24f0cd6af9869b3d29aba40154e947d2a07bb499b3e310d6"
],
"index": "pypi",
"version": "==0.23.3"
},
"idna": {
"hashes": [
"sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"
],
"version": "==3.4"
},
"rfc3986": {
"extras": [
"idna2008"
],
"hashes": [
"sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835",
"sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"
],
"version": "==1.5.0"
},
"sniffio": {
"hashes": [
"sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101",
"sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"
],
"markers": "python_version >= '3.7'",
"version": "==1.3.0"
},
"soupsieve": {
"hashes": [
"sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955",
"sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"
],
"markers": "python_version >= '3.7'",
"version": "==2.4"
}
},
"develop": {
"black": {
"hashes": [
"sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd",
"sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555",
"sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481",
"sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468",
"sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9",
"sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a",
"sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958",
"sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580",
"sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26",
"sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32",
"sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8",
"sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753",
"sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b",
"sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074",
"sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651",
"sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24",
"sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6",
"sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad",
"sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac",
"sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221",
"sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06",
"sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27",
"sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648",
"sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739",
"sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"
],
"index": "pypi",
"version": "==23.1.0"
},
"click": {
"hashes": [
"sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e",
"sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"
],
"markers": "python_version >= '3.7'",
"version": "==8.1.3"
},
"mypy": {
"hashes": [
"sha256:0af4f0e20706aadf4e6f8f8dc5ab739089146b83fd53cb4a7e0e850ef3de0bb6",
"sha256:15b5a824b58c7c822c51bc66308e759243c32631896743f030daf449fe3677f3",
"sha256:17455cda53eeee0a4adb6371a21dd3dbf465897de82843751cf822605d152c8c",
"sha256:2013226d17f20468f34feddd6aae4635a55f79626549099354ce641bc7d40262",
"sha256:24189f23dc66f83b839bd1cce2dfc356020dfc9a8bae03978477b15be61b062e",
"sha256:27a0f74a298769d9fdc8498fcb4f2beb86f0564bcdb1a37b58cbbe78e55cf8c0",
"sha256:28cea5a6392bb43d266782983b5a4216c25544cd7d80be681a155ddcdafd152d",
"sha256:448de661536d270ce04f2d7dddaa49b2fdba6e3bd8a83212164d4174ff43aa65",
"sha256:48525aec92b47baed9b3380371ab8ab6e63a5aab317347dfe9e55e02aaad22e8",
"sha256:5bc8d6bd3b274dd3846597855d96d38d947aedba18776aa998a8d46fabdaed76",
"sha256:5deb252fd42a77add936b463033a59b8e48eb2eaec2976d76b6878d031933fe4",
"sha256:5f546ac34093c6ce33f6278f7c88f0f147a4849386d3bf3ae193702f4fe31407",
"sha256:5fdd63e4f50e3538617887e9aee91855368d9fc1dea30da743837b0df7373bc4",
"sha256:65b122a993d9c81ea0bfde7689b3365318a88bde952e4dfa1b3a8b4ac05d168b",
"sha256:71a808334d3f41ef011faa5a5cd8153606df5fc0b56de5b2e89566c8093a0c9a",
"sha256:920169f0184215eef19294fa86ea49ffd4635dedfdea2b57e45cb4ee85d5ccaf",
"sha256:93a85495fb13dc484251b4c1fd7a5ac370cd0d812bbfc3b39c1bafefe95275d5",
"sha256:a2948c40a7dd46c1c33765718936669dc1f628f134013b02ff5ac6c7ef6942bf",
"sha256:c6c2ccb7af7154673c591189c3687b013122c5a891bb5651eca3db8e6c6c55bd",
"sha256:c96b8a0c019fe29040d520d9257d8c8f122a7343a8307bf8d6d4a43f5c5bfcc8",
"sha256:d42a98e76070a365a1d1c220fcac8aa4ada12ae0db679cb4d910fabefc88b994",
"sha256:dbeb24514c4acbc78d205f85dd0e800f34062efcc1f4a4857c57e4b4b8712bff",
"sha256:e60d0b09f62ae97a94605c3f73fd952395286cf3e3b9e7b97f60b01ddfbbda88",
"sha256:e64f48c6176e243ad015e995de05af7f22bbe370dbb5b32bd6988438ec873919",
"sha256:e831662208055b006eef68392a768ff83596035ffd6d846786578ba1714ba8f6",
"sha256:eda5c8b9949ed411ff752b9a01adda31afe7eae1e53e946dbdf9db23865e66c4"
],
"index": "pypi",
"version": "==1.0.1"
},
"mypy-extensions": {
"hashes": [
"sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d",
"sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"
],
"markers": "python_version >= '3.5'",
"version": "==1.0.0"
},
"packaging": {
"hashes": [
"sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2",
"sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"
],
"markers": "python_version >= '3.7'",
"version": "==23.0"
},
"pathspec": {
"hashes": [
"sha256:3a66eb970cbac598f9e5ccb5b2cf58930cd8e3ed86d393d541eaf2d8b1705229",
"sha256:64d338d4e0914e91c1792321e6907b5a593f1ab1851de7fc269557a21b30ebbc"
],
"markers": "python_version >= '3.7'",
"version": "==0.11.0"
},
"platformdirs": {
"hashes": [
"sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9",
"sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567"
],
"markers": "python_version >= '3.7'",
"version": "==3.0.0"
},
"tomli": {
"hashes": [
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
],
"markers": "python_version < '3.11'",
"version": "==2.0.1"
},
"typing-extensions": {
"hashes": [
"sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb",
"sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"
],
"markers": "python_version >= '3.7'",
"version": "==4.5.0"
}
}
}

0
chanbans/__init__.py Normal file
View File

123
chanbans/__main__.py Normal file
View File

@@ -0,0 +1,123 @@
import asyncio
import functools
import json
from pathlib import Path
import re
import sqlite3
import time
from typing import Optional, Union
import httpx
from bs4 import BeautifulSoup as Soup
BANS_URL = "https://4chan.org/bans"
PREVIEW_RE = re.compile(r"var postPreviews = (.+)")
DB_PATH = "bans.db"
def get_db():
return sqlite3.connect(DB_PATH)
def file_cache(
directory: str = ".", format: str = "%Y%m%d%H%M", suffix: str = "", deduplicate=True
):
def newest_in_dir(path: Union[str, Path]) -> Optional[Path]:
d = Path(path)
newest = None
newest_time = 0
for p in d.glob("*" + suffix):
if not p.is_file():
continue
stats = p.stat()
if stats.st_mtime > newest_time:
newest = p
newest_time = stats.st_mtime
return newest
def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
d = Path(directory)
d.mkdir(parents=True, exist_ok=True)
path = Path(directory, time.strftime(format) + suffix)
if path.exists():
return path.read_text()
else:
text = func(*args, **kwargs)
if deduplicate:
# find the most recent file in the path
newest_path = newest_in_dir(directory)
if newest_path:
newest_text = newest_path.read_text()
if newest_text == text:
# Move the old cache to the new path
newest_path.rename(path)
return text
path.write_text(text)
return text
#
return wrapper
return decorator
@file_cache(directory="bans", suffix=".html")
def get_bans_html() -> str:
r = httpx.get(BANS_URL)
return r.text
def main():
# Ensure DB tables
db = get_db()
db.executescript(
"""
create table if not exists bans (
id integer primary key,
action varchar(5),
board varchar(10),
length varchar(10),
post text unique,
reason varchar(200)
);
"""
)
# Get HTML
html = get_bans_html()
# Get post JSON
m = PREVIEW_RE.search(html)
posts = json.loads(m[1][:-1])
# Parse HTML
soup = Soup(html, "html.parser")
rows = soup.find_all("tr")
# Get labels
labels = [next(head.children).lower() for head in [c for c in rows[0] if c != "\n"]]
rows = rows[1:]
cur = db.cursor()
for i, row in enumerate(rows):
# Labels
# ['board', 'action', 'length', 'post', 'reason', 'time']
cols = {
key: next(value.children)
for key, value in zip(labels, [c for c in row if c != "\n"])
if key != "time"
}
cols["post"] = posts[cols["post"]["data-pid"]]
# Try to create post in database
try:
with db:
curs = db.execute(
"insert into bans (action, board, length, post, reason) values(?, ?, ?, ?, ?)",
(cols['action'], cols['board'], cols['length'], json.dumps(cols['post']), cols['reason']),
)
except:
pass
main()

0
chanbans/__{main}__.py Normal file
View File