Update database format

The old database format stored each post's data as a single JSON string.
Post data is now broken out into individual columns so that it can be
searched more easily.

Additionally, a conversion script (convert_db.py) is provided.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2023-07-25 15:45:50 -07:00
parent 9353bc3ac9
commit e85e3a03a0
3 changed files with 242 additions and 14 deletions

View File

@@ -1,4 +1,5 @@
import sqlite3
from typing import Optional
DB_PATH = "bans.db"
@@ -14,10 +15,41 @@ def get_db(db_path: str = DB_PATH):
action varchar(5),
board varchar(10),
length varchar(10),
post text unique,
now varchar(30),
name varchar(100),
trip varchar(30),
com text,
time varchar(30),
sub varchar(100),
nsfw boolean,
thumb varchar(100),
ext varchar(10),
w int,
h int,
tn_w int,
tn_h int,
md5 varchar(100),
fsize integer,
filename text,
tim varchar(30),
thumb_path text,
reason varchar(200)
);
"""
)
return db
def search_db(
board: Optional[str] = None,
reason: Optional[str] = None,
name: Optional[str] = None,
com: Optional[str] = None,
sub: Optional[str] = None,
time_before: int = 0,
time_after: int = 0,
md5: Optional[str] = None,
):
query = """
SELECT action, board, length, post, thumb_path, reason
"""

View File

@@ -77,7 +77,7 @@ async def pull():
post = posts[cols["post"]["data-pid"]]
cols["post"] = post
if 'thumb' in post:
if "thumb" in post:
thumb_path = Path(THUMBS_DIR, f"{post['thumb']}s.jpg")
download_jobs += [get_thumb(thumb_path, post)]
else:
@@ -86,20 +86,87 @@ async def pull():
# Try to create post in database
try:
with db:
post = cols["post"]
post["action"] = cols["action"]
# post['board'] = cols['board']
post["length"] = cols["length"]
post["thumb_path"] = str(thumb_path)
post["reason"] = cols["reason"]
if "trip" not in post:
post["trip"] = None
if "nsfw" not in post:
post["nsfw"] = False
if "thumb" not in post:
post["thumb"] = None
if "ext" not in post:
post["ext"] = None
if "w" not in post:
post["w"] = 0
else:
post["w"] = int(post["w"])
if "h" not in post:
post["h"] = 0
else:
post["h"] = int(post["h"])
if "tn_w" not in post:
post["tn_w"] = 0
else:
post["tn_w"] = int(post["tn_w"])
if "tn_h" not in post:
post["tn_h"] = 0
else:
post["tn_h"] = int(post["tn_h"])
if "md5" not in post:
post["md5"] = None
if "fsize" not in post:
post["fsize"] = None
if "filename" not in post:
post["filename"] = None
if "tim" not in post:
post["tim"] = None
db.execute(
"insert into bans (action, board, length, post, thumb_path, reason) values(?, ?, ?, ?, ?, ?)",
(
cols["action"],
cols["board"],
cols["length"],
json.dumps(cols["post"]),
str(thumb_path),
cols["reason"],
),
"""
INSERT INTO bans (action, board, length, now, name, trip, com, time, sub, nsfw, thumb, ext, w, h, tn_w, tn_h, md5, fsize, filename, tim, thumb_path, reason)
VALUES (
:action,
:board,
:length,
:now,
:name,
:trip,
:com,
:time,
:sub,
:nsfw,
:thumb,
:ext,
:w,
:h,
:tn_w,
:tn_h,
:md5,
:fsize,
:filename,
:tim,
:thumb_path,
:reason
)
""",
post,
)
except Exception as ex:
msg = str(ex)
if 'UNIQUE' not in msg:
log.exception()
if "UNIQUE" not in msg:
log.exception("error inserting data")
raise SystemExit()
# Finish off thumbnail jobs
await asyncio.gather(*download_jobs)

129
convert_db.py Normal file
View File

@@ -0,0 +1,129 @@
import json
import sqlite3
from chanbans.db import get_db
def dict_factory(cursor, row):
    """Build a dict for one result row, keyed by column name.

    Has the ``(cursor, row)`` signature expected of an ``sqlite3``
    ``row_factory``.

    Args:
        cursor: Object whose ``description`` attribute is a sequence of
            per-column entries; the first item of each entry is used as
            the key.
        row: Sequence of column values, indexed in the same order as
            ``cursor.description``.

    Returns:
        A dict mapping each column name to the value at the same position
        in ``row``. If two columns share a name, the later one wins.
    """
    return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}
# Open the existing database through the project helper (called with its
# default arguments) and open a separate SQLite file, "new_db.db", that will
# receive the converted rows.
old_db = get_db()
new_db = sqlite3.connect("new_db.db")
# Create the destination `bans` table, with one column per post field, unless
# it already exists.
new_db.executescript(
"""
create table if not exists bans(
id integer primary key,
action varchar(5),
board varchar(10),
length varchar(10),
now varchar(30),
name varchar(100),
trip varchar(30),
com text,
time varchar(30),
sub varchar(100),
nsfw boolean,
thumb varchar(100),
ext varchar(10),
w int,
h int,
tn_w int,
tn_h int,
md5 varchar(100),
fsize integer,
filename text,
tim varchar(30),
thumb_path text,
reason varchar(200)
);
"""
)
# Have the old database return rows as dicts keyed by column name, so the
# conversion code can read columns by name.
old_db.row_factory = dict_factory
# Load every existing ban row into memory before converting.
bans = old_db.execute("SELECT * FROM BANS").fetchall()
import pprint
# Post keys that may be absent from the stored JSON, with the value inserted
# in their place. "trip" is included so that a post without a tripcode does
# not fail the :trip binding in the INSERT below.
_MISSING_DEFAULTS = {
    "trip": None,
    "nsfw": False,
    "thumb": None,
    "ext": None,
    "md5": None,
    "fsize": None,
    "filename": None,
    "tim": None,
}

# Image/thumbnail dimensions: stored as integers, 0 when the key is absent.
_DIMENSION_KEYS = ("w", "h", "tn_w", "tn_h")

_INSERT_BAN = """
    INSERT INTO bans (action, board, length, now, name, trip, com, time, sub, nsfw, thumb, ext, w, h, tn_w, tn_h, md5, fsize, filename, tim, thumb_path, reason)
    VALUES (
        :action,
        :board,
        :length,
        :now,
        :name,
        :trip,
        :com,
        :time,
        :sub,
        :nsfw,
        :thumb,
        :ext,
        :w,
        :h,
        :tn_w,
        :tn_h,
        :md5,
        :fsize,
        :filename,
        :tim,
        :thumb_path,
        :reason
    )
"""

# Run every insert inside one transaction: the connection context manager
# commits when the block finishes and rolls back if any row fails, so a
# failed conversion does not leave a half-filled table behind.
with new_db:
    for ban in bans:
        # The old format kept the whole post as a JSON string in `post`.
        post = json.loads(ban["post"])

        # Merge the ban-level columns into the post dict, which is then used
        # directly as the named-parameter mapping for the INSERT.
        # NOTE(review): `board` is not copied from the ban row, so :board is
        # presumably supplied by the post JSON itself -- confirm.
        post["action"] = ban["action"]
        post["length"] = ban["length"]
        post["thumb_path"] = ban["thumb_path"]
        post["reason"] = ban["reason"]

        for key, default in _MISSING_DEFAULTS.items():
            post.setdefault(key, default)
        for key in _DIMENSION_KEYS:
            post[key] = int(post.get(key, 0))

        new_db.execute(_INSERT_BAN, post)