Add duplicate checks for the "pull" command
This checks the received JSON against following columns: * now * time * md5 * com * sub * board and will add them to the database if they don't exist. Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
from collections import defaultdict
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -74,8 +75,7 @@ async def pull():
|
|||||||
for key, value in zip(labels, [c for c in row if c != "\n"])
|
for key, value in zip(labels, [c for c in row if c != "\n"])
|
||||||
if key != "time"
|
if key != "time"
|
||||||
}
|
}
|
||||||
post = posts[cols["post"]["data-pid"]]
|
post = defaultdict(lambda: None, posts[cols["post"]["data-pid"]])
|
||||||
cols["post"] = post
|
|
||||||
|
|
||||||
if "thumb" in post:
|
if "thumb" in post:
|
||||||
thumb_path = Path(THUMBS_DIR, f"{post['thumb']}s.jpg")
|
thumb_path = Path(THUMBS_DIR, f"{post['thumb']}s.jpg")
|
||||||
@@ -86,7 +86,32 @@ async def pull():
|
|||||||
# Try to create post in database
|
# Try to create post in database
|
||||||
try:
|
try:
|
||||||
with db:
|
with db:
|
||||||
post = cols["post"]
|
|
||||||
|
# Check the last N bans for the given board. If the following columns are equal:
|
||||||
|
# * now
|
||||||
|
# * time
|
||||||
|
# * md5
|
||||||
|
# * com
|
||||||
|
# * sub
|
||||||
|
# * board
|
||||||
|
# Then we consider it to be a duplicate.
|
||||||
|
result = db.execute(
|
||||||
|
"""
|
||||||
|
select id, now, time, md5, com, sub, board
|
||||||
|
from bans
|
||||||
|
where
|
||||||
|
now = :now
|
||||||
|
and time = :time
|
||||||
|
and md5 = :md5
|
||||||
|
and com = :com
|
||||||
|
and sub = :sub
|
||||||
|
and board = :board
|
||||||
|
""",
|
||||||
|
post,
|
||||||
|
)
|
||||||
|
if _row := result.fetchone():
|
||||||
|
log.debug("duplicate found, skipping - %s", _row["id"])
|
||||||
|
continue
|
||||||
|
|
||||||
post["action"] = cols["action"]
|
post["action"] = cols["action"]
|
||||||
# post['board'] = cols['board']
|
# post['board'] = cols['board']
|
||||||
@@ -167,6 +192,6 @@ async def pull():
|
|||||||
msg = str(ex)
|
msg = str(ex)
|
||||||
if "UNIQUE" not in msg:
|
if "UNIQUE" not in msg:
|
||||||
log.exception("error inserting data")
|
log.exception("error inserting data")
|
||||||
raise SystemExit()
|
log.info("Continuing")
|
||||||
# Finish off thumbnail jobs
|
# Finish off thumbnail jobs
|
||||||
await asyncio.gather(*download_jobs)
|
await asyncio.gather(*download_jobs)
|
||||||
|
|||||||
Reference in New Issue
Block a user