markov: Add process-scheduled saving

This allows markov to save (hopefully) in parallel using a
ProcessPoolExecutor. Since objects are sent over the wire and copied,
pruning in parallel is not an issue.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2022-06-03 18:24:25 -07:00
parent 04c0d05208
commit 1d016c5164

View File

@@ -264,17 +264,30 @@ class Markov(Plugin):
async def save(self, retain_after: float | None = None):
    """Persist every markov chain, optionally pruning stale ones from memory.

    Each ``chain.save`` is scheduled onto a ``ProcessPoolExecutor`` via
    ``loop.run_in_executor`` so the (potentially slow) disk writes run in
    worker processes instead of blocking the event loop. Because the chain
    object is pickled and copied into the worker, it is safe to clear the
    local in-memory copy while its save is still in flight.

    :param retain_after: if given, chains whose ``last_access`` is not
        strictly greater than this timestamp are pruned from memory
        (``clear_cache``) after their save has been scheduled; ``None``
        (the default) disables pruning.
    """
    async with self.__saving:
        from concurrent.futures import ProcessPoolExecutor

        log.info("Saving markov chains")
        futures = []
        loop = asyncio.get_running_loop()
        # ProcessPoolExecutor is a deliberate choice: the save runs in a
        # separate process with its own copy of the chain's memory, which
        # lets us simultaneously clear the local copy if it needs clearing.
        with ProcessPoolExecutor() as pool:
            for chains in self.__chains.values():
                for chain in chains.values():
                    # Start the save in a new process, in a new task.
                    futures.append(loop.run_in_executor(pool, chain.save))
                    # Prune chains that have not been accessed recently
                    # enough; the worker's pickled copy is unaffected.
                    retain = True
                    if retain_after is not None:
                        retain = chain.last_access > retain_after
                    if not retain:
                        log.info("Pruning markov chain %s from memory", chain.path)
                        chain.clear_cache()
            # Await inside the ``with`` block so the executor's blocking
            # shutdown on __exit__ only runs once every future is done,
            # rather than stalling the event loop while saves finish.
            if futures:
                await asyncio.gather(*futures)
        log.info("Done")
async def on_unload(self, conn: IrcProtocol): async def on_unload(self, conn: IrcProtocol):