Hello, I noticed some behaviour I thought was a little peculiar when I was [profiling WAL file sizes](https://theunterminatedstring.com/sqlite-vacuuming/). For a previously populated, but now empty database, calling `incremental_vacuum` is prone to push a lot of pages through the WAL. However, executing many small `incremental_vacuum(N)`s to remove the same number of free pages results in significantly less pages being moved through the WAL. I've included a script to demonstrate this below. This script populates the database with 100 rows of 1 MB data, then deletes them. This produced a database with: Page Count: 24482 Freelist Count: 24206 When a single `incremental_vacuum` was called to remove all the pages the result is a lot pages moved through the WAL: Total pages written to WAL: 23777 Total pages moved to DB: 23777 When `incremental_vacuum(200)` was called 122 times the WAL file stats are: Total pages written to WAL: 544 Total pages moved to DB: 544 This was run using: SQLite3 version: 3.31.1 I'll admit that I haven't went near the source code to try and make sense of it. But I guess I was wondering if the above behaviour is expected? ``` #! /usr/bin/env python3 import sqlite3 import os import sys def print_versions(): print(f"SQLite Python Module Version: {sqlite3.version_info}") print(f"SQLite3 version: {sqlite3.sqlite_version}") class ScenarioHandles: def __init__(self): self.connection = None self.cursor = None def _checkpoint_truncate(handles): handles.cursor.execute("PRAGMA wal_checkpoint(TRUNCATE);") # Returns - (written to WAL, moved to db) def _checkpoint_passive(handles): handles.cursor.execute("PRAGMA wal_checkpoint(PASSIVE);") row = handles.cursor.fetchone() return (row[1], row[2]) def _get_pages_usage(handles): handles.cursor.execute("PRAGMA page_count;") page_count = handles.cursor.fetchone()[0] handles.cursor.execute("PRAGMA freelist_count;") freelist_count = handles.cursor.fetchone()[0] used_count = page_count - freelist_count print(f"Page Count: {page_count} Freelist Count: {freelist_count}") return freelist_count def _incremental_vacuum(handles, pages): handles.cursor.execute(f"PRAGMA incremental_vacuum({pages});") handles.cursor.fetchall() def _write_100(handles, num_rows=100): print("Writing rows...") mb = 10 ** 6 one_mb_data = "x" * mb for i in range(num_rows): handles.cursor.execute( "INSERT INTO Data (PrimaryKey, Stuff) VALUES (?, ?);", ( i, one_mb_data, ), ) handles.connection.commit() def _delete_100(handles): print("Deleting rows...") start_row = 0 end_row = 99 for i in range(start_row, end_row): handles.cursor.execute("DELETE FROM Data WHERE PrimaryKey = ?;", (i,)) handles.connection.commit() def setup_database(db_file): handles = ScenarioHandles() os.makedirs(os.path.dirname(db_file), exist_ok=True) if os.path.exists(db_file): os.remove(db_file) handles.connection = sqlite3.connect(db_file) handles.cursor = handles.connection.cursor() handles.cursor.execute("PRAGMA auto_vacuum=2;") handles.cursor.execute("PRAGMA journal_mode=WAL;") handles.cursor.execute( """ CREATE TABLE Data ( PrimaryKey INTEGER PRIMARY KEY, Stuff TEXT); """ ) _write_100(handles) _delete_100(handles) _checkpoint_truncate(handles) handles.cursor.execute("PRAGMA wal_autocheckpoint(0);") # Bomb out early for manual testing # sys.exit(0) return handles def big_incr_vacuum(): db_file = "db/test_big.db" handles = setup_database(db_file) _get_pages_usage(handles) print("Starting one big incremental vacuum") _incremental_vacuum(handles, 0) (written_to_wal, moved_to_db) = _checkpoint_passive(handles) print( f"Total pages written to WAL: {written_to_wal} Total pages moved to DB: {moved_to_db}" ) _get_pages_usage(handles) def small_incr_vacuum(): db_file = "db/test_small.db" handles = setup_database(db_file) free_pages = _get_pages_usage(handles) incr_vaccum_pages = 200 steps = int(free_pages / incr_vaccum_pages) + 1 print( f"Starting {steps} calls to incremental_vacuum with {incr_vaccum_pages} pages" ) written_to_wal = 0 moved_to_db = 0 for s in range(steps): _incremental_vacuum(handles, 200) (to_wal, to_db) = _checkpoint_passive(handles) written_to_wal = written_to_wal + to_wal moved_to_db = moved_to_db + to_db print( f"Total pages written to WAL: {written_to_wal} Total pages moved to DB: {moved_to_db}" ) _get_pages_usage(handles) if __name__ == "__main__": print_versions() print("\n\n") big_incr_vacuum() print("\n\n") small_incr_vacuum() ```