From 0ec33f11b6ee468116fadbbdd3ecbb9fc82e18fc Mon Sep 17 00:00:00 2001
From: Andrii Landiak
Date: Tue, 21 Apr 2026 11:13:25 +0300
Subject: [PATCH] Refactor for the procedure of cleanup_orphan_ota_lobs

Replace the one-shot DO block that removed orphaned OTA large objects
with a temporary procedure, cleanup_orphan_ota_lobs(batch_size), which
the script creates, calls with a batch size of 500 and then drops.

The procedure queues every large object OID that is not referenced by
ota_package.data in a temp table, then repeatedly takes up to
batch_size queued OIDs, calls lo_unlink() on each one, removes the OID
from the queue whether or not the unlink succeeded, and commits. The
loop ends when a pass processes no rows.

Behaviour changes compared with the DO block:
  * work is committed per batch instead of in a single transaction;
  * a failed lo_unlink() is still reported with RAISE WARNING, but the
    migration no longer raises an exception when more than 10% of the
    deletions fail;
  * the outer EXCEPTION handler is removed (presumably because PL/pgSQL
    does not allow COMMIT inside a block that has exception handlers).
---
Review note: the posted diff did not contain the LOOP statement that
opens the batch loop, so EXIT WHEN and the final END LOOP had no
enclosing loop and the procedure could not be created. The line is
added below; the diffstat and hunk header are updated accordingly
(58 -> 59 insertions). The blob hashes on the index line are those of
the original posting.

 .../main/data/upgrade/basic/schema_update.sql | 113 +++++++++---------
 1 file changed, 59 insertions(+), 54 deletions(-)

diff --git a/application/src/main/data/upgrade/basic/schema_update.sql b/application/src/main/data/upgrade/basic/schema_update.sql
index eccb53e3bc..086891fd55 100644
--- a/application/src/main/data/upgrade/basic/schema_update.sql
+++ b/application/src/main/data/upgrade/basic/schema_update.sql
@@ -28,86 +28,91 @@
 ALTER TABLE rule_chain ADD COLUMN IF NOT EXISTS notes varchar(1000000);
 
 -- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS START
--- This script cleans up orphaned PostgreSQL large objects that are no longer referenced by the ota_package table.
--- These orphaned objects accumulate when OTA packages are deleted or updated and can consume significant disk space.
+-- Cleans up orphaned PostgreSQL large objects no longer referenced by ota_package.
+-- These accumulate when OTA packages are deleted or updated and can consume significant disk space.
 -- Note: only the ota_package.data column uses PostgreSQL large objects (OID type) in ThingsBoard.
 -- This script removes all large objects not referenced by ota_package.data.
 -- If external applications sharing this database also use large objects, their objects WILL be deleted.
 --
--- This runs as a single transaction, which is acceptable for typical installations (up to tens of thousands
--- of orphaned objects). For installations with millions of orphaned objects, WAL pressure may be a concern.
+-- Processes orphans in batches with a COMMIT between each batch. Does not block ota_package
+-- (only row-level locks on pg_largeobject rows being deleted), so it is safe to run on a live
+-- server. Each batch releases row locks, flushes WAL, and advances the xmin horizon.
 
-DO
+CREATE OR REPLACE PROCEDURE cleanup_orphan_ota_lobs(batch_size int DEFAULT 500)
+LANGUAGE plpgsql AS
 $$
 DECLARE
     orphan_oid bigint;
-    deleted_count int := 0;
-    failed_count int := 0;
-    total_orphans int;
-    start_ts timestamptz;
+    batch_processed int;
+    deleted bigint := 0;
+    failed bigint := 0;
+    total_orphans bigint;
+    start_ts timestamptz := clock_timestamp();
     elapsed_sec numeric;
 BEGIN
-    start_ts := clock_timestamp();
+    DROP TABLE IF EXISTS orphan_ota_lob_queue;
+    CREATE TEMP TABLE orphan_ota_lob_queue (oid bigint PRIMARY KEY) ON COMMIT PRESERVE ROWS;
 
-    -- Drop first to ensure fresh data on re-run
-    DROP TABLE IF EXISTS orphan_oids;
+    INSERT INTO orphan_ota_lob_queue
+    SELECT m.oid
+    FROM pg_largeobject_metadata m
+    LEFT JOIN ota_package p ON p.data = m.oid
+    WHERE p.data IS NULL;
 
-    -- Collect orphan OIDs into a temp table to avoid repeating the JOIN each iteration
-    CREATE TEMP TABLE orphan_oids AS
-    SELECT m.oid AS orphan_oid
-    FROM pg_largeobject_metadata m
-    LEFT JOIN ota_package p ON p.data = m.oid
-    WHERE p.data IS NULL;
-
-    SELECT COUNT(*) INTO total_orphans FROM orphan_oids;
+    SELECT COUNT(*) INTO total_orphans FROM orphan_ota_lob_queue;
 
     IF total_orphans = 0 THEN
         RAISE NOTICE 'No orphaned large objects found';
-        DROP TABLE IF EXISTS orphan_oids;
+        DROP TABLE orphan_ota_lob_queue;
         RETURN;
     END IF;
 
-    RAISE NOTICE 'Found % orphaned large objects to clean up', total_orphans;
+    RAISE NOTICE 'Found % orphaned large objects, cleaning up in batches of %', total_orphans, batch_size;
+    COMMIT;
 
-    FOR orphan_oid IN SELECT o.orphan_oid FROM orphan_oids o LOOP
-        BEGIN
-            PERFORM lo_unlink(orphan_oid);
-            deleted_count := deleted_count + 1;
-
-            IF deleted_count % 1000 = 0 THEN
-                elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
-                RAISE NOTICE 'Progress: deleted % of % orphaned large objects (%s elapsed)...',
-                    deleted_count, total_orphans, ROUND(elapsed_sec, 1);
-            END IF;
-        EXCEPTION WHEN OTHERS THEN
-            failed_count := failed_count + 1;
-            RAISE WARNING 'Failed to delete large object with OID %: %', orphan_oid, SQLERRM;
-        END;
+    LOOP
+        batch_processed := 0;
+
+        FOR orphan_oid IN
+            SELECT oid FROM orphan_ota_lob_queue LIMIT batch_size
+        LOOP
+            BEGIN
+                PERFORM lo_unlink(orphan_oid);
+                deleted := deleted + 1;
+            EXCEPTION WHEN OTHERS THEN
+                failed := failed + 1;
+                RAISE WARNING 'Failed to unlink large object %: %', orphan_oid, SQLERRM;
+            END;
+
+            DELETE FROM orphan_ota_lob_queue WHERE oid = orphan_oid;
+            batch_processed := batch_processed + 1;
+        END LOOP;
+
+        COMMIT;
+
+        EXIT WHEN batch_processed = 0;
+
+        elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+        RAISE NOTICE 'Progress: % deleted, % failed, % remaining (%s elapsed)',
+            deleted, failed, total_orphans - deleted - failed, ROUND(elapsed_sec, 1);
     END LOOP;
 
-    elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+    DROP TABLE orphan_ota_lob_queue;
 
-    IF failed_count > 0 THEN
-        RAISE NOTICE 'Completed cleanup: deleted %, failed % out of % orphaned large objects (%s elapsed)',
-            deleted_count, failed_count, total_orphans, ROUND(elapsed_sec, 1);
+    elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+    IF failed > 0 THEN
+        RAISE WARNING 'OTA large object cleanup finished: % deleted, % failed out of % (%s elapsed)',
+            deleted, failed, total_orphans, ROUND(elapsed_sec, 1);
     ELSE
-        RAISE NOTICE 'Successfully cleaned up all % orphaned large objects (%s elapsed)',
-            deleted_count, ROUND(elapsed_sec, 1);
-    END IF;
-
-    -- Fail the migration if more than 10% of deletions failed, indicating a systemic problem
-    IF failed_count > 0 AND (failed_count::numeric / total_orphans) > 0.1 THEN
-        DROP TABLE IF EXISTS orphan_oids;
-        RAISE EXCEPTION 'OTA cleanup aborted: % of % deletions failed (>10%%), indicating a systemic issue',
-            failed_count, total_orphans;
+        RAISE NOTICE 'OTA large object cleanup finished: % deleted (%s elapsed)',
+            deleted, ROUND(elapsed_sec, 1);
     END IF;
-
-    DROP TABLE IF EXISTS orphan_oids;
-EXCEPTION WHEN OTHERS THEN
-    DROP TABLE IF EXISTS orphan_oids;
-    RAISE;
 END;
 $$;
 
+CALL cleanup_orphan_ota_lobs(500);
+
+DROP PROCEDURE IF EXISTS cleanup_orphan_ota_lobs(int);
+
 -- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS END
 