From 8423d698d704fee9b45793ee7d42f299e88d5d71 Mon Sep 17 00:00:00 2001
From: Andrii Landiak
Date: Mon, 20 Apr 2026 11:57:30 +0300
Subject: [PATCH 1/2] Orphaned table clean up upgrade script

---
 .../main/data/upgrade/basic/schema_update.sql | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/application/src/main/data/upgrade/basic/schema_update.sql b/application/src/main/data/upgrade/basic/schema_update.sql
index 2f9c0cd302..eccb53e3bc 100644
--- a/application/src/main/data/upgrade/basic/schema_update.sql
+++ b/application/src/main/data/upgrade/basic/schema_update.sql
@@ -25,3 +25,89 @@ ALTER TABLE calculated_field ADD COLUMN IF NOT EXISTS additional_info varchar;
 ALTER TABLE rule_chain ADD COLUMN IF NOT EXISTS notes varchar(1000000);
 
 -- RULE CHAIN NOTES MIGRATION END
+
+-- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS START
+
+-- This script cleans up orphaned PostgreSQL large objects that are no longer referenced by the ota_package table.
+-- These orphaned objects accumulate when OTA packages are deleted or updated and can consume significant disk space.
+-- Note: only the ota_package.data column uses PostgreSQL large objects (OID type) in ThingsBoard.
+-- This script removes all large objects not referenced by ota_package.data.
+-- If external applications sharing this database also use large objects, their objects WILL be deleted.
+--
+-- This runs as a single transaction, which is acceptable for typical installations (up to tens of thousands
+-- of orphaned objects). For installations with millions of orphaned objects, WAL pressure may be a concern.
+
+DO
+$$
+DECLARE
+    orphan_oid bigint;
+    deleted_count int := 0;
+    failed_count int := 0;
+    total_orphans int;
+    start_ts timestamptz;
+    elapsed_sec numeric;
+BEGIN
+    start_ts := clock_timestamp();
+
+    -- Drop first to ensure fresh data on re-run
+    DROP TABLE IF EXISTS orphan_oids;
+
+    -- Collect orphan OIDs into a temp table to avoid repeating the JOIN each iteration
+    CREATE TEMP TABLE orphan_oids AS
+    SELECT m.oid AS orphan_oid
+    FROM pg_largeobject_metadata m
+    LEFT JOIN ota_package p ON p.data = m.oid
+    WHERE p.data IS NULL;
+
+    SELECT COUNT(*) INTO total_orphans FROM orphan_oids;
+
+    IF total_orphans = 0 THEN
+        RAISE NOTICE 'No orphaned large objects found';
+        DROP TABLE IF EXISTS orphan_oids;
+        RETURN;
+    END IF;
+
+    RAISE NOTICE 'Found % orphaned large objects to clean up', total_orphans;
+
+    FOR orphan_oid IN SELECT o.orphan_oid FROM orphan_oids o
+    LOOP
+        BEGIN
+            PERFORM lo_unlink(orphan_oid);
+            deleted_count := deleted_count + 1;
+
+            IF deleted_count % 1000 = 0 THEN
+                elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+                RAISE NOTICE 'Progress: deleted % of % orphaned large objects (%s elapsed)...',
+                    deleted_count, total_orphans, ROUND(elapsed_sec, 1);
+            END IF;
+        EXCEPTION WHEN OTHERS THEN
+            failed_count := failed_count + 1;
+            RAISE WARNING 'Failed to delete large object with OID %: %', orphan_oid, SQLERRM;
+        END;
+    END LOOP;
+
+    elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+
+    IF failed_count > 0 THEN
+        RAISE NOTICE 'Completed cleanup: deleted %, failed % out of % orphaned large objects (%s elapsed)',
+            deleted_count, failed_count, total_orphans, ROUND(elapsed_sec, 1);
+    ELSE
+        RAISE NOTICE 'Successfully cleaned up all % orphaned large objects (%s elapsed)',
+            deleted_count, ROUND(elapsed_sec, 1);
+    END IF;
+
+    -- Fail the migration if more than 10% of deletions failed, indicating a systemic problem
+    IF failed_count > 0 AND (failed_count::numeric / total_orphans) > 0.1 THEN
+        DROP TABLE IF EXISTS orphan_oids;
+        RAISE EXCEPTION 'OTA cleanup aborted: % of % deletions failed (>10%%), indicating a systemic issue',
+            failed_count, total_orphans;
+    END IF;
+
+    DROP TABLE IF EXISTS orphan_oids;
+EXCEPTION WHEN OTHERS THEN
+    DROP TABLE IF EXISTS orphan_oids;
+    RAISE;
+END;
+$$;
+
+-- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS END

From 0ec33f11b6ee468116fadbbdd3ecbb9fc82e18fc Mon Sep 17 00:00:00 2001
From: Andrii Landiak
Date: Tue, 21 Apr 2026 11:13:25 +0300
Subject: [PATCH 2/2] Refactor for the procedure of cleanup_orphan_ota_lobs

---
Review revision of the original patch: batch_size is validated, every queued OID is
re-checked against ota_package before lo_unlink, the queue column is named lob_oid,
and the autocommit requirement of CALL is documented. The post-image blob id on the
index line below still refers to the pre-revision content.

 .../main/data/upgrade/basic/schema_update.sql | 128 ++++++++++--------
 1 file changed, 74 insertions(+), 54 deletions(-)

diff --git a/application/src/main/data/upgrade/basic/schema_update.sql b/application/src/main/data/upgrade/basic/schema_update.sql
index eccb53e3bc..086891fd55 100644
--- a/application/src/main/data/upgrade/basic/schema_update.sql
+++ b/application/src/main/data/upgrade/basic/schema_update.sql
@@ -28,86 +28,106 @@ ALTER TABLE rule_chain ADD COLUMN IF NOT EXISTS notes varchar(1000000);
 
 -- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS START
 
--- This script cleans up orphaned PostgreSQL large objects that are no longer referenced by the ota_package table.
--- These orphaned objects accumulate when OTA packages are deleted or updated and can consume significant disk space.
+-- Cleans up orphaned PostgreSQL large objects no longer referenced by ota_package.
+-- These accumulate when OTA packages are deleted or updated and can consume significant disk space.
 -- Note: only the ota_package.data column uses PostgreSQL large objects (OID type) in ThingsBoard.
 -- This script removes all large objects not referenced by ota_package.data.
 -- If external applications sharing this database also use large objects, their objects WILL be deleted.
 --
--- This runs as a single transaction, which is acceptable for typical installations (up to tens of thousands
--- of orphaned objects). For installations with millions of orphaned objects, WAL pressure may be a concern.
+-- Processes orphans in batches with a COMMIT between each batch. Does not block ota_package
+-- (only row-level locks on pg_largeobject rows being deleted), so it is safe to run on a live
+-- server. Each batch releases row locks, flushes WAL, and advances the xmin horizon.
+--
+-- The candidate list is a snapshot taken before the first COMMIT, so every queued object is
+-- re-checked against ota_package right before it is unlinked; referenced objects are skipped.
+--
+-- The procedure issues COMMIT, so the CALL below must run outside an explicit transaction block
+-- (e.g. with autocommit enabled); otherwise PostgreSQL raises "invalid transaction termination".
 
-DO
+CREATE OR REPLACE PROCEDURE cleanup_orphan_ota_lobs(batch_size int DEFAULT 500)
+LANGUAGE plpgsql AS
 $$
 DECLARE
     orphan_oid bigint;
-    deleted_count int := 0;
-    failed_count int := 0;
-    total_orphans int;
-    start_ts timestamptz;
+    batch_processed int;
+    deleted bigint := 0;
+    skipped bigint := 0;
+    failed bigint := 0;
+    total_orphans bigint;
+    start_ts timestamptz := clock_timestamp();
     elapsed_sec numeric;
 BEGIN
-    start_ts := clock_timestamp();
+    -- LIMIT NULL would disable batching and LIMIT 0 would silently skip the whole cleanup
+    IF batch_size IS NULL OR batch_size < 1 THEN
+        RAISE EXCEPTION 'batch_size must be a positive integer, got %', batch_size;
+    END IF;
+
+    DROP TABLE IF EXISTS orphan_ota_lob_queue;
+    CREATE TEMP TABLE orphan_ota_lob_queue (lob_oid bigint PRIMARY KEY) ON COMMIT PRESERVE ROWS;
 
-    -- Drop first to ensure fresh data on re-run
-    DROP TABLE IF EXISTS orphan_oids;
+    INSERT INTO orphan_ota_lob_queue (lob_oid)
+    SELECT m.oid
+    FROM pg_largeobject_metadata m
+    WHERE NOT EXISTS (SELECT 1 FROM ota_package p WHERE p.data = m.oid);
 
-    -- Collect orphan OIDs into a temp table to avoid repeating the JOIN each iteration
-    CREATE TEMP TABLE orphan_oids AS
-    SELECT m.oid AS orphan_oid
-    FROM pg_largeobject_metadata m
-    LEFT JOIN ota_package p ON p.data = m.oid
-    WHERE p.data IS NULL;
-
-    SELECT COUNT(*) INTO total_orphans FROM orphan_oids;
+    SELECT COUNT(*) INTO total_orphans FROM orphan_ota_lob_queue;
 
     IF total_orphans = 0 THEN
         RAISE NOTICE 'No orphaned large objects found';
-        DROP TABLE IF EXISTS orphan_oids;
+        DROP TABLE orphan_ota_lob_queue;
         RETURN;
     END IF;
 
-    RAISE NOTICE 'Found % orphaned large objects to clean up', total_orphans;
+    RAISE NOTICE 'Found % orphaned large objects, cleaning up in batches of %', total_orphans, batch_size;
+    COMMIT;
 
-    FOR orphan_oid IN SELECT o.orphan_oid FROM orphan_oids o
     LOOP
-        BEGIN
-            PERFORM lo_unlink(orphan_oid);
-            deleted_count := deleted_count + 1;
-
-            IF deleted_count % 1000 = 0 THEN
-                elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
-                RAISE NOTICE 'Progress: deleted % of % orphaned large objects (%s elapsed)...',
-                    deleted_count, total_orphans, ROUND(elapsed_sec, 1);
-            END IF;
-        EXCEPTION WHEN OTHERS THEN
-            failed_count := failed_count + 1;
-            RAISE WARNING 'Failed to delete large object with OID %: %', orphan_oid, SQLERRM;
-        END;
+        batch_processed := 0;
+
+        FOR orphan_oid IN
+            SELECT q.lob_oid FROM orphan_ota_lob_queue q LIMIT batch_size
+        LOOP
+            BEGIN
+                -- Re-check: the queue is a snapshot and may be stale by the time this batch runs
+                IF EXISTS (SELECT 1 FROM ota_package p WHERE p.data = orphan_oid::oid) THEN
+                    skipped := skipped + 1;
+                ELSE
+                    PERFORM lo_unlink(orphan_oid);
+                    deleted := deleted + 1;
+                END IF;
+            EXCEPTION WHEN OTHERS THEN
+                failed := failed + 1;
+                RAISE WARNING 'Failed to unlink large object %: %', orphan_oid, SQLERRM;
+            END;
+
+            DELETE FROM orphan_ota_lob_queue WHERE lob_oid = orphan_oid;
+            batch_processed := batch_processed + 1;
+        END LOOP;
+
+        COMMIT;
+
+        EXIT WHEN batch_processed = 0;
+
+        elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+        RAISE NOTICE 'Progress: % deleted, % skipped, % failed, % remaining (%s elapsed)',
+            deleted, skipped, failed, total_orphans - deleted - skipped - failed, ROUND(elapsed_sec, 1);
     END LOOP;
 
-    elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+    DROP TABLE orphan_ota_lob_queue;
 
-    IF failed_count > 0 THEN
-        RAISE NOTICE 'Completed cleanup: deleted %, failed % out of % orphaned large objects (%s elapsed)',
-            deleted_count, failed_count, total_orphans, ROUND(elapsed_sec, 1);
+    elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
+    IF failed > 0 THEN
+        RAISE WARNING 'OTA large object cleanup finished: % deleted, % skipped, % failed out of % (%s elapsed)',
+            deleted, skipped, failed, total_orphans, ROUND(elapsed_sec, 1);
     ELSE
-        RAISE NOTICE 'Successfully cleaned up all % orphaned large objects (%s elapsed)',
-            deleted_count, ROUND(elapsed_sec, 1);
-    END IF;
-
-    -- Fail the migration if more than 10% of deletions failed, indicating a systemic problem
-    IF failed_count > 0 AND (failed_count::numeric / total_orphans) > 0.1 THEN
-        DROP TABLE IF EXISTS orphan_oids;
-        RAISE EXCEPTION 'OTA cleanup aborted: % of % deletions failed (>10%%), indicating a systemic issue',
-            failed_count, total_orphans;
+        RAISE NOTICE 'OTA large object cleanup finished: % deleted, % skipped (%s elapsed)',
+            deleted, skipped, ROUND(elapsed_sec, 1);
     END IF;
-
-    DROP TABLE IF EXISTS orphan_oids;
-EXCEPTION WHEN OTHERS THEN
-    DROP TABLE IF EXISTS orphan_oids;
-    RAISE;
 END;
 $$;
 
+CALL cleanup_orphan_ota_lobs(500);
+
+DROP PROCEDURE IF EXISTS cleanup_orphan_ota_lobs(int);
+
 -- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS END