Browse Source

Refactor for the procedure of cleanup_orphan_ota_lobs

pull/14901/head
Andrii Landiak 2 months ago
parent
commit
0ec33f11b6
  1. 112
      application/src/main/data/upgrade/basic/schema_update.sql

112
application/src/main/data/upgrade/basic/schema_update.sql

@ -28,86 +28,90 @@ ALTER TABLE rule_chain ADD COLUMN IF NOT EXISTS notes varchar(1000000);
-- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS START
-- Cleans up orphaned PostgreSQL large objects that are no longer referenced by the ota_package table.
-- These orphaned objects accumulate when OTA packages are deleted or updated and can consume significant disk space.
-- Note: only the ota_package.data column uses PostgreSQL large objects (OID type) in ThingsBoard.
-- WARNING: this script removes ALL large objects not referenced by ota_package.data.
-- If external applications sharing this database also use large objects, their objects WILL be deleted.
--
-- Processes orphans in batches with a COMMIT between each batch instead of one single transaction,
-- which avoids the WAL pressure a single transaction could cause on installations with millions
-- of orphaned objects. Does not block ota_package (only row-level locks on pg_largeobject rows
-- being deleted), so it is safe to run on a live server. Each batch releases row locks, flushes
-- WAL, and advances the xmin horizon.
-- cleanup_orphan_ota_lobs(batch_size)
--
-- Purpose:
--   Unlinks every PostgreSQL large object that is not referenced by ota_package.data,
--   working through the orphans in batches and committing after each batch.
--
-- Parameters:
--   batch_size  number of large objects to unlink per transaction (default 500);
--               must be a positive integer, otherwise an exception is raised before
--               anything is modified.
--
-- Behaviour:
--   * Orphan OIDs are snapshotted once into the temp table orphan_ota_lob_queue.
--   * A failure to unlink a single object is counted and reported as a WARNING; the
--     object is still removed from the queue so the loop always makes progress.
--   * Progress is reported with RAISE NOTICE after every committed batch.
--
-- Constraints:
--   * The procedure issues COMMIT, so it must be invoked with a top-level CALL that is
--     not wrapped in an explicit transaction block.
--   * For the same reason the outer block must NOT have an EXCEPTION clause: PL/pgSQL
--     cannot end a transaction inside a block that has exception handlers. Only the
--     per-object inner block (which contains no COMMIT) traps errors.
CREATE OR REPLACE PROCEDURE cleanup_orphan_ota_lobs(batch_size int DEFAULT 500)
    LANGUAGE plpgsql AS
$$
DECLARE
    orphan_oid      bigint;                               -- OID currently being unlinked
    batch_processed int;                                  -- queue rows handled in the current batch
    deleted         bigint := 0;                          -- successfully unlinked objects
    failed          bigint := 0;                          -- objects whose lo_unlink raised an error
    total_orphans   bigint;                               -- size of the initial orphan snapshot
    start_ts        timestamptz := clock_timestamp();     -- wall-clock start, for elapsed reporting
    elapsed_sec     numeric;
BEGIN
    -- Reject unusable batch sizes up front: 0 would "finish" without deleting anything,
    -- a negative value would fail inside LIMIT after the first COMMIT, and NULL would
    -- silently disable batching (LIMIT NULL means no limit).
    IF batch_size IS NULL OR batch_size < 1 THEN
        RAISE EXCEPTION 'batch_size must be a positive integer, got %', batch_size;
    END IF;

    -- Drop first to ensure fresh data on re-run within the same session.
    DROP TABLE IF EXISTS orphan_ota_lob_queue;

    -- The column is deliberately not called "oid": that name is a reserved system column
    -- on PostgreSQL releases before 12. ON COMMIT PRESERVE ROWS keeps the queue alive
    -- across the per-batch COMMITs.
    CREATE TEMP TABLE orphan_ota_lob_queue (lob_oid bigint PRIMARY KEY) ON COMMIT PRESERVE ROWS;

    -- Collect orphan OIDs once to avoid repeating the anti-join for every batch.
    INSERT INTO orphan_ota_lob_queue (lob_oid)
    SELECT m.oid
    FROM pg_largeobject_metadata m
    LEFT JOIN ota_package p ON p.data = m.oid
    WHERE p.data IS NULL;

    SELECT COUNT(*) INTO total_orphans FROM orphan_ota_lob_queue;

    IF total_orphans = 0 THEN
        RAISE NOTICE 'No orphaned large objects found';
        DROP TABLE orphan_ota_lob_queue;
        RETURN;
    END IF;

    RAISE NOTICE 'Found % orphaned large objects, cleaning up in batches of %', total_orphans, batch_size;
    COMMIT;

    LOOP
        batch_processed := 0;

        -- ORDER BY the primary key gives a deterministic batch order.
        FOR orphan_oid IN
            SELECT q.lob_oid
            FROM orphan_ota_lob_queue q
            ORDER BY q.lob_oid
            LIMIT batch_size
        LOOP
            -- Inner block = subtransaction: one bad object must not abort the batch.
            BEGIN
                PERFORM lo_unlink(orphan_oid);
                deleted := deleted + 1;
            EXCEPTION WHEN OTHERS THEN
                failed := failed + 1;
                RAISE WARNING 'Failed to unlink large object %: %', orphan_oid, SQLERRM;
            END;

            -- Dequeue regardless of outcome so a failing object is never retried forever.
            DELETE FROM orphan_ota_lob_queue WHERE lob_oid = orphan_oid;
            batch_processed := batch_processed + 1;
        END LOOP;

        -- End the batch's transaction (outside the FOR loop and any exception block).
        COMMIT;

        EXIT WHEN batch_processed = 0;

        elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
        RAISE NOTICE 'Progress: % deleted, % failed, % remaining (%s elapsed)',
            deleted, failed, total_orphans - deleted - failed, ROUND(elapsed_sec, 1);
    END LOOP;

    DROP TABLE orphan_ota_lob_queue;

    elapsed_sec := EXTRACT(EPOCH FROM clock_timestamp() - start_ts);
    IF failed > 0 THEN
        RAISE WARNING 'OTA large object cleanup finished: % deleted, % failed out of % (%s elapsed)',
            deleted, failed, total_orphans, ROUND(elapsed_sec, 1);
    ELSE
        RAISE NOTICE 'OTA large object cleanup finished: % deleted (%s elapsed)',
            deleted, ROUND(elapsed_sec, 1);
    END IF;
END;
$$;

-- Run the cleanup once, then remove the helper procedure so it does not linger in the schema.
CALL cleanup_orphan_ota_lobs(500);

DROP PROCEDURE IF EXISTS cleanup_orphan_ota_lobs(int);
-- CLEANUP ORPHANED OTA PACKAGE LARGE OBJECTS END

Loading…
Cancel
Save