===== Vacuuming =====
==== Parameters ====
autovacuum_analyze_scale_factor | Number of DML prior to analyze as a fraction of reltuples.
autovacuum_analyze_threshold | Minimum number of DML prior to analyze.
autovacuum_vacuum_scale_factor | Number of DML prior to vacuum as a fraction of reltuples.
autovacuum_vacuum_threshold | Minimum number of tuple DML prior to vacuum.
==== Dead Rows ====
The following SQL code displays the thresholds for when Autovacuum will clean up dead rows.\\
Dead rows (or tuples) occur in postgres when records are updated or deleted.\\
select relname, n_live_tup, n_dead_tup,
floor(current_setting('autovacuum_vacuum_threshold')::integer +
(current_setting('autovacuum_vacuum_scale_factor')::numeric *
(n_live_tup))) as "Autovacuum Threshold",
round(n_dead_tup / floor(current_setting('autovacuum_vacuum_threshold')::integer +
(current_setting('autovacuum_vacuum_scale_factor')::numeric *
(n_live_tup)))*100,2) as "Percentage",
last_vacuum, last_autovacuum
from pg_stat_user_tables
where n_live_tup > 0
order by 5 desc limit 10;
==== Vacuum Progress ====
Will not display the progress of VACUUM FULL commands.
SELECT p.pid, now() - a.xact_start AS duration,
coalesce(wait_event_type ||'.'|| wait_event, 'f') AS waiting,
CASE
WHEN a.query ~*'^autovacuum.*to prevent wraparound' THEN 'wraparound'
WHEN a.query ~*'^vacuum' THEN 'user'
ELSE 'regular'
END AS mode,
p.relid::regclass AS table, p.phase,
pg_size_pretty(p.heap_blks_total * current_setting('block_size')::int) AS table_size,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(p.heap_blks_scanned * current_setting('block_size')::int) AS scanned,
pg_size_pretty(p.heap_blks_vacuumed * current_setting('block_size')::int) AS vacuumed,
round(100.0 * p.heap_blks_scanned / p.heap_blks_total, 1) AS scanned_pct,
round(100.0 * p.heap_blks_vacuumed / p.heap_blks_total, 1) AS vacuumed_pct,
round(100.0 * p.num_dead_tuples / p.max_dead_tuples,1) AS dead_pct
FROM pg_stat_progress_vacuum p
JOIN pg_stat_activity a using (pid)
ORDER BY now() - a.xact_start DESC;
==== Table Bloat ====
Table bloat can be fixed by performing a "VACUUM FULL" on the table.\\
This will create a new unfragmented table and rebuild all associated indexes.\\
However this will cause a full lock on the table, even SELECT queries will have to wait for the process to complete.\\
Instead you can use the extension [[https://dbwiki.co.uk/postgresqlextensions#pg_repack|pg_repack]]. \\
The basis of the below SQL query was taken from: [[https://github.com/ioguix/pgsql-bloat-estimation]]\\
SELECT current_database(), tblname,
round(bs*tblpages/1024/1024,0) AS Table_size_mb,
round(cast((tblpages - est_tblpages)*bs/1024/1024 as numeric),0) AS bloat_size_mb,
CASE WHEN tblpages - est_tblpages > 0
THEN round(cast(100 * (tblpages - est_tblpages)/tblpages as numeric),0)
ELSE 0
END AS extra_pct
FROM (
SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages,
tblpages, bs, schemaname, tblname, heappages, toastpages
FROM (
SELECT
( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
- CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END
- CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END
) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages,
toastpages, reltuples, toasttuples, bs, page_hdr, schemaname, tblname
FROM (
SELECT
ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples,
tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages,
coalesce(toast.reltuples, 0) AS toasttuples,
current_setting('block_size')::numeric AS bs,
CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma,
24 AS page_hdr,
23 + CASE WHEN MAX(coalesce(s.null_frac,0)) > 0 THEN ( 7 + count(s.attname) ) / 8 ELSE 0::int END
+ CASE WHEN bool_or(att.attname = 'oid' and att.attnum < 0) THEN 4 ELSE 0 END AS tpl_hdr_size,
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) AS tpl_data_size
FROM pg_attribute AS att
JOIN pg_class AS tbl ON att.attrelid = tbl.oid
JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname
AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname
LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid
WHERE NOT att.attisdropped
AND tbl.relkind in ('r','m')
GROUP BY 1,2,3,4,5,6,7,8,9
) AS s
) AS s2
) AS s3
WHERE schemaname in ('public')
AND (tblpages - est_tblpages)*bs > 1024000
ORDER BY (tblpages - est_tblpages)*bs DESC;