===== Vacuuming ===== ==== Parameters ====


autovacuum_analyze_scale_factor       | Number of DML prior to analyze as a fraction of reltuples.
autovacuum_analyze_threshold          | Minimum number of DML prior to analyze.
autovacuum_vacuum_scale_factor        | Number of DML prior to vacuum as a fraction of reltuples.
autovacuum_vacuum_threshold           | Minimum number of tuple DML prior to vacuum.

==== Dead Rows ==== The following SQL code displays the thresholds for when Autovacuum will clean up dead rows.\\ Dead rows (or tuples) occur in postgres when records are updated or deleted.\\


select relname, n_live_tup, n_dead_tup, 
       floor(current_setting('autovacuum_vacuum_threshold')::integer + 
	    (current_setting('autovacuum_vacuum_scale_factor')::numeric * 
	    (n_live_tup))) as "Autovacuum Threshold",
       round(n_dead_tup / floor(current_setting('autovacuum_vacuum_threshold')::integer + 
	                       (current_setting('autovacuum_vacuum_scale_factor')::numeric * 
		               (n_live_tup)))*100,2) as "Percentage",
       last_vacuum, last_autovacuum 
from pg_stat_user_tables
where n_live_tup > 0
order by 5 desc limit 10;

==== Vacuum Progress ==== Will not display the progress of VACUUM FULL commands.


SELECT p.pid, now() - a.xact_start AS duration,
       coalesce(wait_event_type ||'.'|| wait_event, 'f') AS waiting,
       CASE
         WHEN a.query ~*'^autovacuum.*to prevent wraparound' THEN 'wraparound'
         WHEN a.query ~*'^vacuum' THEN 'user'
         ELSE 'regular'
       END AS mode,
       p.relid::regclass AS table, p.phase,
       pg_size_pretty(p.heap_blks_total * current_setting('block_size')::int) AS table_size,
       pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
       pg_size_pretty(p.heap_blks_scanned * current_setting('block_size')::int) AS scanned,
       pg_size_pretty(p.heap_blks_vacuumed * current_setting('block_size')::int) AS vacuumed,
       round(100.0 * p.heap_blks_scanned / p.heap_blks_total, 1) AS scanned_pct,
       round(100.0 * p.heap_blks_vacuumed / p.heap_blks_total, 1) AS vacuumed_pct,
       round(100.0 * p.num_dead_tuples / p.max_dead_tuples,1) AS dead_pct
  FROM pg_stat_progress_vacuum p
  JOIN pg_stat_activity a using (pid)
ORDER BY now() - a.xact_start DESC;

==== Table Bloat ==== Table bloat can be fixed by performing a "VACUUM FULL" on the table.\\ This will create a new unfragmented table and rebuild all associated indexes.\\ However this will cause a full lock on the table, even SELECT queries will have to wait for the process to complete.\\ Instead you can use the extension [[https://dbwiki.co.uk/postgresqlextensions#pg_repack|pg_repack]]. \\ The basis of the below SQL query was taken from: [[https://github.com/ioguix/pgsql-bloat-estimation]]\\


SELECT current_database(), tblname, 
       round(bs*tblpages/1024/1024,0) AS Table_size_mb,
       round(cast((tblpages - est_tblpages)*bs/1024/1024 as numeric),0) AS bloat_size_mb,
  CASE WHEN tblpages - est_tblpages > 0
    THEN round(cast(100 * (tblpages - est_tblpages)/tblpages as numeric),0)
    ELSE 0
  END AS extra_pct
FROM (
  SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages,
         tblpages, bs, schemaname, tblname, heappages, toastpages
  FROM (
    SELECT
      ( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
        - CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END
        - CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END
      ) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages,
      toastpages, reltuples, toasttuples, bs, page_hdr, schemaname, tblname
    FROM (
      SELECT
        ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples,
        tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages,
        coalesce(toast.reltuples, 0) AS toasttuples,
        current_setting('block_size')::numeric AS bs,
        CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma,
        24 AS page_hdr,
        23 + CASE WHEN MAX(coalesce(s.null_frac,0)) > 0 THEN ( 7 + count(s.attname) ) / 8 ELSE 0::int END
           + CASE WHEN bool_or(att.attname = 'oid' and att.attnum < 0) THEN 4 ELSE 0 END AS tpl_hdr_size,
        sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) AS tpl_data_size
      FROM pg_attribute AS att
        JOIN pg_class AS tbl ON att.attrelid = tbl.oid
        JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace
        LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname
          AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname
        LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid
      WHERE NOT att.attisdropped
        AND tbl.relkind in ('r','m')
      GROUP BY 1,2,3,4,5,6,7,8,9
    ) AS s
  ) AS s2
) AS s3
WHERE schemaname in ('public')
  AND (tblpages - est_tblpages)*bs > 1024000
ORDER BY (tblpages - est_tblpages)*bs DESC;