unused indices
Indices take space and also require time to update. Find out if any of them are unused:
SELECT
schemaname || '.' || relname AS table,
indexrelname AS index,
pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size,
idx_scan as index_scans
FROM pg_stat_user_indexes ui
JOIN pg_index i ON ui.indexrelid = i.indexrelid
WHERE NOT indisunique AND idx_scan < 50 AND pg_relation_size(relid) > 5 * 8192
ORDER BY pg_relation_size(i.indexrelid) / nullif(idx_scan, 0) DESC NULLS FIRST,
pg_relation_size(i.indexrelid) DESC;
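Once an index is confirmed unused, it can be dropped without blocking writes; a minimal sketch, with index_name as a placeholder (note that DROP INDEX CONCURRENTLY can't run inside a transaction block):
-- drop an unused index without taking a write-blocking lock
DROP INDEX CONCURRENTLY IF EXISTS index_name;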
approximate row count
Sometimes running SELECT COUNT(1) FROM table_name; is prohibitively expensive on big tables, but we can get an approximation with the query below.
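One common approach (not necessarily the exact query this note originally referred to) is to read the planner's estimate from pg_class, which ANALYZE and autovacuum keep reasonably current; table_name is a placeholder:
-- approximate row count from planner statistics (refresh with ANALYZE if stale)
SELECT reltuples::bigint AS approximate_row_count
FROM pg_class
WHERE relname = 'table_name';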
most time consuming queries
SELECT
left(query, 200) AS short_query,
round(total_time::numeric, 2) AS total_time,
calls, round(mean_time::numeric, 2) AS mean, round((100 * total_time / sum(total_time::numeric) OVER ())::numeric, 2) AS percentage_cpu
FROM
pg_stat_statements
ORDER BY
total_time DESC
LIMIT 20;
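Note: this query needs the pg_stat_statements extension installed (see the CREATE EXTENSION snippet further down); on Postgres 13 and newer the columns are named total_exec_time and mean_exec_time instead of total_time and mean_time.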

Connection pooling with pgbouncer
Postgres isn't great at dealing with many connections (performance issues start happening after 500+ connections).
The issue is especially noticeable around deployments (depending on the deployment strategy), when new servers come online while old servers are still up responding to requests.
If you're using something like blue/green deployments, you can effectively double your connection count when deployments happen.
One solution to this problem is to use a connection pooler: a program that sits in between your application and your db server and helps reuse/multiplex connections.
In general, the postgres community seems to overwhelmingly recommend pgbouncer to address this issue. Other (less popular) options are available as well.
Version 1.21 of PgBouncer added a long awaited feature: support for prepared statements inside of transaction mode. Prior to this, one had to choose between using prepared statements (a performance win) and using PgBouncer's transaction mode (also a large performance win). Now we can have our cake and eat it too 🎂

In Heroku, we can enable pgbouncer via Server Side connection pooling (managed by Heroku), which automatically adds a new database that we can connect to, named pgbouncer.

Monitoring
Heroku won't print any pgbouncer stats to the log drain, but it creates a new table named pgbouncer that includes several stats that we can capture to NewRelic/Datadog/etc.
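On a standalone PgBouncer install, the same kind of stats are available from PgBouncer's admin console; a sketch, assuming the default setup where PgBouncer listens on port 6432 and exposes the special pgbouncer admin database (connect with e.g. psql -p 6432 pgbouncer):
SHOW STATS;
SHOW POOLS;
SHOW STATS reports per-database traffic counters, and SHOW POOLS reports client/server connection counts per pool.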

rollback plan
Keep the previous database around in case we need to roll back; we can promote the old database at any moment:
heroku pg:promote <old db name (new alias)> -a app-name

after the upgrade, provision a new follower to keep H/A
It’s possible for the new follower creation to fail because the new primary is “too new”.
In that case, try again in a few hours.
heroku addons:create heroku-postgresql:standard-8 --follow DATABASE_URL -a app-name
after promotion, run ANALYZE on the db to recalculate statistics and ensure the query planner doesn't pick bad query plans and queries remain fast
Promoting a replica while there is activity on the primary database means that some of the data might not have a chance to replicate, so there could be data loss!
As part of the pg:upgrade process, Heroku Postgres runs ANALYZE on your database, which recalculates statistics to make sure the Postgres query planner has up-to-date information even after a version upgrade.
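If you ever need to run it manually (for example after promoting a replica outside of pg:upgrade), a minimal example from a psql session on the new primary:
-- recompute planner statistics for every table in the current database
ANALYZE VERBOSE;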
Promoting an RDS read replica
Before promoting an RDS replica, you need to make sure that no new data is being written to the primary instance.
A replica is ready to be promoted when there are no active transactions on the primary instance and the replica lag is zero.
Active transactions and connected clients
Look for any activity on the primary and ensure there is none:
-- active transactions (to run on primary)
SELECT * FROM pg_stat_activity
WHERE state = 'active'
  AND pid <> pg_backend_pid() -- exclude this query itself
  AND (query LIKE '%INSERT%' OR query LIKE '%UPDATE%' OR query LIKE '%DELETE%');
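To also check for connected clients (as the heading above mentions), a quick sketch that counts sessions other than the current one:
-- connected clients (to run on primary)
SELECT count(*) AS connected_clients
FROM pg_stat_activity
WHERE pid <> pg_backend_pid();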
Replication lag
Ensure replication lag is zero, which means that the replica is caught up with the primary and can be promoted safely:
-- replication lag (to run on replica)
SELECT EXTRACT(EPOCH FROM NOW() - pg_last_xact_replay_timestamp()) AS replication_lag;
Running this on the primary instance returns null, since pg_last_xact_replay_timestamp() only returns a value while the server is in recovery (i.e. on a replica).
Once there’s no activity and replica lag is zero, the replica can be promoted.

Upgrading a postgres database in Heroku
Based on https://devcenter.heroku.com/articles/upgrading-heroku-postgres-databases#upgrading-with-pg-upgrade
With a database size of ~1 TB, this process takes about 15 minutes of downtime.
Provision a new follower database
heroku addons:create heroku-postgresql:premium-8 --follow DATABASE_URL -a app-name
heroku pg:wait -a app-name
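# stop background workers and put the app in maintenance mode so nothing writes during the upgrade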
heroku ps:scale worker=0 -a app-name
heroku maintenance:on -a app-name
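# confirm the follower is fully caught up ("Behind By: 0 commits") before upgrading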
heroku pg:info -a app-name
#=> HEROKU_POSTGRESQL_PINK_URL Plan: Premium 8 Status: available ...
#=> Following: HEROKU_POSTGRESQL_PURPLE_URL (DATABASE_URL)
#=> Behind By: 0 commits
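# upgrade the follower to the new Postgres version, then promote it to become the primary (DATABASE_URL)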
heroku pg:upgrade <NEW_FOLLOWER_DATABASE_URL> -a app-name
heroku pg:wait -a app-name
heroku pg:promote <NEW_FOLLOWER_DATABASE_URL> -a app-name
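# bring the app back out of maintenance and scale workers back up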
heroku maintenance:off -a app-name
heroku ps:scale worker=1 -a app-name

blocking queries
SELECT bl.pid AS blocked_pid,
ka.query AS blocking_statement,
now() - ka.query_start AS blocking_duration,
kl.pid AS blocking_pid,
a.query AS blocked_statement,
now() - a.query_start AS blocked_duration
FROM pg_catalog.pg_locks bl
JOIN pg_catalog.pg_stat_activity a
ON bl.pid = a.pid
JOIN pg_catalog.pg_locks kl
JOIN pg_catalog.pg_stat_activity ka
ON kl.pid = ka.pid
ON bl.transactionid = kl.transactionid AND bl.pid != kl.pid
WHERE NOT bl.granted;
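On Postgres 9.6 and newer, pg_blocking_pids() gives a simpler view of the same information; a minimal sketch:
-- sessions that are currently blocked, and the pids blocking them
SELECT pid,
       pg_blocking_pids(pid) AS blocked_by,
       query AS blocked_statement
FROM pg_stat_activity
WHERE cardinality(pg_blocking_pids(pid)) > 0;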
terminate long running queries
/* FIND THE PID */
SELECT * FROM pg_stat_activity WHERE state = 'active';
/* KILL GRACEFULLY */
SELECT pg_cancel_backend(PID);
/* KILL FORCEFULLY */
SELECT pg_terminate_backend(PID);

long running queries
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query AS query
FROM
pg_stat_activity
WHERE
pg_stat_activity.query <> ''::text
AND state <> 'idle'
AND now() - pg_stat_activity.query_start > interval '50 ms'
ORDER BY
now() - pg_stat_activity.query_start DESC;

which tables are currently autovacuuming
SELECT pid, query FROM pg_stat_activity WHERE query LIKE 'autovacuum: %';
pid │ query
───────┼──────────────────────────────────────────────────────────
29431 │ autovacuum: VACUUM public.scores (to prevent wraparound)
(1 row)
table bloat
WITH constants AS (
SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 4 AS ma
), bloat_info AS (
SELECT
ma,bs,schemaname,tablename,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
SELECT
schemaname, tablename, hdr, ma, bs,
SUM((1-null_frac)*avg_width) AS datawidth,
MAX(null_frac) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
) AS nullhdr
FROM pg_stats s, constants
GROUP BY 1,2,3,4,5
) AS foo
), table_bloat AS (
SELECT
schemaname, tablename, cc.relpages, bs,
CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta
FROM bloat_info
JOIN pg_class cc ON cc.relname = bloat_info.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
), index_bloat AS (
SELECT
schemaname, tablename, bs,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM bloat_info
JOIN pg_class cc ON cc.relname = bloat_info.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
JOIN pg_index i ON indrelid = cc.oid
JOIN pg_class c2 ON c2.oid = i.indexrelid
)
SELECT
type, schemaname, object_name, bloat, pg_size_pretty(raw_waste) as waste
FROM
(SELECT
'table' as type,
schemaname,
tablename as object_name,
ROUND(CASE WHEN otta=0 THEN 0.0 ELSE table_bloat.relpages/otta::numeric END,1) AS bloat,
CASE WHEN relpages < otta THEN '0' ELSE (bs*(table_bloat.relpages-otta)::bigint)::bigint END AS raw_waste
FROM
table_bloat
UNION
SELECT
'index' as type,
schemaname,
tablename || '::' || iname as object_name,
ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS bloat,
CASE WHEN ipages < iotta THEN '0' ELSE (bs*(ipages-iotta))::bigint END AS raw_waste
FROM
index_bloat) bloat_summary
ORDER BY raw_waste DESC, bloat DESC;
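Once bloat has been identified, plain VACUUM only marks the dead space as reusable; actually shrinking the files requires a rewrite. A minimal sketch, with table_name as a placeholder (tools like pg_repack can do the same rewrite without holding the lock for the whole duration):
-- rewrites the whole table and its indexes; takes an ACCESS EXCLUSIVE lock
VACUUM FULL table_name;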
vacuum stats
WITH table_opts AS (
SELECT
pg_class.oid, relname, nspname, array_to_string(reloptions, '') AS relopts
FROM
pg_class INNER JOIN pg_namespace ns ON relnamespace = ns.oid
), vacuum_settings AS (
SELECT
oid, relname, nspname,
CASE
WHEN relopts LIKE '%autovacuum_vacuum_threshold%'
THEN substring(relopts, '.*autovacuum_vacuum_threshold=([0-9.]+).*')::integer
ELSE current_setting('autovacuum_vacuum_threshold')::integer
END AS autovacuum_vacuum_threshold,
CASE
WHEN relopts LIKE '%autovacuum_vacuum_scale_factor%'
THEN substring(relopts, '.*autovacuum_vacuum_scale_factor=([0-9.]+).*')::real
ELSE current_setting('autovacuum_vacuum_scale_factor')::real
END AS autovacuum_vacuum_scale_factor
FROM
table_opts
)
SELECT
vacuum_settings.nspname AS schema,
vacuum_settings.relname AS table,
to_char(psut.last_vacuum, 'YYYY-MM-DD HH24:MI') AS last_vacuum,
to_char(psut.last_autovacuum, 'YYYY-MM-DD HH24:MI') AS last_autovacuum,
to_char(pg_class.reltuples, '9G999G999G999') AS rowcount,
to_char(psut.n_dead_tup, '9G999G999G999') AS dead_rowcount,
to_char(autovacuum_vacuum_threshold
+ (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples), '9G999G999G999') AS autovacuum_threshold,
CASE
WHEN autovacuum_vacuum_threshold + (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples) < psut.n_dead_tup
THEN 'yes'
END AS expect_autovacuum
FROM
pg_stat_user_tables psut INNER JOIN pg_class ON psut.relid = pg_class.oid
INNER JOIN vacuum_settings ON pg_class.oid = vacuum_settings.oid
ORDER BY 1;
cache hit
SELECT
'index hit rate' AS name,
(sum(idx_blks_hit)) / nullif(sum(idx_blks_hit + idx_blks_read),0) AS ratio
FROM pg_statio_user_indexes
UNION ALL
SELECT
'table hit rate' AS name,
sum(heap_blks_hit) / nullif(sum(heap_blks_hit) + sum(heap_blks_read),0) AS ratio
FROM pg_statio_user_tables;
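A commonly cited rule of thumb is to aim for ratios around 0.99 or higher; consistently lower values usually mean the working set no longer fits in memory.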
sequential scans
Find which tables are doing sequential scans.
Sequential scans are not inherently bad as long as the table is small; on big tables, they are a performance killer.
SELECT relname AS name,
seq_scan as count
FROM
pg_stat_user_tables
ORDER BY seq_scan DESC;
total index size
SELECT
pg_size_pretty(sum(c.relpages::bigint*8192)::bigint) AS size
FROM
pg_class c
LEFT JOIN
pg_namespace n ON (n.oid = c.relnamespace)
WHERE
n.nspname NOT IN ('pg_catalog', 'information_schema')
AND n.nspname !~ '^pg_toast'
AND c.relkind='i';
The most time consuming queries query above requires the pg_stat_statements extension to be installed:
CREATE EXTENSION IF NOT EXISTS pg_stat_statements WITH SCHEMA <schema_name>;
index size per table
SELECT c.relname AS table,
pg_size_pretty(pg_indexes_size(c.oid)) AS index_size
FROM pg_class c
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
AND n.nspname !~ '^pg_toast'
AND c.relkind='r'
ORDER BY pg_indexes_size(c.oid) DESC;
total table size
SELECT c.relname AS name,
pg_size_pretty(pg_total_relation_size(c.oid)) AS size
FROM pg_class c
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
AND n.nspname !~ '^pg_toast'
AND c.relkind='r'
ORDER BY pg_total_relation_size(c.oid) DESC;
index usage
SELECT relname,
CASE idx_scan
WHEN 0 THEN 'Insufficient data'
ELSE (100 * idx_scan / (seq_scan + idx_scan))::text
END percent_of_times_index_used,
n_live_tup rows_in_table
FROM
pg_stat_user_tables
ORDER BY
n_live_tup DESC;
index size
SELECT
c.relname AS name,
pg_size_pretty(sum(c.relpages::bigint*8192)::bigint) AS size
FROM
pg_class c
LEFT JOIN
pg_namespace n ON (n.oid = c.relnamespace)
WHERE
n.nspname NOT IN ('pg_catalog', 'information_schema')
AND n.nspname !~ '^pg_toast'
AND c.relkind='i'
GROUP BY
c.relname
ORDER BY
sum(c.relpages) DESC;
Adding JSON constraints to JSON objects
I find it easy to think of JSONB columns as being open to anything – I’ll often create a JSONB ‘metadata’ column and use it as a sort of generic bucket to throw data into that I may or may not need later. They perform this job well.
While you can use JSON in a very dynamic, ‘seat of the pants’ fashion, you can also bring it into the structured world with queries, functions, and constraints.
For example, you might have a books table and be storing data about books within it as JSON documents
create table books(k serial primary key, doc jsonb not null);
insert into books(doc) values
('
{ "ISBN" : 4582546494267,
"title" : "Macbeth",
"author" :
{"given_name": "William",
"family_name": "Shakespeare"},
"year" : 1623
}
');
alter table books
add constraint books_doc_is_object
check(
jsonb_typeof(doc) is not null and
jsonb_typeof(doc) = 'object'
);
alter table books
add constraint books_doc_isbn_ok
check(
doc->>'ISBN' is not null and
jsonb_typeof(doc->'ISBN') = 'number' and
(doc->>'ISBN')::bigint > 0 and
length(doc->>'ISBN') = 13
);
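The same documents can then be queried in a structured way; a quick sketch against the table above:
-- find books by a given author using the jsonb containment operator
select doc->>'title' as title,
       doc->'author'->>'family_name' as author
from books
where doc @> '{"author": {"family_name": "Shakespeare"}}';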
`RANK` and `DENSE_RANK` for ranking rows
Let’s say we’ve been taking votes of people’s favorite database systems
CREATE TABLE databases (name TEXT, votes INT);
INSERT INTO databases VALUES
('Postgres', 10000),
('MySQL', 4522),
('SQLite', 9500),
('MongoDB', 4522),
('Oracle', 4580),
('Redis', 9500);
SELECT DENSE_RANK() OVER (ORDER by votes DESC), * FROM databases;
SELECT RANK() OVER (ORDER by votes DESC), * FROM databases;
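With this data, DENSE_RANK produces 1, 2, 2, 3, 4, 4 while RANK produces 1, 2, 2, 4, 5, 5: both give the tied 9500-vote entries rank 2, but RANK leaves a gap (it skips rank 3) because two rows already occupy rank 2, whereas DENSE_RANK does not.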
