Chapter 2: the rich type system Postgres gives you that other DBs don’t. JSONB, arrays, ranges, citext, intervals, INET. SQLAlchemy’s Postgres dialect surfaces them all.
JSONB
from sqlalchemy.dialects.postgresql import JSONB
class Event(Base):
id: Mapped[int] = mapped_column(primary_key=True)
payload: Mapped[dict] = mapped_column(JSONB, nullable=False)
Use JSONB (binary; indexable) over JSON (text). For 99% of cases: JSONB.
JSONB queries
from sqlalchemy import select
# Containment (uses GIN index well)
stmt = select(Event).where(Event.payload.contains({"status": "ok"}))
# Path access
stmt = select(Event).where(Event.payload["user_id"].astext == "42")
stmt = select(Event).where(Event.payload["user_id"].as_integer() == 42)
# Has key
from sqlalchemy import text
stmt = select(Event).where(Event.payload.has_key("user_id"))
# Path operations
stmt = select(Event.payload[("user", "email")].astext)
# JSONB path query
stmt = select(Event).where(text("payload @? '$.items[*] ? (@.qty > 5)'"))
-> path; ->> text-cast; @> containment. All exposed via SQLAlchemy ops.
Updating JSONB
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy import func
# Set a key
await session.execute(
update(Event)
.where(Event.id == 1)
.values(payload=func.jsonb_set(Event.payload, "{status}", '"ok"'))
)
# Concat (merge top-level)
await session.execute(
update(User).where(User.id == 1).values(prefs=User.prefs + {"theme": "dark"})
)
jsonb_set for nested updates. || (concat) for shallow merges.
GIN index for JSONB
from sqlalchemy import Index
class Event(Base):
__tablename__ = "events"
id: Mapped[int] = mapped_column(primary_key=True)
payload: Mapped[dict] = mapped_column(JSONB)
__table_args__ = (
Index("ix_events_payload_gin", "payload", postgresql_using="gin", postgresql_ops={"payload": "jsonb_path_ops"}),
)
jsonb_path_ops is faster + smaller than default operator class for @> queries.
Expression index for specific paths
__table_args__ = (
Index("ix_events_user_id", text("(payload->>'user_id')")),
)
For specific path queries. Smaller than full GIN.
ARRAY
from sqlalchemy.dialects.postgresql import ARRAY
class Post(Base):
id: Mapped[int] = mapped_column(primary_key=True)
tags: Mapped[list[str]] = mapped_column(ARRAY(String))
Native Postgres array. Append, contains, overlap operations.
Array operations
from sqlalchemy import any_
# Contains
stmt = select(Post).where(Post.tags.contains(["python", "fastapi"]))
# Any
stmt = select(Post).where(text("'python' = ANY(tags)"))
# Length
stmt = select(Post).where(func.array_length(Post.tags, 1) > 5)
# Append
await session.execute(
update(Post).where(Post.id == 1).values(tags=Post.tags + ["new-tag"])
)
GIN index on arrays
__table_args__ = (
Index("ix_posts_tags_gin", "tags", postgresql_using="gin"),
)
For efficient containment queries on arrays.
Range types
from sqlalchemy.dialects.postgresql import INT4RANGE, NUMRANGE, DATERANGE, TSRANGE, TSTZRANGE
class Booking(Base):
id: Mapped[int] = mapped_column(primary_key=True)
period: Mapped[Range] = mapped_column(TSTZRANGE)
Postgres ranges: [low, high) (inclusive low; exclusive high; configurable).
from psycopg2.extras import DateTimeRange
booking.period = DateTimeRange("2026-05-01", "2026-05-08")
# Overlaps
stmt = select(Booking).where(Booking.period.op("&&")(other_range))
# Contains
stmt = select(Booking).where(Booking.period.contains(point))
Exclusion constraints (no overlap)
from sqlalchemy import ExcludeConstraint
class Booking(Base):
__tablename__ = "bookings"
id: Mapped[int] = mapped_column(primary_key=True)
room_id: Mapped[int]
period: Mapped[Range] = mapped_column(TSTZRANGE)
__table_args__ = (
ExcludeConstraint(("room_id", "="), ("period", "&&"), name="ex_no_overlap"),
)
Two bookings for the same room can’t overlap. Enforced at the DB level. Postgres-only feature.
citext (case-insensitive text)
from sqlalchemy.dialects.postgresql import CITEXT
class User(Base):
email: Mapped[str] = mapped_column(CITEXT, unique=True)
'[email protected]' == '[email protected]' is true. Useful for emails.
Requires CREATE EXTENSION citext in the DB.
INTERVAL
from datetime import timedelta
from sqlalchemy import Interval
class Subscription(Base):
duration: Mapped[timedelta] = mapped_column(Interval)
Maps to Python timedelta. Postgres native INTERVAL type.
stmt = select(Subscription).where(Subscription.duration > timedelta(days=30))
INET / CIDR
from sqlalchemy.dialects.postgresql import INET, CIDR
class Visitor(Base):
ip: Mapped[str] = mapped_column(INET)
IP address type; supports CIDR-aware operators.
stmt = select(Visitor).where(Visitor.ip.op("<<")(text("'10.0.0.0/8'")))
<< = “is contained by network.”
UUID
import uuid
from sqlalchemy.dialects.postgresql import UUID
class Doc(Base):
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, server_default=text("gen_random_uuid()"))
gen_random_uuid() is built into Postgres 13+. For older: uuid_generate_v4() from uuid-ossp extension.
ENUM
from sqlalchemy.dialects.postgresql import ENUM
import enum
class Role(str, enum.Enum):
USER = "user"
ADMIN = "admin"
class User(Base):
role: Mapped[Role] = mapped_column(ENUM(Role, name="user_role"))
Native Postgres ENUM. Adding values is non-trivial:
ALTER TYPE user_role ADD VALUE 'editor'; -- not in transaction; can't undo
For schema-evolution flexibility: prefer VARCHAR + CHECK constraint.
hstore
from sqlalchemy.dialects.postgresql import HSTORE
class Doc(Base):
metadata_: Mapped[dict] = mapped_column(HSTORE)
Older flat key-value type. JSONB superseded it for most uses.
tsvector
from sqlalchemy.dialects.postgresql import TSVECTOR
from sqlalchemy import Computed
class Post(Base):
title: Mapped[str]
body: Mapped[str]
search: Mapped[str] = mapped_column(
TSVECTOR,
Computed("to_tsvector('english', title || ' ' || body)", persisted=True),
)
__table_args__ = (
Index("ix_posts_search", "search", postgresql_using="gin"),
)
Native full-text search. Generated column kept in sync. See Chapter 4.
Custom types
from sqlalchemy.types import UserDefinedType
class Money(UserDefinedType):
def get_col_spec(self):
return "money"
Or use TypeDecorator:
from sqlalchemy.types import TypeDecorator
from sqlalchemy.dialects.postgresql import JSONB
class PydanticJSONB(TypeDecorator):
impl = JSONB
cache_ok = True
def __init__(self, model_cls, **kwargs):
self.model_cls = model_cls
super().__init__(**kwargs)
def process_bind_param(self, value, dialect):
if value is None: return None
return value.model_dump() if isinstance(value, BaseModel) else value
def process_result_value(self, value, dialect):
if value is None: return None
return self.model_cls.model_validate(value)
Pydantic model stored as JSONB; auto-converted on the way in/out.
Common mistakes
1. JSON instead of JSONB
JSONB is binary, queryable, indexable. JSON stores raw text. Always JSONB.
2. No GIN index on JSONB queries
Sequential scan. Add GIN index matching your query operator (jsonb_path_ops for @>).
3. Native ENUM in fast-evolving schemas
Adding ENUM values is one-way. Prefer VARCHAR + CHECK.
4. Storing large arrays in JSONB
Arrays of 10000+ items: pagination is ugly. Normalize.
5. citext without case folding requirements
Adds overhead; sometimes you want strict case sensitivity.
What’s next
Chapter 3: Indexes.
Read this next
Building something AI-, backend-, or data-heavy and want a second pair of eyes? I do consulting and freelance work — see my projects and ways to reach me at rajpoot.dev .