Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ BACKLOG.md
EXPERIMENTAL_MVCC.md
HARDENING_DECOUPLED.md
ICEBERG_LAZY_REFRESH.md
CROSS_TIER_MOVE_DESIGN.md

# Local secrets — NEVER commit. (One-off GHCR push token; revoke after use.)
github_token_do_not_merge.txt
Expand Down
8 changes: 8 additions & 0 deletions docs/architecture_tiered.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,14 @@ AND of those, OR of those when all arms prove the same tier, BETWEEN
Subqueries, UDF calls, and expressions on the partition column are
AMBIGUOUS.

The tier classification above applies to the WHERE clause. The SET
clause carries a separate rule: an `UPDATE` that assigns the partition
column itself is rejected with SQLSTATE `0A000` (feature not supported),
regardless of `coldfront.allow_mixed_writes` and regardless of which
tier the WHERE clause selects. The check is deliberately blunt: the
assignment is rejected even when the new value would keep the row in
its current tier. Changing the partition column can move a row across
the cutoff, and the in-place rewrite would leave the row in its old tier
where the view's tier predicate then hides it. To change the partition
column, delete the row and re-insert it with the new value.

## Write modes: strict vs permissive (`allow_mixed_writes`)

When the predicate is AMBIGUOUS the hook picks one of two behaviours
Expand Down
1 change: 1 addition & 0 deletions extension/coldfront/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ EXTENSION = coldfront
DATA = coldfront--1.0.sql coldfront--0.1--1.0.sql
REGRESS = load_order update_unregistered_view update_heap_table \
update_hot_via_view update_cold_via_view update_ambiguous_rejected \
update_partition_key_blocked \
returning_literal_in_where cast_literal_in_value \
classify_between_in_or allow_mixed_writes cold_write_batch_size_guc \
dollar_quote_in_value mixed_case_identifier \
Expand Down
47 changes: 47 additions & 0 deletions extension/coldfront/src/coldfront.c
Original file line number Diff line number Diff line change
Expand Up @@ -1960,6 +1960,52 @@ cf_reject_multi_reference(Query *query, RangeTblEntry *rte)
"are not supported; reference it once.")));
}

/*
 * cf_reject_partition_col_update
 *		Refuse an UPDATE on a tiered view whose SET list assigns the view's
 *		partition column.
 *
 * Why this is blocked (GitHub #20): every emit path (hot, cold, or dual)
 * rewrites the row in place, in the tier where it already lives. If the new
 * partition-column value falls on the other side of the hot/cold cutoff, the
 * row stays physically in its old tier while the view's tier predicate
 * (ts >= cutoff / r[ts] < cutoff) no longer selects it there, so the row
 * silently vanishes from the view. Moving a row between tiers is a separate
 * feature that does not exist yet, so for now the partition column is
 * read-only through the view.
 *
 * The check is intentionally unconditional: it does not look at the tier of
 * the new value and it ignores coldfront.allow_mixed_writes, because without
 * a relocation step there is no safe case to let through.
 *
 * query - the statement under inspection; anything other than CMD_UPDATE is
 *		   ignored. Its targetList is the post-parse-analyze one, i.e. the
 *		   SET-assigned columns plus resjunk entries, not the full row.
 * rte	 - range table entry of the tiered view; rte->relid is used both to
 *		   resolve the partition column's attribute number and to name the
 *		   view in the error message.
 * info	 - tiered-view registration; a NULL partition_col disables the check.
 *
 * Returns normally when the statement is acceptable; otherwise raises
 * ERROR with ERRCODE_FEATURE_NOT_SUPPORTED.
 */
static void
cf_reject_partition_col_update(Query *query, RangeTblEntry *rte,
							   TieredViewInfo *info)
{
	AttrNumber	key_attno;
	ListCell   *cell;
	bool		assigns_key = false;

	/* Only UPDATE statements can assign columns of existing rows. */
	if (query->commandType != CMD_UPDATE)
		return;

	/* No partition column registered for this view: nothing to protect. */
	if (info->partition_col == NULL)
		return;

	/*
	 * Resolve the partition column name to its attribute number in the view.
	 * If the name does not resolve there is no resno to compare against, so
	 * the statement is let through unchanged.
	 */
	key_attno = get_attnum(rte->relid, info->partition_col);
	if (key_attno == InvalidAttrNumber)
		return;

	/* Look for a real (non-resjunk) SET target that is the partition column. */
	foreach(cell, query->targetList)
	{
		TargetEntry *entry = (TargetEntry *) lfirst(cell);

		if (!entry->resjunk && entry->resno == key_attno)
		{
			assigns_key = true;
			break;
		}
	}

	if (!assigns_key)
		return;

	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("UPDATE of partition column \"%s\" on tiered view \"%s\" is not supported",
					info->partition_col, get_rel_name(rte->relid)),
			 errhint("Changing \"%s\" can move the row across the hot/cold "
					 "boundary; that relocation is not yet supported. To "
					 "change \"%s\", delete the row and re-insert it with "
					 "the new value.",
					 info->partition_col, info->partition_col)));
}

/*
* Reject a multi-reference UPDATE/DELETE on a tiered view, then pick the emit
* path (tiered-INSERT split, hot, cold, or dual) and return the rewritten SQL.
Expand All @@ -1972,6 +2018,7 @@ cf_dispatch_emit(Query *query, RangeTblEntry *rte, TieredViewInfo *info,
TierClass tier;

cf_reject_multi_reference(query, rte);
cf_reject_partition_col_update(query, rte, info);

/* Tiered-view INSERT: bulk split-by-watermark via emit_tiered_insert.
* Iceberg-only INSERT falls through to the unconditional cold path
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
-- An UPDATE that assigns the partition column of a tiered view is blocked: such
-- a SET can move the row across the hot/cold cutoff, which the in-place rewrite
-- would lose silently (GitHub #20). The hook must ereport(ERROR) with SQLSTATE
-- 0A000 (ERRCODE_FEATURE_NOT_SUPPORTED) regardless of coldfront.allow_mixed_writes
-- and regardless of which tier the WHERE selects. Relocating the row is a separate
-- feature; until then the partition column is read-only via the view.
CREATE EXTENSION IF NOT EXISTS pg_duckdb;
NOTICE: extension "pg_duckdb" already exists, skipping
CREATE EXTENSION IF NOT EXISTS coldfront;
NOTICE: extension "coldfront" already exists, skipping
SET TIME ZONE 'UTC';
-- White-box: checks the hooks' SQL/DDL, not Iceberg I/O. Real cold I/O is ci/journey.sh; see README.md.
SET coldfront.warehouse = '';
SET coldfront.lakekeeper_endpoint = '';
CREATE TABLE public._events (id int, ts timestamptz, status text);
INSERT INTO public._events VALUES (1, '2026-04-01 12:00:00+00', 'hot_orig');
CREATE VIEW public.events AS SELECT * FROM public._events;
INSERT INTO coldfront.tiered_views(schema_name, relname, hot_table, iceberg_table, partition_col)
VALUES ('public', 'events', 'public._events', 'ice.default.events', 'ts');
INSERT INTO coldfront.archive_watermark(table_name, cutoff_time)
VALUES ('events', '2026-03-01'::timestamptz);
-- Permissive mode (default): a partition-column SET is still blocked, because
-- the move is not implemented and the dual-tier rewrite would lose the row.
SET coldfront.allow_mixed_writes = on;
-- Cold→hot crossing (the #20 repro): blocked.
UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig';
ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported
HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value.
-- Hot→cold crossing: blocked too (symmetric loss).
UPDATE public.events SET ts = '2026-01-05 10:00:00+00' WHERE ts = '2026-04-01 12:00:00+00';
ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported
HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value.
-- Same-tier constant SET is also blocked (blunt block; no per-value proof).
UPDATE public.events SET ts = '2026-04-02 12:00:00+00' WHERE ts = '2026-04-01 12:00:00+00';
ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported
HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value.
-- Non-constant partition-column SET (could cross per-row): blocked.
UPDATE public.events SET ts = ts + interval '6 months' WHERE id = 1;
ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported
HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value.
-- A tier-deterministic WHERE does not rescue a partition-column SET: blocked.
UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE ts = '2026-01-15 01:00:00+00';
ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported
HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value.
-- Strict mode: same block.
SET coldfront.allow_mixed_writes = off;
UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig';
ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported
HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value.
-- A SET that does NOT touch the partition column is unaffected (still routes by
-- WHERE tier — here hot, plain PG).
UPDATE public.events SET status = 'ok' WHERE ts = '2026-04-01 12:00:00+00';
-- _events: only the status update applied; every ts-changing statement errored.
SELECT id, ts, status FROM public._events ORDER BY id;
id | ts | status
----+------------------------------+--------
1 | Wed Apr 01 12:00:00 2026 UTC | ok
(1 row)

-- Cleanup. Unregister before dropping: the DDL hook blocks DROP of a
-- registered tiered table/view.
DELETE FROM coldfront.tiered_views;
DELETE FROM coldfront.archive_watermark;
DROP VIEW public.events;
DROP TABLE public._events;
60 changes: 60 additions & 0 deletions extension/coldfront/test/sql/update_partition_key_blocked.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- An UPDATE that assigns the partition column of a tiered view is blocked: such
-- a SET can move the row across the hot/cold cutoff, which the in-place rewrite
-- would lose silently (GitHub #20). The hook must ereport(ERROR) with SQLSTATE
-- 0A000 (ERRCODE_FEATURE_NOT_SUPPORTED) regardless of coldfront.allow_mixed_writes
-- and regardless of which tier the WHERE selects. Relocating the row is a separate
-- feature; until then the partition column is read-only via the view.

CREATE EXTENSION IF NOT EXISTS pg_duckdb;
CREATE EXTENSION IF NOT EXISTS coldfront;

SET TIME ZONE 'UTC';
-- White-box: checks the hooks' SQL/DDL, not Iceberg I/O. Real cold I/O is ci/journey.sh; see README.md.
SET coldfront.warehouse = '';
SET coldfront.lakekeeper_endpoint = '';

CREATE TABLE public._events (id int, ts timestamptz, status text);
INSERT INTO public._events VALUES (1, '2026-04-01 12:00:00+00', 'hot_orig');
CREATE VIEW public.events AS SELECT * FROM public._events;

INSERT INTO coldfront.tiered_views(schema_name, relname, hot_table, iceberg_table, partition_col)
VALUES ('public', 'events', 'public._events', 'ice.default.events', 'ts');
INSERT INTO coldfront.archive_watermark(table_name, cutoff_time)
VALUES ('events', '2026-03-01'::timestamptz);

-- Permissive mode (default): a partition-column SET is still blocked, because
-- the move is not implemented and the dual-tier rewrite would lose the row.
SET coldfront.allow_mixed_writes = on;

-- Cold→hot crossing (the #20 repro): blocked.
UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig';

-- Hot→cold crossing: blocked too (symmetric loss).
UPDATE public.events SET ts = '2026-01-05 10:00:00+00' WHERE ts = '2026-04-01 12:00:00+00';

-- Same-tier constant SET is also blocked (blunt block; no per-value proof).
UPDATE public.events SET ts = '2026-04-02 12:00:00+00' WHERE ts = '2026-04-01 12:00:00+00';

-- Non-constant partition-column SET (could cross per-row): blocked.
UPDATE public.events SET ts = ts + interval '6 months' WHERE id = 1;

-- A tier-deterministic WHERE does not rescue a partition-column SET: blocked.
UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE ts = '2026-01-15 01:00:00+00';

-- Strict mode: same block.
SET coldfront.allow_mixed_writes = off;
UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig';

-- A SET that does NOT touch the partition column is unaffected (still routes by
-- WHERE tier — here hot, plain PG).
UPDATE public.events SET status = 'ok' WHERE ts = '2026-04-01 12:00:00+00';

-- _events: only the status update applied; every ts-changing statement errored.
SELECT id, ts, status FROM public._events ORDER BY id;

-- Cleanup. Unregister before dropping: the DDL hook blocks DROP of a
-- registered tiered table/view.
DELETE FROM coldfront.tiered_views;
DELETE FROM coldfront.archive_watermark;
DROP VIEW public.events;
DROP TABLE public._events;