diff --git a/.gitignore b/.gitignore index 61107d4..4de8b63 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,7 @@ BACKLOG.md EXPERIMENTAL_MVCC.md HARDENING_DECOUPLED.md ICEBERG_LAZY_REFRESH.md +CROSS_TIER_MOVE_DESIGN.md # Local secrets — NEVER commit. (One-off GHCR push token; revoke after use.) github_token_do_not_merge.txt diff --git a/docs/architecture_tiered.md b/docs/architecture_tiered.md index 1dc76cd..da481f3 100644 --- a/docs/architecture_tiered.md +++ b/docs/architecture_tiered.md @@ -226,6 +226,14 @@ AND of those, OR of those when all arms prove the same tier, BETWEEN Subqueries, UDF calls, and expressions on the partition column are AMBIGUOUS. +The tier classification above applies to the WHERE clause. The SET +clause carries a separate rule: an `UPDATE` that assigns the partition +column itself is rejected, regardless of `coldfront.allow_mixed_writes`. +Changing the partition column can move a row across the cutoff, and the +in-place rewrite would leave the row in its old tier where the view's +tier predicate then hides it. To change the partition column, delete the +row and re-insert it with the new value. 
+ ## Write modes: strict vs permissive (`allow_mixed_writes`) When the predicate is AMBIGUOUS the hook picks one of two behaviours diff --git a/extension/coldfront/Makefile b/extension/coldfront/Makefile index 423842c..62585c3 100644 --- a/extension/coldfront/Makefile +++ b/extension/coldfront/Makefile @@ -13,6 +13,7 @@ EXTENSION = coldfront DATA = coldfront--1.0.sql coldfront--0.1--1.0.sql REGRESS = load_order update_unregistered_view update_heap_table \ update_hot_via_view update_cold_via_view update_ambiguous_rejected \ + update_partition_key_blocked \ returning_literal_in_where cast_literal_in_value \ classify_between_in_or allow_mixed_writes cold_write_batch_size_guc \ dollar_quote_in_value mixed_case_identifier \ diff --git a/extension/coldfront/src/coldfront.c b/extension/coldfront/src/coldfront.c index a6cce09..07b0a26 100644 --- a/extension/coldfront/src/coldfront.c +++ b/extension/coldfront/src/coldfront.c @@ -1960,6 +1960,52 @@ cf_reject_multi_reference(Query *query, RangeTblEntry *rte) "are not supported; reference it once."))); } +/* + * Reject an UPDATE that assigns the partition column of a tiered view. Changing + * the partition column can move the row across the hot/cold cutoff; the in-place + * rewrite (hot, cold, or dual) updates the row where it already lives, so a moved + * row stays physically in its old tier while the view's tier predicate + * (ts >= cutoff / r[ts] < cutoff) filters it out — a silent disappearance + * (GitHub #20). Relocating the row across tiers is a separate, unimplemented + * feature; until then the partition column is read-only through the view. The + * targetList here is post-parse-analyze, so it holds exactly the SET-assigned + * columns (plus resjunk entries we skip) — not the full row. Blocks any assignment + * regardless of the new value's tier or coldfront.allow_mixed_writes: with no move + * to permit, allowing it would just reinstate the loss. Returns without effect unless the statement is an UPDATE, info->partition_col is set, and get_attnum() resolves that name on rte->relid; otherwise raises ERROR (ERRCODE_FEATURE_NOT_SUPPORTED) at the first non-resjunk target entry whose resno is the partition column. NOTE(review): the column is matched by name on rte->relid, so a view that exposes the partition column under a different name would hit the InvalidAttrNumber early return and pass unchecked — confirm tiered views are required to keep the column name. 
+ */ +static void +cf_reject_partition_col_update(Query *query, RangeTblEntry *rte, + TieredViewInfo *info) +{ + AttrNumber partcol_attno; + ListCell *lc; + + if (query->commandType != CMD_UPDATE || info->partition_col == NULL) + return; + + partcol_attno = get_attnum(rte->relid, info->partition_col); + if (partcol_attno == InvalidAttrNumber) + return; + + foreach(lc, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; + if (tle->resno == partcol_attno) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("UPDATE of partition column \"%s\" on tiered view \"%s\" is not supported", + info->partition_col, get_rel_name(rte->relid)), + errhint("Changing \"%s\" can move the row across the hot/cold " + "boundary; that relocation is not yet supported. To " + "change \"%s\", delete the row and re-insert it with " + "the new value.", + info->partition_col, info->partition_col))); + } +} + /* * Reject a multi-reference UPDATE/DELETE on a tiered view, then pick the emit * path (tiered-INSERT split, hot, cold, or dual) and return the rewritten SQL. @@ -1972,6 +2018,7 @@ cf_dispatch_emit(Query *query, RangeTblEntry *rte, TieredViewInfo *info, TierClass tier; cf_reject_multi_reference(query, rte); + cf_reject_partition_col_update(query, rte, info); /* Tiered-view INSERT: bulk split-by-watermark via emit_tiered_insert. * Iceberg-only INSERT falls through to the unconditional cold path diff --git a/extension/coldfront/test/expected/update_partition_key_blocked.out b/extension/coldfront/test/expected/update_partition_key_blocked.out new file mode 100644 index 0000000..4bf400e --- /dev/null +++ b/extension/coldfront/test/expected/update_partition_key_blocked.out @@ -0,0 +1,65 @@ +-- An UPDATE that assigns the partition column of a tiered view is blocked: such +-- a SET can move the row across the hot/cold cutoff, which the in-place rewrite +-- would lose silently (GitHub #20). 
The hook must ereport(ERROR) with SQLSTATE +-- 0A000 (ERRCODE_FEATURE_NOT_SUPPORTED) regardless of coldfront.allow_mixed_writes +-- and regardless of which tier the WHERE selects. Relocating the row is a separate +-- feature; until then the partition column is read-only via the view. +CREATE EXTENSION IF NOT EXISTS pg_duckdb; +NOTICE: extension "pg_duckdb" already exists, skipping +CREATE EXTENSION IF NOT EXISTS coldfront; +NOTICE: extension "coldfront" already exists, skipping +SET TIME ZONE 'UTC'; +-- White-box: checks the hooks' SQL/DDL, not Iceberg I/O. Real cold I/O is ci/journey.sh; see README.md. +SET coldfront.warehouse = ''; +SET coldfront.lakekeeper_endpoint = ''; +CREATE TABLE public._events (id int, ts timestamptz, status text); +INSERT INTO public._events VALUES (1, '2026-04-01 12:00:00+00', 'hot_orig'); +CREATE VIEW public.events AS SELECT * FROM public._events; +INSERT INTO coldfront.tiered_views(schema_name, relname, hot_table, iceberg_table, partition_col) +VALUES ('public', 'events', 'public._events', 'ice.default.events', 'ts'); +INSERT INTO coldfront.archive_watermark(table_name, cutoff_time) +VALUES ('events', '2026-03-01'::timestamptz); +-- Permissive mode (default): a partition-column SET is still blocked, because +-- the move is not implemented and the dual-tier rewrite would lose the row. +SET coldfront.allow_mixed_writes = on; +-- Cold→hot crossing (the #20 repro): blocked. +UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig'; +ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported +HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value. +-- Hot→cold crossing: blocked too (symmetric loss). 
+UPDATE public.events SET ts = '2026-01-05 10:00:00+00' WHERE ts = '2026-04-01 12:00:00+00'; +ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported +HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value. +-- Same-tier constant SET is also blocked (blunt block; no per-value proof). +UPDATE public.events SET ts = '2026-04-02 12:00:00+00' WHERE ts = '2026-04-01 12:00:00+00'; +ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported +HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value. +-- Non-constant partition-column SET (could cross per-row): blocked. +UPDATE public.events SET ts = ts + interval '6 months' WHERE id = 1; +ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported +HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value. +-- A tier-deterministic WHERE does not rescue a partition-column SET: blocked. +UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE ts = '2026-01-15 01:00:00+00'; +ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported +HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. To change "ts", delete the row and re-insert it with the new value. +-- Strict mode: same block. +SET coldfront.allow_mixed_writes = off; +UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig'; +ERROR: UPDATE of partition column "ts" on tiered view "events" is not supported +HINT: Changing "ts" can move the row across the hot/cold boundary; that relocation is not yet supported. 
To change "ts", delete the row and re-insert it with the new value. +-- A SET that does NOT touch the partition column is unaffected (still routes by +-- WHERE tier — here hot, plain PG). +UPDATE public.events SET status = 'ok' WHERE ts = '2026-04-01 12:00:00+00'; +-- _events: only the status update applied; every ts-changing statement errored. +SELECT id, ts, status FROM public._events ORDER BY id; + id | ts | status +----+------------------------------+-------- + 1 | Wed Apr 01 12:00:00 2026 UTC | ok +(1 row) + +-- Cleanup. Unregister before dropping: the DDL hook blocks DROP of a +-- registered tiered table/view. +DELETE FROM coldfront.tiered_views; +DELETE FROM coldfront.archive_watermark; +DROP VIEW public.events; +DROP TABLE public._events; diff --git a/extension/coldfront/test/sql/update_partition_key_blocked.sql b/extension/coldfront/test/sql/update_partition_key_blocked.sql new file mode 100644 index 0000000..9b4144f --- /dev/null +++ b/extension/coldfront/test/sql/update_partition_key_blocked.sql @@ -0,0 +1,60 @@ +-- An UPDATE that assigns the partition column of a tiered view is blocked: such +-- a SET can move the row across the hot/cold cutoff, which the in-place rewrite +-- would lose silently (GitHub #20). The hook must ereport(ERROR) with SQLSTATE +-- 0A000 (ERRCODE_FEATURE_NOT_SUPPORTED) regardless of coldfront.allow_mixed_writes +-- and regardless of which tier the WHERE selects. Relocating the row is a separate +-- feature; until then the partition column is read-only via the view. + +CREATE EXTENSION IF NOT EXISTS pg_duckdb; +CREATE EXTENSION IF NOT EXISTS coldfront; + +SET TIME ZONE 'UTC'; +-- White-box: checks the hooks' SQL/DDL, not Iceberg I/O. Real cold I/O is ci/journey.sh; see README.md. 
+SET coldfront.warehouse = ''; +SET coldfront.lakekeeper_endpoint = ''; + +CREATE TABLE public._events (id int, ts timestamptz, status text); +INSERT INTO public._events VALUES (1, '2026-04-01 12:00:00+00', 'hot_orig'); +CREATE VIEW public.events AS SELECT * FROM public._events; + +INSERT INTO coldfront.tiered_views(schema_name, relname, hot_table, iceberg_table, partition_col) +VALUES ('public', 'events', 'public._events', 'ice.default.events', 'ts'); +INSERT INTO coldfront.archive_watermark(table_name, cutoff_time) +VALUES ('events', '2026-03-01'::timestamptz); + +-- Permissive mode (default): a partition-column SET is still blocked, because +-- the move is not implemented and the dual-tier rewrite would lose the row. +SET coldfront.allow_mixed_writes = on; + +-- Cold→hot crossing (the #20 repro): blocked. +UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig'; + +-- Hot→cold crossing: blocked too (symmetric loss). +UPDATE public.events SET ts = '2026-01-05 10:00:00+00' WHERE ts = '2026-04-01 12:00:00+00'; + +-- Same-tier constant SET is also blocked (blunt block; no per-value proof). +UPDATE public.events SET ts = '2026-04-02 12:00:00+00' WHERE ts = '2026-04-01 12:00:00+00'; + +-- Non-constant partition-column SET (could cross per-row): blocked. +UPDATE public.events SET ts = ts + interval '6 months' WHERE id = 1; + +-- A tier-deterministic WHERE does not rescue a partition-column SET: blocked. +UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE ts = '2026-01-15 01:00:00+00'; + +-- Strict mode: same block. +SET coldfront.allow_mixed_writes = off; +UPDATE public.events SET ts = '2026-06-18 10:00:00+00' WHERE status = 'hot_orig'; + +-- A SET that does NOT touch the partition column is unaffected (still routes by +-- WHERE tier — here hot, plain PG). +UPDATE public.events SET status = 'ok' WHERE ts = '2026-04-01 12:00:00+00'; + +-- _events: only the status update applied; every ts-changing statement errored. 
+SELECT id, ts, status FROM public._events ORDER BY id; + +-- Cleanup. Unregister before dropping: the DDL hook blocks DROP of a +-- registered tiered table/view. +DELETE FROM coldfront.tiered_views; +DELETE FROM coldfront.archive_watermark; +DROP VIEW public.events; +DROP TABLE public._events;