Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
0064fc9
Refactor: extract BaseScan and ManifestGroupPlanner
smaheshwar-pltr Jun 16, 2026
ece2b0f
Feature: Incremental Append Scan
smaheshwar-pltr Jun 16, 2026
ecfa2fb
Merge remote-tracking branch 'origin/main' into sm/table-scan-refactor
smaheshwar-pltr Jun 17, 2026
e6d94e4
Merge remote-tracking branch 'origin/main' into sm/incremental-append…
smaheshwar-pltr Jun 17, 2026
8f5e9ad
Address review nits: drop redundant ABC base, keep extracted bodies a…
smaheshwar-pltr Jun 17, 2026
e9cb285
Merge branch 'sm/table-scan-refactor' into sm/incremental-append-scan-v3
smaheshwar-pltr Jun 17, 2026
6a0ef60
Address review nits
smaheshwar-pltr Jun 17, 2026
e382729
Add test coverage for incremental append scan
smaheshwar-pltr Jun 17, 2026
df00b11
Drop implementation-coupled comment from no-snapshot test
smaheshwar-pltr Jun 18, 2026
acb10f7
Merge branch 'sm/table-scan-refactor' into sm/incremental-append-scan-v3
smaheshwar-pltr Jun 18, 2026
e1e47af
Merge remote-tracking branch 'origin/main' into sm/table-scan-refactor
smaheshwar-pltr Jun 18, 2026
6e4cf3a
Merge branch 'sm/table-scan-refactor' into sm/incremental-append-scan-v3
smaheshwar-pltr Jun 18, 2026
a7e3f24
Merge main into sm/incremental-append-scan-v3
smaheshwar-pltr Jun 22, 2026
41d3ee7
Require from_snapshot_id_exclusive and drop range builders on Increme…
smaheshwar-pltr Jun 23, 2026
bfa7903
Open up IncrementalAppendScan API: optional and inclusive start
smaheshwar-pltr Jun 27, 2026
17c6237
Test that incremental append scan ignores compacted files
smaheshwar-pltr Jun 27, 2026
7f675ab
Review polish: restore IAS covariance, tidy assertions and ancestry h…
smaheshwar-pltr Jun 27, 2026
99dcfb5
Guard the compaction test against a no-op rewrite
smaheshwar-pltr Jun 27, 2026
b4e696c
Drop redundant comment on the compaction guard assert
smaheshwar-pltr Jun 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions dev/provision.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,3 +395,48 @@
)
# Declare a write order on `id` for the ordered-string fixture table, then insert the values 'a' and 'c'.
spark.sql(f"ALTER TABLE {catalog_name}.default.test_empty_scan_ordered_str WRITE ORDERED BY id")
spark.sql(f"INSERT INTO {catalog_name}.default.test_empty_scan_ordered_str VALUES 'a', 'c'")

# Fixture table for incremental append scans. Planned snapshot history:
#   0: append (1, 'a')
#   1: append (2, 'b')
#   2: append (3, 'c'), (4, 'b')
#   3: compact -- the two letter='b' files are rewritten into one (operation=replace)
#   4: delete number=2
#   5: append (5, 'd', 100) -- written after the `extra` column is added
#   6: replace table -- lineage break
create_incremental_read_sql = f"""
CREATE OR REPLACE TABLE {catalog_name}.default.test_incremental_read (
number integer,
letter string
)
USING iceberg
PARTITIONED BY (letter)
TBLPROPERTIES ('format-version'='2')
"""
spark.sql(create_incremental_read_sql)
# Each entry is written by its own INSERT statement, giving the append snapshots 0, 1 and 2 listed above.
for appended_rows in ("(1, 'a')", "(2, 'b')", "(3, 'c'), (4, 'b')"):
    spark.sql(f"INSERT INTO {catalog_name}.default.test_incremental_read VALUES {appended_rows}")
# Compact: letter='b' has two files (from the previous two appends); rewrite them into one.
# This commits a non-append (replace) snapshot whose rewritten file the append scan must not pick up.
Comment on lines +421 to +422

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressing #3512 (review)

# Remaining history of test_incremental_read, executed strictly in the order listed:
# compaction, row delete, schema evolution, append on the new schema, table replacement.
incremental_read_history_sql = (
    # Rewrite data files, requiring at least two input files.
    f"""
CALL {catalog_name}.system.rewrite_data_files(
table => 'default.test_incremental_read',
options => map('min-input-files', '2')
)
""",
    # Remove the row with number = 2.
    f"DELETE FROM {catalog_name}.default.test_incremental_read WHERE number = 2",
    # Evolve the schema with a new int column, then append a row that populates it.
    f"ALTER TABLE {catalog_name}.default.test_incremental_read ADD COLUMN extra int",
    f"INSERT INTO {catalog_name}.default.test_incremental_read VALUES (5, 'd', 100)",
    # Replace the table with a copy of its own current contents.
    f"""
REPLACE TABLE {catalog_name}.default.test_incremental_read
USING iceberg
PARTITIONED BY (letter)
TBLPROPERTIES ('format-version'='2')
AS SELECT number, letter, extra FROM {catalog_name}.default.test_incremental_read
""",
)
for history_sql in incremental_read_history_sql:
    spark.sql(history_sql)
Loading