apache · JingsongLi · May 20, 2026 · May 19, 2026
diff --git a/docs/content/pypaimon/cli.md b/docs/content/pypaimon/cli.md
@@ -156,6 +156,67 @@ Output:
   5     Eve   32  Hangzhou
 ```
 
+### Table Explain
+
+Show the scan plan of a query without reading any data: the target snapshot, the pushed-down predicate / projection / limit, the partition / bucket / file-stats pruning funnel, and split-level signals (raw-convertible ratio, deletion-vector ratio, level histogram, files-per-split and split-size distribution). Useful for previewing the pruning effect of a predicate before actually running the read.
+
+```shell
+paimon table explain mydb.events
+```
+
+**Options:**
+
+- `--select, -s`: Project specific columns (comma-separated)
+- `--where, -w`: Filter condition in SQL-like syntax (same operators as `table read`)
+- `--limit, -l`: Row limit to push down
+- `--verbose, -v`: List every split with its files
+- `--format, -f`: Output format: `table` (default) or `json`
+
+**Examples:**
+
+```shell
+# Whole-table scan plan
+paimon table explain mydb.events
+
+# Push filter and projection through the planner
+paimon table explain mydb.events --where "dt = '2026-05-16' AND id = 7" -s dt,id,val
+
+# List every split (and its files) instead of just the aggregates
+paimon table explain mydb.events -w "dt = '2026-05-16'" --verbose
+
+# Machine-readable output for scripting (level_histogram keys are JSON strings)
+paimon table explain mydb.events --format json
+```
+
+Output:
+```
+== PyPaimon Scan Plan ==
+Table:              mydb.events (PK, HASH_FIXED)
+Snapshot:           5  (schema 0)
+Predicate:          (dt = '2026-05-16') AND (id = 7)
+Projection:         [dt, id, val]
+Limit:              <none>
+
+Partition pruning:  20 -> 4  (pruned 16)
+Bucket pruning:     4 -> 1  (pruned 3)
+File skipping:      1 -> 1  (pruned 0)
+
+Splits:             1
+  raw-convertible:  1 / 1
+  with DV:          0 / 1
+  all-above-L0:     0 / 1
+  files/split:      min=1  max=1  avg=1.00
+  size/split:       min=2.6 KiB  p50=2.6 KiB  p95=2.6 KiB  max=2.6 KiB
+
+Files:              1
+Total size:         2.6 KiB
+Estimated rows:     10   (merged: 10)
+Level histogram:    L0=1
+Deletion files:     0
+```
+
+`explain` reads the manifest list and manifest files but never opens any data files, so it is dramatically cheaper than a real read on large tables.
+
 ### Table Get
 
 Get and display table schema information in JSON format. The output format is the same as the schema JSON format used

diff --git a/docs/content/pypaimon/python-api.md b/docs/content/pypaimon/python-api.md
@@ -660,6 +660,27 @@ What the fields tell you:
 
 `ExplainResult` is a plain dataclass — alongside the human-readable `__str__` shown above, every field (`partition_pruning`, `bucket_pruning`, `file_skipping`, `split_count`, `splits_raw_convertible`, `level_histogram`, `splits`, ...) is addressable in Python for programmatic use.
 
+#### CLI
+
+The same scan plan is available from the `paimon` command line — useful for previewing the pruning effect of a predicate without writing any Python:
+
+```bash
+# Whole-table scan
+paimon -c paimon.yaml table explain default.events
+
+# Push down filter / projection / limit and list every split
+paimon -c paimon.yaml table explain default.events \
+    --where "dt = '2026-05-16' AND id = 7" \
+    --select dt,id,val \
+    --limit 100 \
+    --verbose
+
+# Machine-readable output (level_histogram keys are JSON strings)
+paimon -c paimon.yaml table explain default.events --format json
+```
+
+`--where` accepts the same SQL-like syntax as `paimon table read`. With `--format json`, the result is a structured dump of `ExplainResult` suitable for piping into `jq` or further processing.
+
 ## Rollback
 
 Paimon supports rolling back a table to a previous snapshot or tag. This is useful for undoing unwanted changes or

diff --git a/paimon-python/pypaimon/cli/cli_table.py b/paimon-python/pypaimon/cli/cli_table.py
@@ -22,6 +22,8 @@
 """
 
 import sys
+from dataclasses import asdict
+
 from pypaimon.common.json_util import JSON
 
 
@@ -147,6 +149,98 @@ def cmd_table_read(args):
         print(df.to_string(index=False))
 
 
+def cmd_table_explain(args):
+    """
+    Handle the 'table explain' command.
+
+    Builds a read builder from the CLI options (projection, filter,
+    limit), asks it for the scan plan and prints the plan as text or
+    JSON. Handled errors go to stderr followed by ``sys.exit(1)``.
+    """
+    from pypaimon.cli.cli import load_catalog_config, create_catalog
+
+    catalog = create_catalog(load_catalog_config(args.config))
+
+    table_identifier = args.table
+    name_parts = table_identifier.split('.')
+    if len(name_parts) != 2:
+        message = (f"Error: Invalid table identifier '{table_identifier}'. "
+                   f"Expected format: 'database.table'")
+        print(message, file=sys.stderr)
+        sys.exit(1)
+    database_name, table_name = name_parts
+
+    try:
+        table = catalog.get_table(f"{database_name}.{table_name}")
+    except Exception as err:
+        print(f"Error: Failed to get table '{table_identifier}': {err}", file=sys.stderr)
+        sys.exit(1)
+
+    builder = table.new_read_builder()
+    known_names = {field.name for field in table.table_schema.fields}
+
+    requested = getattr(args, 'select', None)
+    if requested:
+        projection = [name.strip() for name in requested.split(',')]
+        unknown = [name for name in projection if name not in known_names]
+        if unknown:
+            print(f"Error: Column(s) {unknown} do not exist in table '{table_identifier}'.",
+                  file=sys.stderr)
+            sys.exit(1)
+        builder = builder.with_projection(projection)
+
+    condition = getattr(args, 'where', None)
+    if condition:
+        from pypaimon.cli.where_parser import parse_where_clause
+        try:
+            predicate = parse_where_clause(condition, table.table_schema.fields)
+            if predicate:
+                builder = builder.with_filter(predicate)
+        except ValueError as err:
+            print(f"Error: Invalid WHERE clause: {err}", file=sys.stderr)
+            sys.exit(1)
+
+    # The limit is pushed down unconditionally (this differs from
+    # `table read`): explain should report what the planner sees,
+    # limit pushdown included.
+    row_limit = getattr(args, 'limit', None)
+    if row_limit is not None:
+        builder = builder.with_limit(row_limit)
+
+    verbose = getattr(args, 'verbose', False)
+    try:
+        plan = builder.explain(verbose=verbose)
+    except Exception as err:
+        print(f"Error: Failed to explain table '{table_identifier}': {err}", file=sys.stderr)
+        sys.exit(1)
+
+    # Text is the default; JSON goes through _explain_result_to_json_dict.
+    if getattr(args, 'format', 'table') != 'json':
+        print(str(plan))
+        return
+    import json
+    print(json.dumps(_explain_result_to_json_dict(plan), indent=2, ensure_ascii=False))
+
+
+def _explain_result_to_json_dict(result):
+    """Serialize an ``ExplainResult`` dataclass to a JSON-friendly dict.
+
+    ``level_histogram`` is keyed by ``int`` level, at the top level and in
+    each split. ``json.dumps`` would stringify those keys silently; do it
+    up front so the output is explicit. A ``None`` histogram becomes ``{}``.
+    """
+    def _string_keys(histogram):
+        # ``asdict`` always emits the field, so guard the value, not the key.
+        return {str(level): count for level, count in (histogram or {}).items()}
+
+    payload = asdict(result)
+    payload['level_histogram'] = _string_keys(payload.get('level_histogram'))
+    # ``splits`` may be None (the original guarded for it); treat it as empty.
+    for split in payload.get('splits') or []:
+        split['level_histogram'] = _string_keys(split.get('level_histogram'))
+    return payload
+
+
 def cmd_table_full_text_search(args):
     """
     Execute the 'table full-text-search' command.
@@ -827,7 +921,50 @@ def add_table_subcommands(table_parser):
         help='Output format: table (default) or json'
     )
     read_parser.set_defaults(func=cmd_table_read)
-
+
+    # table explain command
+    explain_parser = table_subparsers.add_parser(
+        'explain',
+        help='Show the scan plan (snapshot, pushdown, pruning funnel, split shape) '
+             'without reading data'
+    )
+    explain_parser.add_argument(
+        'table',
+        help='Table identifier in format: database.table'
+    )
+    explain_parser.add_argument(
+        '--select', '-s',
+        type=str,
+        default=None,
+        help='Project specific columns (comma-separated, e.g., "id,name,age")'
+    )
+    explain_parser.add_argument(
+        '--where', '-w',
+        type=str,
+        default=None,
+        help='Filter condition in SQL-like syntax '
+             '(e.g., "age > 18", "dt = \'2026-01-01\' AND id IN (1,2,3)")'
+    )
+    explain_parser.add_argument(
+        '--limit', '-l',
+        type=int,
+        default=None,
+        help='Row limit to push down'
+    )
+    explain_parser.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='List every split with its files'
+    )
+    explain_parser.add_argument(
+        '--format', '-f',
+        type=str,
+        choices=['table', 'json'],
+        default='table',
+        help='Output format: table (default) or json'
+    )
+    explain_parser.set_defaults(func=cmd_table_explain)
+
     # table get command
     get_parser = table_subparsers.add_parser('get', help='Get table schema information')
     get_parser.add_argument(

diff --git a/paimon-python/pypaimon/read/read_builder.py b/paimon-python/pypaimon/read/read_builder.py
@@ -101,9 +101,6 @@ def _nested_name_paths(self) -> Optional[List[List[str]]]:
     def new_predicate_builder(self) -> PredicateBuilder:
         return PredicateBuilder(self.read_type())
 
-    # TODO: surface this through pypaimon's CLI (alongside cli_sql /
-    # cli_table) so users can run `pypaimon explain ...` against a table
-    # without writing any Python.
     def explain(self, verbose: bool = False) -> ExplainResult:
         """Produce a structured scan plan for this builder.