diff --git a/br/pkg/restore/snap_client/systable_restore_test.go b/br/pkg/restore/snap_client/systable_restore_test.go index 9dc9f333d7197..97289c5c903d4 100644 --- a/br/pkg/restore/snap_client/systable_restore_test.go +++ b/br/pkg/restore/snap_client/systable_restore_test.go @@ -393,7 +393,7 @@ func TestCheckPrivilegeTableRowsCollateCompatibility(t *testing.T) { // // The above variables are in the file br/pkg/restore/systable_restore.go func TestMonitorTheSystemTableIncremental(t *testing.T) { - require.Equal(t, int64(258), session.CurrentBootstrapVersion) + require.Equal(t, int64(259), session.CurrentBootstrapVersion) } func TestIsStatsTemporaryTable(t *testing.T) { diff --git a/cmd/tidb-server/main.go b/cmd/tidb-server/main.go index ab4d6f58b3120..55529ab5adda5 100644 --- a/cmd/tidb-server/main.go +++ b/cmd/tidb-server/main.go @@ -24,6 +24,7 @@ import ( "strconv" "strings" "sync/atomic" + "syscall" "time" "github.com/grafana/pyroscope-go" @@ -145,6 +146,12 @@ const ( nmMaxIdleSeconds = "max-idle-seconds" ) +const ( + exitCodeOK = 0 + exitCodeErr = 1 + exitCodeInt = 128 + int(syscall.SIGINT) +) + var ( version *bool configPath *string @@ -403,31 +410,50 @@ func main() { } exited := make(chan struct{}) - signal.SetupSignalHandler(func() { + exitCode := exitCodeOK + signal.SetupSignalHandler(func(sig os.Signal) { svr.Close() resourcemanager.InstanceResourceManager.Stop() cleanup(svr, storage, dom) cpuprofile.StopCPUProfiler() executor.Stop() + exitCode = exitCodeForSignal(sig) close(exited) }) topsql.SetupTopProfiling(keyspace.GetKeyspaceNameBytesBySettings(), svr, dom) terror.MustNil(svr.Run(dom)) <-exited - syncLog() + if err := syncLog(); err != nil { + // Log sync failure means shutdown did not finish cleanly, so keep + // reporting it as a generic non-zero exit instead of a successful exit. 
+ exitCode = exitCodeErr + } + if exitCode != exitCodeOK { + os.Exit(exitCode) + } } -func syncLog() { +func exitCodeForSignal(sig os.Signal) int { + // Standby force shutdown uses SIGINT. Return 128+SIGINT so deployment scripts + // can identify this force-shutdown path. + if sig == syscall.SIGINT { + return exitCodeInt + } + return exitCodeOK +} + +func syncLog() error { if err := log.Sync(); err != nil { // Don't complain about /dev/stdout as Fsync will return EINVAL. if pathErr, ok := err.(*fs.PathError); ok { if pathErr.Path == "/dev/stdout" { - os.Exit(0) + return nil } } fmt.Fprintln(os.Stderr, "sync log err:", err) - os.Exit(1) + return err } + return nil } func checkTempStorageQuota() error { diff --git a/cmd/tidb-server/main_test.go b/cmd/tidb-server/main_test.go index b5e3160651462..3016b94cf5037 100644 --- a/cmd/tidb-server/main_test.go +++ b/cmd/tidb-server/main_test.go @@ -16,6 +16,7 @@ package main import ( "os" + "syscall" "testing" "github.com/pingcap/tidb/pkg/config" @@ -51,6 +52,26 @@ func TestRunMain(t *testing.T) { } } +func TestExitCodeForSignal(t *testing.T) { + tests := []struct { + name string + sig os.Signal + want int + }{ + {name: "SIGINT", sig: syscall.SIGINT, want: exitCodeInt}, + {name: "SIGTERM", sig: syscall.SIGTERM, want: exitCodeOK}, + {name: "SIGHUP", sig: syscall.SIGHUP, want: exitCodeOK}, + {name: "SIGQUIT", sig: syscall.SIGQUIT, want: exitCodeOK}, + {name: "nil", sig: nil, want: exitCodeOK}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.want, exitCodeForSignal(tt.sig)) + }) + } +} + func TestSetGlobalVars(t *testing.T) { defer view.Stop() require.Equal(t, "tikv,tiflash,tidb", variable.GetSysVar(vardef.TiDBIsolationReadEngines).Value) diff --git a/lightning/tests/lightning_write_timeout/run.sh b/lightning/tests/lightning_write_timeout/run.sh index e291b2b635e48..56bd3d728e0a3 100644 --- a/lightning/tests/lightning_write_timeout/run.sh +++ 
b/lightning/tests/lightning_write_timeout/run.sh @@ -42,7 +42,7 @@ for i in {1..200000}; do done set -x -export GO_FAILPOINTS="github.com/pingcap/tidb/pkg/lightning/backend/local/shortWaitNTimeout=100*return(1)" +export GO_FAILPOINTS="github.com/pingcap/tidb/pkg/lightning/backend/local/shortWaitNTimeout=5*return(1000)" run_lightning --backend local -d "$TEST_DIR/data" --config "$CUR/config.toml" -check_lightning_log_contains 'Experiencing a wait timeout while writing to tikv' +check_lightning_log_contains 'experiencing a wait timeout while writing to TiKV' diff --git a/pkg/ddl/create_table.go b/pkg/ddl/create_table.go index 431264809a438..0a3d691493b0b 100644 --- a/pkg/ddl/create_table.go +++ b/pkg/ddl/create_table.go @@ -786,6 +786,14 @@ func BuildSessionTemporaryTableInfo(ctx *metabuild.Context, store kv.Storage, is // BuildTableInfoWithStmt builds model.TableInfo from a SQL statement without validity check func BuildTableInfoWithStmt(ctx *metabuild.Context, s *ast.CreateTableStmt, dbCharset, dbCollate string, placementPolicyRef *model.PolicyRefInfo) (*model.TableInfo, error) { + // Apply tidb_enable_descending_index gate up front, before any constraint + // processing. See ApplyDescGateToIndexParts for the rationale: baking the + // decision in at submission time prevents a `SET GLOBAL` between statement + // submission and DDL owner replay from silently flipping the persisted + // schema. + for _, c := range s.Constraints { + ApplyDescGateToIndexParts(c.Keys) + } colDefs := s.Cols tableCharset, tableCollate, err := GetCharsetAndCollateInTableOption(0, s.Options, ctx.GetDefaultCollationForUTF8MB4()) if err != nil { @@ -1337,6 +1345,18 @@ func BuildTableInfo( isSingleIntPK := isSingleIntPKFromTableInfo(constr, tbInfo) if ShouldBuildClusteredIndex(ctx.GetClusteredIndexDefMode(), constr.Option, isSingleIntPK) { + // Reject DESC on the columns of a clustered PRIMARY KEY. 
+ // For PKIsHandle (single-int PK) the column becomes the + // row's int handle directly and BuildIndexInfo is never + // invoked, so this guard catches that case. For + // IsCommonHandle the same check fires again inside + // BuildIndexInfo for defence-in-depth. See pingcap/tidb#2519. + for _, key := range constr.Keys { + if key.Desc { + return nil, errors.Errorf( + "DESC is not supported on the columns of a clustered PRIMARY KEY; either drop the DESC keyword or declare the primary key as NONCLUSTERED") + } + } if isSingleIntPK { tbInfo.PKIsHandle = true } else { diff --git a/pkg/ddl/db_integration_test.go b/pkg/ddl/db_integration_test.go index 73eaf6b283dc5..0dd96b9f56beb 100644 --- a/pkg/ddl/db_integration_test.go +++ b/pkg/ddl/db_integration_test.go @@ -3124,6 +3124,406 @@ func TestIssue52680(t *testing.T) { tk.MustQuery("select * from issue52680").Check(testkit.Rows("1", "2", "3")) } +func TestDescendingIndexMetadataPlumbing(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + // With the feature gate off (the default), DESC is silently ignored at + // DDL time — preserving historical MySQL 5.7 behavior. + tk.MustExec("set @@global.tidb_enable_descending_index = off") + tk.MustExec("create table t_desc_off (a int, b int, index idx_off (a, b desc))") + is := domain.GetDomain(tk.Session()).InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_desc_off")) + require.NoError(t, err) + idx := tbl.Meta().Indices[0] + require.False(t, idx.Columns[0].Desc) + require.False(t, idx.Columns[1].Desc, "DESC must be dropped when the feature gate is off") + require.Equal(t, uint8(0), idx.Version) + tk.MustQuery("select collation from information_schema.statistics where index_name='idx_off' order by seq_in_index"). 
+		Check(testkit.Rows("A", "A"))
+
+	// With the feature gate on, DESC is persisted and reflected in SHOW CREATE
+	// TABLE and information_schema.STATISTICS. IndexInfo.Version bumps to 1 so
+	// an older TiDB reading this schema will refuse to serve queries.
+	tk.MustExec("set @@global.tidb_enable_descending_index = on")
+	defer tk.MustExec("set @@global.tidb_enable_descending_index = default")
+	tk.MustExec("create table t_desc_on (a int, b int, index idx_on (a, b desc))")
+	is = domain.GetDomain(tk.Session()).InfoSchema()
+	tbl, err = is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_desc_on"))
+	require.NoError(t, err)
+	idx = tbl.Meta().Indices[0]
+	require.False(t, idx.Columns[0].Desc)
+	require.True(t, idx.Columns[1].Desc)
+	require.Equal(t, uint8(1), idx.Version)
+
+	tk.MustQuery("select collation from information_schema.statistics where index_name='idx_on' order by seq_in_index").
+		Check(testkit.Rows("A", "D"))
+
+	showCreate := tk.MustQuery("show create table t_desc_on").Rows()[0][1].(string)
+	require.Contains(t, showCreate, "`b` DESC", "SHOW CREATE TABLE must preserve DESC in the index definition")
+	require.NotContains(t, showCreate, "`a` DESC", "ascending columns must render without an explicit direction")
+}
+
+// explainHasString reports whether any line of an EXPLAIN rowset contains the given substring.
+func explainHasString(rows [][]any, substr string) bool {
+	for _, row := range rows {
+		for _, col := range row {
+			if s, ok := col.(string); ok && strings.Contains(s, substr) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// TestDescendingIndexScanDirection verifies that the planner picks the right
+// scan direction once an index column is stored descending. It does NOT verify
+// result correctness — that depends on the physical-encoding work in a later
+// phase. The assertion here is purely on plan shape. 
+func TestDescendingIndexScanDirection(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + + // Descending-stored single-column index. A forward scan on this index + // (physically) yields rows in the declared order; the planner should + // therefore reach the required direction by flipping scan.Desc rather + // than mirroring the ORDER BY direction unconditionally. + tk.MustExec("drop table if exists t_desc") + tk.MustExec("create table t_desc (a int, b int, index idx_b_desc (b desc))") + + // ORDER BY b DESC on a DESC-stored index: a FORWARD scan satisfies the + // property, so the IndexRangeScan must NOT be marked as a reverse scan. + rows := tk.MustQuery("explain format='brief' select b from t_desc use index(idx_b_desc) order by b desc").Rows() + require.True(t, explainHasString(rows, "keep order:true"), + "DESC idx + ORDER BY DESC: expected keep order:true") + require.False(t, explainHasString(rows, "keep order:true, desc"), + "DESC idx + ORDER BY DESC: expected forward scan (no ', desc'), got reverse") + + // ORDER BY b ASC on a DESC-stored index: a REVERSE scan satisfies the + // property. The scan must be marked as desc. + rows = tk.MustQuery("explain format='brief' select b from t_desc use index(idx_b_desc) order by b asc").Rows() + require.True(t, explainHasString(rows, "keep order:true, desc"), + "DESC idx + ORDER BY ASC: expected reverse scan (', desc')") + + // Mixed-direction composite (a ASC, b DESC) — this is the MySQL 8.0 + // flagship use case. Forward scan satisfies "ORDER BY a ASC, b DESC" + // directly; reverse scan satisfies "ORDER BY a DESC, b ASC"; uniform + // "ORDER BY a, b" is unsatisfiable by either direction and must fall + // back to a Sort. 
+ tk.MustExec("drop table if exists t_mixed") + tk.MustExec("create table t_mixed (a int, b int, index idx_mixed (a, b desc))") + + // Matching the index direction-for-direction: forward scan, no Sort. + rows = tk.MustQuery("explain format='brief' select a, b from t_mixed use index(idx_mixed) order by a asc, b desc").Rows() + require.False(t, explainHasString(rows, "Sort"), + "INDEX(a, b DESC) ORDER BY a ASC, b DESC must use the index without a Sort") + require.True(t, explainHasString(rows, "keep order:true"), + "forward scan should keep order") + require.False(t, explainHasString(rows, "keep order:true, desc"), + "forward scan must not be marked desc") + + // Inverted direction-for-direction: reverse scan, still no Sort. + rows = tk.MustQuery("explain format='brief' select a, b from t_mixed use index(idx_mixed) order by a desc, b asc").Rows() + require.False(t, explainHasString(rows, "Sort"), + "INDEX(a, b DESC) ORDER BY a DESC, b ASC must use the index in reverse without a Sort") + require.True(t, explainHasString(rows, "keep order:true, desc"), + "reverse scan should be marked desc") + + // Mixed mismatch: forward gives (a ASC, b DESC), reverse gives (a DESC, + // b ASC); neither matches "ORDER BY a, b" uniformly ASC, so a Sort is + // required. + rows = tk.MustQuery("explain format='brief' select a, b from t_mixed use index(idx_mixed) order by a, b").Rows() + require.True(t, explainHasString(rows, "Sort"), + "INDEX(a, b DESC) ORDER BY a ASC, b ASC is unsatisfiable; expected a Sort") + + // With the feature gate off, DESC is dropped at DDL time, so the same + // index behaves like an ascending one — ORDER BY DESC should drive a + // reverse scan, preserving historical behavior. 
+ tk.MustExec("set @@global.tidb_enable_descending_index = off") + tk.MustExec("drop table if exists t_off") + tk.MustExec("create table t_off (a int, b int, index idx_off (b desc))") + rows = tk.MustQuery("explain format='brief' select b from t_off use index(idx_off) order by b desc").Rows() + require.True(t, explainHasString(rows, "keep order:true, desc"), + "feature gate off: DESC must be dropped and reverse scan must remain the path for ORDER BY DESC") +} + +// TestDescendingIndexSysvarIsCreateTimeOnly nails down the gate's semantic: +// tidb_enable_descending_index controls whether *new* CREATE INDEX statements +// persist DESC, but turning it off later must not invalidate or rewrite any +// existing DESC index. The persisted IndexColumn.Desc + IndexInfo.Version +// remain in metadata, the planner keeps using the index, and INSERT/SELECT +// continue to round-trip correctly. +func TestDescendingIndexSysvarIsCreateTimeOnly(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + // Create the DESC index while the gate is ON. + tk2.MustExec("drop table if exists t_sysvar_flip") + tk2.MustExec("create table t_sysvar_flip (a int, b int, index idx_b (b desc))") + tk2.MustExec("insert into t_sysvar_flip values (1, 5), (2, 20), (3, 12)") + + // Flip the gate OFF in a fresh session and reload the schema. + tk.MustExec("set @@global.tidb_enable_descending_index = off") + tk3 := testkit.NewTestKit(t, store) + tk3.MustExec("use test") + + // Existing DESC index metadata must survive the flip. 
+ is := domain.GetDomain(tk3.Session()).InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_sysvar_flip")) + require.NoError(t, err) + idx := tbl.Meta().Indices[0] + require.True(t, idx.Columns[0].Desc, "existing DESC index must remain DESC after sysvar flip") + require.Equal(t, uint8(1), idx.Version) + + // Reads against the DESC index continue to work. + tk3.MustQuery("select b from t_sysvar_flip use index(idx_b) order by b desc"). + Check(testkit.Rows("20", "12", "5")) + + // Inserts continue to be encoded correctly (the table's existing index + // has Desc=true, so GenIndexKey will go through the DESC path even + // though the gate is off). + tk3.MustExec("insert into t_sysvar_flip values (4, 30)") + tk3.MustQuery("select b from t_sysvar_flip use index(idx_b) order by b desc"). + Check(testkit.Rows("30", "20", "12", "5")) + + // A *new* index built while the gate is off must drop DESC silently + // (preserving MySQL 5.7 compatibility for migration scripts). + tk3.MustExec("alter table t_sysvar_flip add index idx_a (a desc)") + is = domain.GetDomain(tk3.Session()).InfoSchema() + tbl, err = is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_sysvar_flip")) + require.NoError(t, err) + var newIdx *model.IndexInfo + for _, idx := range tbl.Meta().Indices { + if idx.Name.L == "idx_a" { + newIdx = idx + break + } + } + // Assert presence outside the loop — without this, an upstream + // regression that fails to create idx_a (or renames it) would make + // the assertions vacuously skip and the test would falsely pass. 
+ require.NotNil(t, newIdx, "expected idx_a to be present after ALTER TABLE") + require.False(t, newIdx.Columns[0].Desc, "new index built with gate off must drop DESC") + require.Equal(t, uint8(0), newIdx.Version, "new ASC-only index must stay at Version=0") +} + +// TestDescOnClusteredPrimaryKeyIsRejected verifies the DDL guard that +// refuses DESC on the columns of a clustered PRIMARY KEY. The PK columns +// determine the row's physical key bytes; allowing DESC there would +// require coordinated changes across many code paths that currently +// assume the row key is ordered ascending. The DDL guard protects us +// until that work is done. +func TestDescOnClusteredPrimaryKeyIsRejected(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + tk2.MustExec("set @@tidb_enable_clustered_index = ON") + + // Sanity: sysvar is actually on for this session before exercising DDL. + tk2.MustQuery("select @@global.tidb_enable_descending_index").Check(testkit.Rows("1")) + + // Multi-column PRIMARY KEY ... CLUSTERED with DESC must error. + tk2.MustExec("drop table if exists t_clustered_desc") + err := tk2.ExecToErr("create table t_clustered_desc (a int, b int, c int, primary key (a, b desc) clustered)") + require.Error(t, err) + require.Contains(t, err.Error(), "DESC is not supported on the columns of a clustered PRIMARY KEY") + + // Single-int PRIMARY KEY with DESC also rejected (PKIsHandle path). 
+ err = tk2.ExecToErr("create table t_pk_handle_desc (a int, b int, primary key (a desc) clustered)") + require.Error(t, err) + require.Contains(t, err.Error(), "DESC is not supported on the columns of a clustered PRIMARY KEY") + + // NONCLUSTERED primary key with DESC is fine — at this layer it's + // just a unique secondary index. + tk2.MustExec("drop table if exists t_nonclustered_desc") + tk2.MustExec("create table t_nonclustered_desc (a int, b int, primary key (a desc) nonclustered)") + + // Regular DESC index on a clustered table is fine — only the PK + // columns themselves are restricted. + tk2.MustExec("drop table if exists t_clustered_secondary_desc") + tk2.MustExec("create table t_clustered_secondary_desc (a varchar(10), b int, primary key (a) clustered, index idx_b (b desc))") +} + +// TestExpressionIndexDescPersists verifies that DESC on an expression index +// part is preserved end-to-end. The parser already accepts +// `INDEX((expr) DESC)`; this test asserts the rest of the pipeline (DDL, +// SHOW CREATE TABLE, INFORMATION_SCHEMA) reflects the order correctly. 
+func TestExpressionIndexDescPersists(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + tk2.MustExec("drop table if exists t_expr_desc") + tk2.MustExec("create table t_expr_desc (a int, b int, key idx_expr ((a + b) desc))") + + is := domain.GetDomain(tk2.Session()).InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_expr_desc")) + require.NoError(t, err) + idx := tbl.Meta().Indices[0] + require.Equal(t, "idx_expr", idx.Name.L) + require.True(t, idx.Columns[0].Desc, "expression index column must be marked DESC") + require.Equal(t, uint8(1), idx.Version, "DESC expression index must bump IndexInfo.Version") + + // SHOW CREATE TABLE must round-trip the DESC. + showCreate := tk2.MustQuery("show create table t_expr_desc").Rows()[0][1].(string) + require.Contains(t, showCreate, "DESC", "SHOW CREATE TABLE must preserve DESC on the expression part") + + // information_schema reports collation 'D' for the expression part. + tk2.MustQuery("select collation from information_schema.statistics where table_name='t_expr_desc' and index_name='idx_expr'"). + Check(testkit.Rows("D")) +} + +// TestMixedDirectionCompositeIndex covers the MySQL 8.0 flagship use case: +// a composite index with mixed ASC/DESC columns must satisfy "ORDER BY" +// requests that match its direction vector (forward) or are the bitwise +// complement of it (reverse), with row data coming back correctly ordered. 
+func TestMixedDirectionCompositeIndex(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + tk2.MustExec("drop table if exists t_mixed_e2e") + tk2.MustExec("create table t_mixed_e2e (a int, b int, index idx_ab (a, b desc))") + // Two values of a, several b values per a, and one DISTINCT b across + // the two groups so the join key isn't trivial. + tk2.MustExec("insert into t_mixed_e2e values (1, 10), (1, 5), (1, 15), (2, 20), (2, 8), (2, 12)") + + // Forward scan satisfies ORDER BY a ASC, b DESC: rows must come back + // (a ascending, b descending within each a). + tk2.MustQuery("select a, b from t_mixed_e2e use index(idx_ab) order by a asc, b desc").Check(testkit.Rows( + "1 15", "1 10", "1 5", + "2 20", "2 12", "2 8", + )) + + // Reverse scan satisfies ORDER BY a DESC, b ASC. + tk2.MustQuery("select a, b from t_mixed_e2e use index(idx_ab) order by a desc, b asc").Check(testkit.Rows( + "2 8", "2 12", "2 20", + "1 5", "1 10", "1 15", + )) + + // "ORDER BY a, b" is unsatisfiable by either direction; results must + // still be correct (planner falls back to a Sort) but we don't assert + // scan direction here, just row order. + tk2.MustQuery("select a, b from t_mixed_e2e use index(idx_ab) order by a, b").Check(testkit.Rows( + "1 5", "1 10", "1 15", + "2 8", "2 12", "2 20", + )) +} + +// TestDescendingIndexEndToEnd verifies INSERT + SELECT through a DESC index +// end-to-end: writes go through the complement-encoded path, the mutation +// consistency check tolerates them, and the unistore coprocessor emulation +// decodes the returned keys correctly via auto-detecting DESC flag bytes. 
+func TestDescendingIndexEndToEnd(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + tk2.MustExec("drop table if exists t_desc_e2e") + tk2.MustExec("create table t_desc_e2e (a int, b int, index idx_b (b desc))") + tk2.MustExec("insert into t_desc_e2e values (1, 10), (2, 20), (3, 5), (4, 30), (5, 15)") + + // Sanity: the index really is stored as DESC. + tk2.MustQuery("select collation from information_schema.statistics where table_name='t_desc_e2e' and index_name='idx_b'"). + Check(testkit.Rows("D")) + + // Point lookup on DESC column must find the row. + tk2.MustQuery("select a from t_desc_e2e use index(idx_b) where b = 20").Check(testkit.Rows("2")) + tk2.MustQuery("select a from t_desc_e2e use index(idx_b) where b = 5").Check(testkit.Rows("3")) + tk2.MustQuery("select a from t_desc_e2e use index(idx_b) where b = 999").Check(testkit.Rows()) + + // Full ordered scan of the DESC index returns rows in b-descending order + // via a forward keyspace scan (no reverse-scan flag on the IndexReader). + tk2.MustQuery("select b from t_desc_e2e use index(idx_b) order by b desc").Check(testkit.Rows("30", "20", "15", "10", "5")) + // The ORDER BY ASC variant exercises the reverse-scan path on the DESC + // index and must also return the correct order. + tk2.MustQuery("select b from t_desc_e2e use index(idx_b) order by b asc").Check(testkit.Rows("5", "10", "15", "20", "30")) +} + +// TestUnservableIndexRejectsQueries exercises the IsServable fence: if a +// table contains an index whose metadata Version is newer than this binary +// understands, every plan-building entry point (SELECT, INSERT) must surface +// a clear error rather than silently produce wrong rows. 
+func TestUnservableIndexRejectsQueries(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t_unservable") + tk.MustExec("create table t_unservable (a int, b int, key idx_b (b))") + + // Forge a forward-version index by mutating the in-memory schema. We + // can't create one through DDL because the current TiDB binary won't + // emit Version > maxKnownIndexVersion — the point of the fence is to + // guard a downlevel binary reading what some future binary wrote. + is := dom.InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_unservable")) + require.NoError(t, err) + tblInfo := tbl.Meta() + require.Len(t, tblInfo.Indices, 1) + tblInfo.Indices[0].Version = 99 + + // SELECT planning must reject the table because the planner's + // getPossibleAccessPaths walks tableInfo.Indices. + _, err = tk.Exec("select * from t_unservable where b = 1") + require.Error(t, err) + require.Contains(t, err.Error(), "metadata version 99") + require.Contains(t, err.Error(), "DROP INDEX") + + // INSERT VALUES doesn't enumerate access paths but still lands on the + // same fence via checkAllIndicesServable in buildInsert. + _, err = tk.Exec("insert into t_unservable values (1, 2)") + require.Error(t, err) + require.Contains(t, err.Error(), "metadata version 99") + + // Writable non-public states must also be fenced. tables.IsIndexWritable + // returns true for StateWriteOnly and StateWriteReorganization, so the + // mutation paths in pkg/table/tables/tables.go would write to an index + // in those states. checkAllIndicesServable must reject them too, + // otherwise an older TiDB binary running DML against a table whose + // new-format index is mid-DDL would silently corrupt the index. 
+ for _, st := range []model.SchemaState{model.StateWriteOnly, model.StateWriteReorganization} { + tblInfo.Indices[0].State = st + _, err = tk.Exec("insert into t_unservable values (3, 4)") + require.Errorf(t, err, "INSERT must fail when index is in %s with unservable version", st) + require.Contains(t, err.Error(), "metadata version 99") + } + + // And the inverse: StateDeleteOnly / StateDeleteReorganization are NOT + // writable, so the fence intentionally lets them through. Restore to + // public afterwards so the table-cleanup teardown is well-formed. + for _, st := range []model.SchemaState{model.StateDeleteOnly, model.StateDeleteReorganization} { + tblInfo.Indices[0].State = st + _, err = tk.Exec("insert into t_unservable values (5, 6)") + require.NoErrorf(t, err, "INSERT must pass when index is in %s (not writable, fence intentionally skips)", st) + } + tblInfo.Indices[0].State = model.StatePublic +} + func TestCreateIndexWithChangeMaxIndexLength(t *testing.T) { store := testkit.CreateMockStore(t) originCfg := config.GetGlobalConfig() diff --git a/pkg/ddl/desc_index_version_check_test.go b/pkg/ddl/desc_index_version_check_test.go new file mode 100644 index 0000000000000..121a8e1f32ae0 --- /dev/null +++ b/pkg/ddl/desc_index_version_check_test.go @@ -0,0 +1,153 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package ddl
+
+import (
+	"testing"
+
+	"github.com/pingcap/kvproto/pkg/metapb"
+	"github.com/pingcap/tidb/pkg/ddl/placement"
+	"github.com/stretchr/testify/require"
+)
+
+// storeAt builds a synthetic *metapb.Store for the version-check tests.
+// Pass an empty version to simulate a store that hasn't reported one yet.
+func storeAt(id uint64, version string, isTiFlash bool) *metapb.Store {
+	s := &metapb.Store{Id: id, Version: version, Address: "mock"}
+	if isTiFlash {
+		s.Labels = []*metapb.StoreLabel{{Key: placement.EngineLabelKey, Value: placement.EngineLabelTiFlash}}
+	}
+	return s
+}
+
+func TestCheckStoresMeetDescIndexMinVersion(t *testing.T) {
+	const minVer = "9.0.0"
+	const failClosed = false // production semantics
+
+	t.Run("all TiKV stores are new enough", func(t *testing.T) {
+		stores := []*metapb.Store{
+			storeAt(1, "9.0.0", false),
+			storeAt(2, "9.1.0", false),
+			storeAt(3, "v9.0.0", false), // leading-v normalisation
+		}
+		require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed))
+	})
+
+	t.Run("an old TiKV store fails the gate", func(t *testing.T) {
+		stores := []*metapb.Store{
+			storeAt(1, "9.0.0", false),
+			storeAt(2, "8.5.0", false),
+		}
+		err := checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)
+		require.Error(t, err)
+		require.Contains(t, err.Error(), "store 2")
+		require.Contains(t, err.Error(), "8.5.0")
+		require.Contains(t, err.Error(), minVer)
+		require.Contains(t, err.Error(), "upgrade TiKV")
+	})
+
+	t.Run("TiFlash stores are excluded from the check", func(t *testing.T) {
+		// A TiFlash store on an old version must not block the gate; the
+		// check is for TiKV only because TiKV alone runs the coprocessor
+		// path that needs the new decoder. 
+ stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "1.0.0", true), // ancient TiFlash, ignored + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)) + }) + + t.Run("tombstone stores are skipped", func(t *testing.T) { + old := storeAt(2, "8.0.0", false) + old.State = metapb.StoreState_Tombstone + stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + old, + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)) + }) + + t.Run("empty version fails closed in production", func(t *testing.T) { + // A store that hasn't reported a clean semver yet cannot prove it + // can decode descending-order keys. The gate must reject the DDL + // rather than fall through and risk silent corruption. The operator + // can retry once PD has reconciled. + stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "", false), + } + err := checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed) + require.Error(t, err) + require.Contains(t, err.Error(), "store 2") + require.Contains(t, err.Error(), "has not reported a version") + }) + + t.Run("empty version is tolerated in tests", func(t *testing.T) { + // In `intest` builds the mock store fixture reports empty versions + // for every replica, so the gate must let DDL through to allow + // integration tests to run. + stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "", false), + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, true)) + }) + + t.Run("garbage version always fails closed", func(t *testing.T) { + // Unparsable version strings are unambiguously a bug or a + // misconfigured store; never tolerate them, even in tests. 
+ stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "not-a-semver", false), + } + err := checkStoresMeetDescIndexMinVersion(stores, minVer, true) + require.Error(t, err) + require.Contains(t, err.Error(), "store 2") + require.Contains(t, err.Error(), "unparsable") + }) + + t.Run("malformed minVersion is reported", func(t *testing.T) { + err := checkStoresMeetDescIndexMinVersion(nil, "definitely-not-semver", failClosed) + require.Error(t, err) + }) + + t.Run("pre-release at the floor base version is accepted", func(t *testing.T) { + // Strict semver treats `9.0.0-beta.2` < `9.0.0`, but a nightly / + // release-candidate cluster cut from the branch that contains the + // feature does carry the new decoder. Reject only when the base + // (Major.Minor.Patch) is below the floor; pre-release tags at the + // floor base pass. tiup playground reports versions like + // `9.0.0-beta.2` so this is what the e2e runner relies on. + stores := []*metapb.Store{ + storeAt(1, "9.0.0-beta.2", false), + storeAt(2, "9.0.0-rc.1", false), + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)) + }) + + t.Run("pre-release below the floor base still fails", func(t *testing.T) { + // `8.5.0-beta.2`'s base (8.5.0) is below the floor (9.0.0), so the + // pre-release relaxation must NOT let it through. This guards + // against accidentally degrading the check to "any pre-release + // passes". 
+ stores := []*metapb.Store{ + storeAt(1, "8.5.0-beta.2", false), + } + err := checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed) + require.Error(t, err) + require.Contains(t, err.Error(), "store 1") + require.Contains(t, err.Error(), "8.5.0-beta.2") + }) +} diff --git a/pkg/ddl/executor.go b/pkg/ddl/executor.go index 32cdacbba4f01..f1e43870c5ecd 100644 --- a/pkg/ddl/executor.go +++ b/pkg/ddl/executor.go @@ -29,12 +29,15 @@ import ( "time" "unicode/utf8" + "github.com/coreos/go-semver/semver" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/kerneltype" "github.com/pingcap/tidb/pkg/ddl/label" "github.com/pingcap/tidb/pkg/ddl/logutil" + "github.com/pingcap/tidb/pkg/ddl/placement" "github.com/pingcap/tidb/pkg/ddl/resourcegroup" sess "github.com/pingcap/tidb/pkg/ddl/session" ddlutil "github.com/pingcap/tidb/pkg/ddl/util" @@ -78,10 +81,26 @@ import ( "github.com/pingcap/tidb/pkg/util/traceevent" "github.com/pingcap/tidb/pkg/util/tracing" "github.com/tikv/client-go/v2/oracle" + "github.com/tikv/client-go/v2/tikv" pdhttp "github.com/tikv/pd/client/http" "go.uber.org/zap" ) +// MinTiKVVersionForDescIndex is the lowest TiKV semver that contains the +// DESC-aware coprocessor decoder added for pingcap/tidb#2519. The CREATE +// INDEX path refuses to persist a column with Desc=true unless every TiKV +// store reports at least this version, so the cluster can never end up in +// a state where TiDB writes complemented index bytes that TiKV mis-decodes. +// +// TODO(release): bump this in lockstep with the TiKV release that contains +// the matching Rust changes (split_datum / decode_one_with_desc auto-detect +// of complemented flag bytes). 
The placeholder below intentionally sits +// well past the most recent stable release so the gate fails closed in any +// pre-release deployment — operators who flip tidb_enable_descending_index +// on a cluster that hasn't been upgraded will see a clear "upgrade TiKV" +// error rather than silently corrupting their indexes. +const MinTiKVVersionForDescIndex = "9.0.0" + const ( expressionIndexPrefix = "_V$" tableNotExist = -1 @@ -1123,6 +1142,20 @@ func (e *executor) createTableWithInfoJob( } } + // Run the descending-index TiKV-version gate at the shared chokepoint + // for CREATE TABLE so CreateTableWithInfo and BatchCreateTableWithInfo + // are covered alongside CreateTable. Placed AFTER the OnExistIgnore + // short-circuit so a no-op `CREATE TABLE IF NOT EXISTS` against an + // existing table never makes a PD round-trip. See pingcap/tidb#2519. + for _, idx := range tbInfo.Indices { + if idx.HasDescColumn() { + if err = checkTiKVSupportsDescIndex(ctx); err != nil { + return nil, errors.Trace(err) + } + break + } + } + if err := checkTableInfoValidExtra(ctx.GetSessionVars().StmtCtx.ErrCtx(), ctx.GetStore(), dbName, tbInfo); err != nil { return nil, err } @@ -4684,6 +4717,11 @@ func (e *executor) CreatePrimaryKey(ctx sessionctx.Context, ti ast.Ident, indexN return infoschema.ErrMultiplePriKey } + // Apply tidb_enable_descending_index gate at the SQL frontend, before + // the DDL job is enqueued. See ApplyDescGateToIndexParts for the + // rationale (avoid races between SET GLOBAL and DDL owner replay). + ApplyDescGateToIndexParts(indexPartSpecifications) + // Primary keys cannot include expression index parts. A primary key requires the generated column to be stored, // but expression index parts are implemented as virtual generated columns, not stored generated columns. 
for _, idxPart := range indexPartSpecifications { @@ -4703,6 +4741,11 @@ func (e *executor) CreatePrimaryKey(ctx sessionctx.Context, ti ast.Ident, indexN if err != nil { return errors.Trace(err) } + if hasDescIndexColumn(indexColumns) { + if err := checkTiKVSupportsDescIndex(ctx); err != nil { + return err + } + } if _, err = CheckPKOnGeneratedColumn(tblInfo, indexPartSpecifications); err != nil { return err } @@ -4839,6 +4882,14 @@ func (e *executor) createColumnarIndex(ctx sessionctx.Context, ti ast.Ident, ind return errors.Trace(err) } + // Apply tidb_enable_descending_index gate at the SQL frontend, before + // the DDL job is enqueued. With the gate off, Desc is cleared here so + // the columnar-DESC reject in buildIndexColumns never fires — that + // preserves the historical "DESC silently dropped" behaviour for + // columnar indexes when the feature is disabled. With the gate on, + // columnar DESC stays Desc=true and gets a clear error downstream. + ApplyDescGateToIndexParts(indexPartSpecifications) + var columnarIndexType model.ColumnarIndexType switch indexOption.Tp { case ast.IndexTypeInverted: @@ -4943,6 +4994,123 @@ func (e *executor) CreateIndex(ctx sessionctx.Context, stmt *ast.CreateIndexStmt stmt.IndexPartSpecifications, stmt.IndexOption, stmt.IfNotExists) } +// hasDescIndexColumn reports whether any built IndexColumn carries Desc=true. +func hasDescIndexColumn(cols []*model.IndexColumn) bool { + for _, c := range cols { + if c.Desc { + return true + } + } + return false +} + +// checkTiKVSupportsDescIndex returns an error if any non-TiFlash store in +// the cluster runs a TiKV version below MinTiKVVersionForDescIndex. Called +// when CREATE INDEX would persist a column with Desc=true. +// +// Two test-only escape hatches keep the check from blocking development: +// +// 1. Stores that never satisfy `tikv.Storage` — i.e. 
backends like the +// pure-Go mock used by some unit tests — short-circuit at the type +// assertion below; the unistore coprocessor used by those backends +// auto-detects DESC flag bytes, so the version gate is unnecessary. +// +// 2. MockStore-backed integration tests (`intest` builds) DO satisfy +// `tikv.Storage` and reach the PD-lookup path, but the mock fixture +// never fills in the `Version` string on its stores. The +// `tolerateMissingVersion=intest.InTest` flag passed to +// checkStoresMeetDescIndexMinVersion is what lets those tests +// proceed; production callers pass `false` and fail closed on a +// missing version. Do NOT remove the intest branch — it is not +// dead code, it covers MockStore. +func checkTiKVSupportsDescIndex(ctx sessionctx.Context) error { + tikvStore, ok := ctx.GetStore().(tikv.Storage) + if !ok { + return nil + } + pdCli := tikvStore.GetRegionCache().PDClient() + if pdCli == nil { + return nil + } + // Bound the PD round-trip so an unreachable PD doesn't hang DDL + // indefinitely. 5s matches the pattern used elsewhere for short PD + // metadata lookups in this package. + pdCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stores, err := pdCli.GetAllStores(pdCtx) + if err != nil { + return errors.Annotate(err, "checking TiKV cluster version for descending index") + } + // In production we fail closed on missing versions. The mock store + // fixture used in `intest` builds reports empty versions for every + // replica; tolerate that case so integration tests can exercise + // descending-index DDL without spinning up a real PD. + return checkStoresMeetDescIndexMinVersion(stores, MinTiKVVersionForDescIndex, intest.InTest) +} + +// checkStoresMeetDescIndexMinVersion is the pure version-comparison core of +// checkTiKVSupportsDescIndex, split out so the caller can pass a mocked +// store list in unit tests without spinning up a PD client. 
+// +// tolerateMissingVersion controls how to treat stores that report an empty +// version string. The production caller passes false (fail closed); the +// in-process test caller passes true so the mock-store fixture, which never +// fills in a version, doesn't block descending-index DDL. +// Garbage (unparsable) version strings always fail closed because they are +// unambiguously a misconfiguration, never a fixture artifact. +func checkStoresMeetDescIndexMinVersion(stores []*metapb.Store, minVersion string, tolerateMissingVersion bool) error { + required, err := semver.NewVersion(minVersion) + if err != nil { + return errors.Trace(err) + } + for _, s := range stores { + if s.GetState() == metapb.StoreState_Tombstone { + continue + } + if storeIsTiFlash(s) { + continue + } + if s.Version == "" { + if tolerateMissingVersion { + continue + } + return errors.Errorf( + "cannot create descending-order index: TiKV store %d at %s has not reported a version yet; retry after the cluster has reported its store versions", + s.Id, s.Address) + } + actual, parseErr := semver.NewVersion(strings.TrimPrefix(s.Version, "v")) + if parseErr != nil { + return errors.Annotatef(parseErr, + "cannot create descending-order index: TiKV store %d at %s reports an unparsable version %q", + s.Id, s.Address, s.Version) + } + // Compare on Major.Minor.Patch only — strict semver treats pre-release + // labels as lower than the corresponding stable, so a `9.0.0-beta.2` + // nightly that DOES contain the feature would otherwise be rejected + // by a `9.0.0` floor. Operators routinely run pre-release/RC clusters + // for testing; once the feature lands on a release branch the + // nightly/beta tag for that branch is a stronger guarantee than the + // strict-semver lexicographic ordering, not a weaker one. 
+ base := *actual + base.PreRelease = "" + if base.LessThan(*required) { + return errors.Errorf( + "cannot create descending-order index: TiKV store %d at %s reports version %s, which is below the minimum %s required by this feature; upgrade TiKV before retrying", + s.Id, s.Address, s.Version, minVersion) + } + } + return nil +} + +func storeIsTiFlash(store *metapb.Store) bool { + for _, label := range store.Labels { + if label.Key == placement.EngineLabelKey && label.Value == placement.EngineLabelTiFlash { + return true + } + } + return false +} + // addHypoIndexIntoCtx adds this index as a hypo-index into this ctx. func (*executor) addHypoIndexIntoCtx(ctx sessionctx.Context, schemaName, tableName ast.CIStr, indexInfo *model.IndexInfo) error { sctx := ctx.GetSessionVars() @@ -4984,6 +5152,10 @@ func (e *executor) createIndex(ctx sessionctx.Context, ti ast.Ident, keyType ast return errors.Trace(dbterror.ErrOptOnCacheTable.GenWithStackByArgs("Create Index")) } + // Apply tidb_enable_descending_index gate at the SQL frontend, before + // the DDL job is enqueued. See ApplyDescGateToIndexParts. + ApplyDescGateToIndexParts(indexPartSpecifications) + metaBuildCtx := NewMetaBuildContextWithSctx(ctx) indexName, hiddenCols, err := checkIndexNameAndColumns(metaBuildCtx, t, indexName, indexPartSpecifications, model.ColumnarIndexTypeNA, ifNotExists) if err != nil { @@ -5009,6 +5181,20 @@ func (e *executor) createIndex(ctx sessionctx.Context, ti ast.Ident, keyType ast return errors.Trace(err) } + // Skip the cluster-version preflight for hypothetical indexes + // (CREATE HYPO INDEX). Hypo indexes are planner-only — they never + // produce KV mutations and never reach a TiKV coprocessor — so an + // older or unhealthy TiKV cluster has no bearing on whether the + // index can be defined. Without this guard, `CREATE HYPO INDEX + // ... DESC` would either fail on an out-of-date cluster or block + // for the PD round-trip even though it is a no-op on storage. 
+ isHypo := indexOption != nil && indexOption.Tp == ast.IndexTypeHypo + if hasDescIndexColumn(indexColumns) && !isHypo { + if err := checkTiKVSupportsDescIndex(ctx); err != nil { + return err + } + } + if err = checkCreateGlobalIndex(ctx.GetSessionVars().StmtCtx.ErrCtx(), tblInfo, indexName.O, indexColumns, unique, indexOption != nil && indexOption.Global); err != nil { return err } diff --git a/pkg/ddl/index.go b/pkg/ddl/index.go index d030e41017b77..3100ee1f26cba 100644 --- a/pkg/ddl/index.go +++ b/pkg/ddl/index.go @@ -110,6 +110,29 @@ var DefaultCumulativeTimeout = 1 * time.Minute // exported for testing. var DefaultAnalyzeCheckInterval = 10 * time.Second +// ApplyDescGateToIndexParts implements the create-time-only semantics of the +// `tidb_enable_descending_index` system variable (pingcap/tidb#2519). +// +// It is called by SQL-frontend DDL paths (CreateTable, CreateIndex, +// CreatePrimaryKey, ...) on the AST IndexPartSpecification slice BEFORE the +// DDL job is enqueued. When the gate is off, every Desc flag is cleared so +// the rest of the DDL pipeline — including the DDL owner replaying the job — +// sees an unconditional ASC layout. When the gate is on, Desc is left intact. +// +// Re-reading the global atomic from buildIndexColumns at job-execution time +// would be wrong: a `SET GLOBAL tidb_enable_descending_index` between +// statement submission and DDL owner replay would silently flip the persisted +// schema. Baking the decision in here at submission time makes the result +// independent of any later toggling. +func ApplyDescGateToIndexParts(parts []*ast.IndexPartSpecification) { + if vardef.EnableDescendingIndex.Load() { + return + } + for _, ip := range parts { + ip.Desc = false + } +} + func buildIndexColumns(ctx *metabuild.Context, columns []*model.ColumnInfo, indexPartSpecifications []*ast.IndexPartSpecification, columnarIndexType model.ColumnarIndexType) ([]*model.IndexColumn, bool, error) { // Build offsets. 
idxParts := make([]*model.IndexColumn, 0, len(indexPartSpecifications)) @@ -132,6 +155,16 @@ func buildIndexColumns(ctx *metabuild.Context, columns []*model.ColumnInfo, inde if columnarIndexType == model.ColumnarIndexTypeFulltext && !types.IsString(col.FieldType.GetType()) { return nil, false, dbterror.ErrUnsupportedAddColumnarIndex.FastGenByArgs(fmt.Sprintf("only support string type, but this is type: %s", col.FieldType.String())) } + // Columnar indexes (vector / inverted / fulltext) cannot support + // descending order regardless of the gate. ApplyDescGateToIndexParts + // has already cleared Desc when the gate is off, so reaching this + // branch means the user explicitly asked for DESC on a columnar + // index with the gate ON — surface the limitation instead of + // silently dropping the keyword. Fulltext repeats this check in + // buildFullTextInfoWithCheck for defence in depth. + if ip.Desc && columnarIndexType != model.ColumnarIndexTypeNA { + return nil, false, dbterror.ErrUnsupportedAddColumnarIndex.FastGenByArgs(fmt.Sprintf("%s does not support DESC order", columnarIndexType.SQLName())) + } // return error in strict sql mode if columnarIndexType == model.ColumnarIndexTypeNA { @@ -175,10 +208,18 @@ func buildIndexColumns(ctx *metabuild.Context, columns []*model.ColumnInfo, inde ctx.AppendWarning(dbterror.ErrTooLongKey.FastGenByArgs(sumLength, maxIndexLength)) } + // ip.Desc has already been gated at the SQL frontend by + // ApplyDescGateToIndexParts: when tidb_enable_descending_index is OFF, + // every part comes in with Desc=false, preserving historical MySQL 5.7 + // behaviour for legacy migration scripts. We can therefore copy the + // flag through unconditionally — re-reading the global atomic here + // would race with `SET GLOBAL` between job submission and DDL owner + // replay. 
idxParts = append(idxParts, &model.IndexColumn{ Name: col.Name, Offset: col.Offset, Length: indexColLen, + Desc: ip.Desc, }) } @@ -431,6 +472,24 @@ func BuildIndexInfo( if err != nil { return nil, errors.Trace(err) } + // Reject DESC on a clustered PRIMARY KEY. For PKIsHandle the PK column is + // the row's int handle itself (encoded ascending and never going through + // the DESC-aware index encoder), and for IsCommonHandle the PK columns + // determine the row-key byte order, which the common-handle range builder + // does not yet encode with per-column DESC awareness. Allowing DESC here + // would silently miscompare row keys, so error out at DDL time. A + // NONCLUSTERED primary key is encoded just like any unique secondary + // index and supports DESC normally. + if isPrimary && idxInfo.HasDescColumn() && (tblInfo.IsCommonHandle || tblInfo.PKIsHandle) { + return nil, errors.Errorf( + "DESC is not supported on the columns of a clustered PRIMARY KEY; either drop the DESC keyword or declare the primary key as NONCLUSTERED") + } + // Bump the index-metadata version when any descending column is present so + // that an older TiDB reading this schema refuses to serve queries against + // the index rather than returning wrong results — see IndexInfo.IsServable. + if idxInfo.HasDescColumn() { + idxInfo.Version = 1 + } if indexOption != nil { idxInfo.Comment = indexOption.Comment diff --git a/pkg/distsql/request_builder.go b/pkg/distsql/request_builder.go index 97447aeea7fef..b2e2bc9f36e95 100644 --- a/pkg/distsql/request_builder.go +++ b/pkg/distsql/request_builder.go @@ -15,6 +15,7 @@ package distsql import ( + "bytes" "fmt" "math" "sort" @@ -701,15 +702,31 @@ func PartitionHandlesToKVRanges(handles []kv.Handle) ([]kv.KeyRange, []int) { return krs, hints } -// IndexRangesToKVRanges converts index ranges to "KeyRange". +// IndexRangesToKVRanges converts index ranges to "KeyRange". For indexes with +// any descending-order column, use IndexRangesToKVRangesWithDesc. 
func IndexRangesToKVRanges(dctx *distsqlctx.DistSQLContext, tid, idxID int64, ranges []*ranger.Range) (*kv.KeyRanges, error) { - return IndexRangesToKVRangesWithInterruptSignal(dctx, tid, idxID, ranges, nil, nil) + return IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tid, idxID, nil, ranges, nil, nil) +} + +// IndexRangesToKVRangesWithDesc is like IndexRangesToKVRanges but threads a +// per-column desc flag slice down to the byte encoder. Columns marked +// descending are complemented and, if the resulting byte low/high pair is +// inverted, automatically swapped. See EncodeIndexKeyWithDesc. +func IndexRangesToKVRangesWithDesc(dctx *distsqlctx.DistSQLContext, tid, idxID int64, desc []bool, ranges []*ranger.Range) (*kv.KeyRanges, error) { + return IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tid, idxID, desc, ranges, nil, nil) } // IndexRangesToKVRangesWithInterruptSignal converts index ranges to "KeyRange". // The process can be interrupted by set `interruptSignal` to true. func IndexRangesToKVRangesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, tid, idxID int64, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { - keyRanges, err := indexRangesToKVRangesForTablesWithInterruptSignal(dctx, []int64{tid}, idxID, ranges, memTracker, interruptSignal) + return IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tid, idxID, nil, ranges, memTracker, interruptSignal) +} + +// IndexRangesToKVRangesWithDescAndInterruptSignal is the full-featured entry +// point that takes both per-column desc flags and an interrupt signal. The +// other IndexRangesToKVRanges* helpers delegate here with sensible defaults. 
+func IndexRangesToKVRangesWithDescAndInterruptSignal(dctx *distsqlctx.DistSQLContext, tid, idxID int64, desc []bool, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { + keyRanges, err := indexRangesToKVRangesForTablesWithInterruptSignal(dctx, []int64{tid}, idxID, desc, ranges, memTracker, interruptSignal) if err != nil { return nil, err } @@ -719,13 +736,19 @@ func IndexRangesToKVRangesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, t // IndexRangesToKVRangesForTables converts indexes ranges to "KeyRange". func IndexRangesToKVRangesForTables(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, ranges []*ranger.Range) (*kv.KeyRanges, error) { - return indexRangesToKVRangesForTablesWithInterruptSignal(dctx, tids, idxID, ranges, nil, nil) + return indexRangesToKVRangesForTablesWithInterruptSignal(dctx, tids, idxID, nil, ranges, nil, nil) } -// IndexRangesToKVRangesForTablesWithInterruptSignal converts indexes ranges to "KeyRange". -// The process can be interrupted by set `interruptSignal` to true. -func indexRangesToKVRangesForTablesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { - return indexRangesToKVWithoutSplit(dctx, tids, idxID, ranges, memTracker, interruptSignal) +// IndexRangesToKVRangesForTablesWithDesc is the desc-aware variant of +// IndexRangesToKVRangesForTables. +func IndexRangesToKVRangesForTablesWithDesc(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, desc []bool, ranges []*ranger.Range) (*kv.KeyRanges, error) { + return indexRangesToKVRangesForTablesWithInterruptSignal(dctx, tids, idxID, desc, ranges, nil, nil) +} + +// indexRangesToKVRangesForTablesWithInterruptSignal converts index ranges to +// "KeyRange". The process can be interrupted by setting `interruptSignal` to true. 
+func indexRangesToKVRangesForTablesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, desc []bool, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { + return indexRangesToKVWithoutSplit(dctx, tids, idxID, desc, ranges, memTracker, interruptSignal) } // CommonHandleRangesToKVRanges converts common handle ranges to "KeyRange". @@ -782,7 +805,7 @@ func VerifyTxnScope(txnScope string, physicalTableID int64, is infoschema.MetaOn return true } -func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { +func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, desc []bool, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { krs := make([][]kv.KeyRange, len(tids)) for i := range krs { krs[i] = make([]kv.KeyRange, 0, len(ranges)) @@ -796,7 +819,7 @@ func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, // encodeIndexKey and EncodeIndexSeekKey is time-consuming, thus we need to // check the interrupt signal periodically. for i, ran := range ranges { - low, high, err := EncodeIndexKey(dctx, ran) + low, high, err := EncodeIndexKeyWithDesc(dctx, ran, desc) if err != nil { return nil, err } @@ -827,8 +850,30 @@ func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, return kv.NewPartitionedKeyRanges(krs), nil } -// EncodeIndexKey gets encoded keys containing low and high +// EncodeIndexKey gets encoded keys containing low and high. +// For indexes that contain descending-order columns, use EncodeIndexKeyWithDesc. 
func EncodeIndexKey(dctx *distsqlctx.DistSQLContext, ran *ranger.Range) (low, high []byte, err error) { + return EncodeIndexKeyWithDesc(dctx, ran, nil) +} + +// EncodeIndexKeyWithDesc encodes a range's bounds into the byte low/high pair +// fed to TiKV, honoring per-column descending-order flags (pingcap/tidb#2519). +// +// When desc is nil or all-false the result is byte-identical to EncodeIndexKey: +// no allocations beyond what the ascending path already needs, no semantic +// change. When any column is descending, its memcomparable bytes are +// bitwise-complemented so forward byte-order iteration yields the declared +// direction. +// +// Descending encoding reverses comparison order, so a semantic range like +// `col IN (low, high)` can encode to byte low ≥ byte high. If that happens +// we swap the two byte strings and swap the LowExclude/HighExclude flags, +// restoring a well-ordered [byteLow, byteHigh] pair before the usual +// PrefixNext adjustments run. This one-shot swap is correct for single- +// column DESC ranges and for composite ranges whose differentiating column +// is DESC; point ranges (byteLow == byteHigh) skip the swap and behave +// identically to the ascending path. +func EncodeIndexKeyWithDesc(dctx *distsqlctx.DistSQLContext, ran *ranger.Range, desc []bool) (low, high []byte, err error) { tz := time.UTC errCtx := errctx.StrictNoWarningContext if dctx != nil { @@ -836,26 +881,68 @@ func EncodeIndexKey(dctx *distsqlctx.DistSQLContext, ran *ranger.Range) (low, hi errCtx = dctx.ErrCtx } - low, err = codec.EncodeKey(tz, nil, ran.LowVal...) + hasDesc := false + for _, d := range desc { + if d { + hasDesc = true + break + } + } + + if !hasDesc { + low, err = codec.EncodeKey(tz, nil, ran.LowVal...) + } else { + low, err = codec.EncodeKeyWithDesc(tz, nil, desc, ran.LowVal...) 
+ } err = errCtx.HandleError(err) if err != nil { return nil, nil, err } - if ran.LowExclude { - low = kv.Key(low).PrefixNext() + if !hasDesc { + high, err = codec.EncodeKey(tz, nil, ran.HighVal...) + } else { + high, err = codec.EncodeKeyWithDesc(tz, nil, desc, ran.HighVal...) } - high, err = codec.EncodeKey(tz, nil, ran.HighVal...) err = errCtx.HandleError(err) if err != nil { return nil, nil, err } - if !ran.HighExclude { + lowExclude, highExclude := ran.LowExclude, ran.HighExclude + if hasDesc && bytes.Compare(low, high) > 0 { + // The semantic low encoded to a larger byte than the semantic high + // because descending columns flip byte ordering. Swap so the caller + // always sees a well-ordered [low, high] pair, and swap the exclude + // flags so PrefixNext gets applied to the correct side. + low, high = high, low + lowExclude, highExclude = highExclude, lowExclude + } + + if lowExclude { + low = kv.Key(low).PrefixNext() + } + if !highExclude { high = kv.Key(high).PrefixNext() } return low, high, nil } +// indexDescFlags returns a per-column descending-order slice derived from +// idxInfo, or nil when the index has no DESC columns. Returning nil lets +// downstream encoders stay on the ascending fast path. Used at distsql +// callsites that have an *IndexInfo in hand but were originally written +// against the legacy idxID-only range helpers. +func indexDescFlags(idxInfo *model.IndexInfo) []bool { + if idxInfo == nil || !idxInfo.HasDescColumn() { + return nil + } + desc := make([]bool, len(idxInfo.Columns)) + for i, c := range idxInfo.Columns { + desc[i] = c.Desc + } + return desc +} + // BuildTableRanges returns the key ranges encompassing the entire table, // and its partitions if exists. 
func BuildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) { @@ -871,7 +958,7 @@ func BuildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) { if idx.State != model.StatePublic || !idx.Global { continue } - idxRanges, err := IndexRangesToKVRanges(nil, tbl.ID, idx.ID, ranger.FullRange()) + idxRanges, err := IndexRangesToKVRangesWithDesc(nil, tbl.ID, idx.ID, indexDescFlags(idx), ranger.FullRange()) if err != nil { return nil, err } @@ -908,7 +995,7 @@ func appendRanges(tbl *model.TableInfo, tblID int64) ([]kv.KeyRange, error) { continue } ranges = ranger.FullRange() - idxRanges, err := IndexRangesToKVRanges(nil, tblID, index.ID, ranges) + idxRanges, err := IndexRangesToKVRangesWithDesc(nil, tblID, index.ID, indexDescFlags(index), ranges) if err != nil { return nil, errors.Trace(err) } diff --git a/pkg/executor/admin.go b/pkg/executor/admin.go index ed9814a08e264..88ae4353f9305 100644 --- a/pkg/executor/admin.go +++ b/pkg/executor/admin.go @@ -818,7 +818,7 @@ func (e *CleanupIndexExec) buildIndexScan(ctx context.Context, txn kv.Transactio } var builder distsql.RequestBuilder ranges := ranger.FullRange() - keyRanges, err := distsql.IndexRangesToKVRanges(e.Ctx().GetDistSQLCtx(), e.physicalID, e.index.Meta().ID, ranges) + keyRanges, err := distsql.IndexRangesToKVRangesWithDesc(e.Ctx().GetDistSQLCtx(), e.physicalID, e.index.Meta().ID, indexColDescFlags(e.index.Meta()), ranges) if err != nil { return nil, err } diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go index a1655fb5765ef..958758a633583 100644 --- a/pkg/executor/builder.go +++ b/pkg/executor/builder.go @@ -4146,7 +4146,8 @@ func (builder *dataReaderBuilder) buildPartitionedTableReaderKVRangesForIndexJoi } if canLocateByKey { for pid, contents := range contentsByPID { - tmp, err := buildKvRangesForIndexJoin(dctx, rctx, pid, -1, contents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) + // Common handle PK is always ascending (no per-column DESC), so desc=nil. 
+ tmp, err := buildKvRangesForIndexJoin(dctx, rctx, pid, -1, nil, contents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) if err != nil { return nil, err } @@ -4154,7 +4155,7 @@ func (builder *dataReaderBuilder) buildPartitionedTableReaderKVRangesForIndexJoi } } else { for _, p := range usedPartitionList { - tmp, err := buildKvRangesForIndexJoin(dctx, rctx, p.GetPhysicalID(), -1, lookUpContents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) + tmp, err := buildKvRangesForIndexJoin(dctx, rctx, p.GetPhysicalID(), -1, nil, lookUpContents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) if err != nil { return nil, err } @@ -5123,7 +5124,7 @@ func (builder *dataReaderBuilder) buildTableReaderForIndexJoin(ctx context.Conte tbInfo := e.table.Meta() if tbInfo.GetPartitionInfo() == nil || !builder.sctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { if v.IsCommonHandle { - kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), -1, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) + kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), -1, nil, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) if err != nil { return nil, err } @@ -5297,7 +5298,7 @@ func (builder *dataReaderBuilder) buildIndexReaderForIndexJoin(ctx context.Conte } tbInfo := e.table.Meta() if tbInfo.GetPartitionInfo() == nil || !builder.sctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { - kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, e.physicalTableID, e.index.ID, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memoryTracker, interruptSignal) + kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, e.physicalTableID, e.index.ID, indexColDescFlags(e.index), lookUpContents, indexRanges, keyOff2IdxOff, cwc, memoryTracker, interruptSignal) if err != nil { return nil, err } @@ -5353,7 +5354,7 @@ 
func (builder *dataReaderBuilder) buildIndexLookUpReaderForIndexJoin(ctx context tbInfo := e.table.Meta() if tbInfo.GetPartitionInfo() == nil || !builder.sctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { - kvRange, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), e.index.ID, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) + kvRange, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), e.index.ID, indexColDescFlags(e.index), lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) if err != nil { return nil, err } @@ -5485,7 +5486,9 @@ func buildRangesForIndexJoin(rctx *rangerctx.RangerContext, lookUpContents []*jo } // buildKvRangesForIndexJoin builds kv ranges for index join when the inner plan is index scan plan. -func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx.RangerContext, tableID, indexID int64, lookUpContents []*join.IndexJoinLookUpContent, +// desc carries the per-column descending flags of the index (nil or all-false for +// ascending-only indexes, which preserves the original encoder fast path). +func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx.RangerContext, tableID, indexID int64, desc []bool, lookUpContents []*join.IndexJoinLookUpContent, ranges []*ranger.Range, keyOff2IdxOff []int, cwc *physicalop.ColWithCmpFuncManager, memTracker *memory.Tracker, interruptSignal *atomic.Value, ) (_ []kv.KeyRange, err error) { kvRanges := make([]kv.KeyRange, 0, len(ranges)*len(lookUpContents)) @@ -5508,7 +5511,7 @@ func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx. 
if indexID == -1 { tmpKvRanges, err = distsql.CommonHandleRangesToKVRanges(dctx, []int64{tableID}, ranges) } else { - tmpKvRanges, err = distsql.IndexRangesToKVRangesWithInterruptSignal(dctx, tableID, indexID, ranges, memTracker, interruptSignal) + tmpKvRanges, err = distsql.IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tableID, indexID, desc, ranges, memTracker, interruptSignal) } if err != nil { return nil, err @@ -5556,10 +5559,24 @@ func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx. tmpKeyRanges, err := distsql.CommonHandleRangesToKVRanges(dctx, []int64{tableID}, tmpDatumRanges) return tmpKeyRanges.FirstPartitionRange(), err } - tmpKeyRanges, err := distsql.IndexRangesToKVRangesWithInterruptSignal(dctx, tableID, indexID, tmpDatumRanges, memTracker, interruptSignal) + tmpKeyRanges, err := distsql.IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tableID, indexID, desc, tmpDatumRanges, memTracker, interruptSignal) return tmpKeyRanges.FirstPartitionRange(), err } +// indexColDescFlags returns a per-column descending-order slice derived from +// idxInfo, or nil when the index has no DESC columns. Returning nil lets +// downstream encoders stay on the ascending fast path. 
+func indexColDescFlags(idxInfo *model.IndexInfo) []bool { + if idxInfo == nil || !idxInfo.HasDescColumn() { + return nil + } + desc := make([]bool, len(idxInfo.Columns)) + for i, c := range idxInfo.Columns { + desc[i] = c.Desc + } + return desc +} + func (b *executorBuilder) buildWindow(v *physicalop.PhysicalWindow) exec.Executor { childExec := b.build(v.Children()[0]) if b.err != nil { diff --git a/pkg/executor/distsql.go b/pkg/executor/distsql.go index b6329e59379c3..48af60e42ca05 100644 --- a/pkg/executor/distsql.go +++ b/pkg/executor/distsql.go @@ -336,7 +336,7 @@ func (e *IndexReaderExecutor) buildKVRangesForIndexReader() ([]kv.KeyRange, erro results := make([]kv.KeyRange, 0, len(groupedRanges)) for _, ranges := range groupedRanges { - kvRanges, err := buildKeyRanges(e.dctx, ranges, e.partRangeMap, tableIDs, e.index.ID, nil) + kvRanges, err := buildKeyRanges(e.dctx, ranges, e.partRangeMap, tableIDs, e.index.ID, indexColDescFlags(e.index), nil) if err != nil { return nil, err } @@ -619,6 +619,7 @@ func buildKeyRanges(dctx *distsqlctx.DistSQLContext, rangeOverrideForPartitionID map[int64][]*ranger.Range, physicalIDs []int64, indexID int64, + desc []bool, memTracker *memory.Tracker, ) ([][]kv.KeyRange, error) { results := make([][]kv.KeyRange, 0, len(physicalIDs)) @@ -633,7 +634,7 @@ func buildKeyRanges(dctx *distsqlctx.DistSQLContext, } results = append(results, rRanges.FirstPartitionRange()) } else { - singleRanges, err := distsql.IndexRangesToKVRangesWithInterruptSignal(dctx, physicalID, indexID, ranges, memTracker, nil) + singleRanges, err := distsql.IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, physicalID, indexID, desc, ranges, memTracker, nil) if err != nil { return nil, err } @@ -661,7 +662,7 @@ func (e *IndexLookUpExecutor) buildTableKeyRanges() (err error) { kvRanges := make([][]kv.KeyRange, 0, len(groupedRanges)) physicalTblIDsForPartitionKVRanges := make([]int64, 0, len(tableIDs)*len(groupedRanges)) for _, ranges := range groupedRanges { - 
kvRange, err := buildKeyRanges(e.dctx, ranges, e.partitionRangeMap, tableIDs, e.index.ID, e.memTracker) + kvRange, err := buildKeyRanges(e.dctx, ranges, e.partitionRangeMap, tableIDs, e.index.ID, indexColDescFlags(e.index), e.memTracker) if err != nil { return err } diff --git a/pkg/executor/executor_pkg_test.go b/pkg/executor/executor_pkg_test.go index 7b738ce65795e..6580e49add349 100644 --- a/pkg/executor/executor_pkg_test.go +++ b/pkg/executor/executor_pkg_test.go @@ -64,7 +64,7 @@ func TestBuildKvRangesForIndexJoinWithoutCwc(t *testing.T) { keyOff2IdxOff := []int{1, 3} ctx := mock.NewContext() - kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff, nil, nil, nil) + kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, nil, joinKeyRows, indexRanges, keyOff2IdxOff, nil, nil, nil) require.NoError(t, err) // Check the kvRanges is in order. for i, kvRange := range kvRanges { @@ -94,7 +94,7 @@ func TestBuildKvRangesForIndexJoinWithoutCwcAndWithMemoryTracker(t *testing.T) { keyOff2IdxOff := []int{1, 3} ctx := mock.NewContext() memTracker := memory.NewTracker(memory.LabelForIndexWorker, -1) - kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) + kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, nil, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) require.NoError(t, err) // Check the kvRanges is in order. 
for i, kvRange := range kvRanges { @@ -116,7 +116,7 @@ func TestBuildKvRangesForIndexJoinWithoutCwcAndWithMemoryTracker(t *testing.T) { keyOff2IdxOff := []int{1, 3} ctx := mock.NewContext() memTracker := memory.NewTracker(memory.LabelForIndexWorker, -1) - kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) + kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, nil, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) require.NoError(t, err) // Check the kvRanges is in order. for i, kvRange := range kvRanges { diff --git a/pkg/executor/index_merge_reader.go b/pkg/executor/index_merge_reader.go index 898d208bf1b38..31760ccfe0a51 100644 --- a/pkg/executor/index_merge_reader.go +++ b/pkg/executor/index_merge_reader.go @@ -191,7 +191,10 @@ func (e *IndexMergeReaderExecutor) Open(_ context.Context) (err error) { } for i, idx := range e.indexes { if idx != nil && idx.Global { - keyRange, _ := distsql.IndexRangesToKVRanges(e.sctx.GetDistSQLCtx(), e.table.Meta().ID, idx.ID, e.ranges[i]) + keyRange, err := distsql.IndexRangesToKVRangesWithDesc(e.sctx.GetDistSQLCtx(), e.table.Meta().ID, idx.ID, indexColDescFlags(idx), e.ranges[i]) + if err != nil { + return err + } e.partitionKeyRanges[i] = [][]kv.KeyRange{keyRange.FirstPartitionRange()} } else { for _, pKeyRanges := range tmpPartitionKeyRanges { @@ -253,7 +256,7 @@ func (e *IndexMergeReaderExecutor) buildKeyRangesForTable(tbl table.Table) (rang ranges = append(ranges, keyRanges) continue } - keyRange, err := distsql.IndexRangesToKVRanges(dctx, getPhysicalTableID(tbl), e.indexes[i].ID, e.ranges[i]) + keyRange, err := distsql.IndexRangesToKVRangesWithDesc(dctx, getPhysicalTableID(tbl), e.indexes[i].ID, indexColDescFlags(e.indexes[i]), e.ranges[i]) if err != nil { return nil, err } diff --git a/pkg/executor/infoschema_reader.go b/pkg/executor/infoschema_reader.go index 
8b50ab852272e..6bb48e013265d 100644 --- a/pkg/executor/infoschema_reader.go +++ b/pkg/executor/infoschema_reader.go @@ -547,6 +547,11 @@ func (e *memtableRetriever) setDataForStatisticsInTable( subPart = key.Length } + collation := "A" + if key.Desc { + collation = "D" + } + record := types.MakeDatums( infoschema.CatalogVal, // TABLE_CATALOG schema.O, // TABLE_SCHEMA @@ -556,7 +561,7 @@ func (e *memtableRetriever) setDataForStatisticsInTable( index.Name.O, // INDEX_NAME i+1, // SEQ_IN_INDEX colName, // COLUMN_NAME - "A", // COLLATION + collation, // COLLATION 0, // CARDINALITY subPart, // SUB_PART nil, // PACKED diff --git a/pkg/executor/show.go b/pkg/executor/show.go index 790bb2f953892..1b0691b49789d 100644 --- a/pkg/executor/show.go +++ b/pkg/executor/show.go @@ -885,13 +885,17 @@ func (e *ShowExec) fetchShowIndex() error { ndv = colStats.NDV } + collation := "A" + if col.Desc { + collation = "D" + } e.appendRow([]any{ tb.Meta().Name.O, // Table nonUniq, // Non_unique idx.Meta().Name.O, // Key_name i + 1, // Seq_in_index colName, // Column_name - "A", // Collation + collation, // Collation ndv, // Cardinality subPart, // Sub_part nil, // Packed @@ -1250,6 +1254,9 @@ func constructResultOfShowCreateTable(ctx sessionctx.Context, dbName *ast.CIStr, colInfo = fmt.Sprintf("%s(%s)", colInfo, strconv.Itoa(c.Length)) } } + if c.Desc { + colInfo += " DESC" + } cols = append(cols, colInfo) } if idxInfo.VectorInfo != nil { diff --git a/pkg/executor/slow_query_sql_test.go b/pkg/executor/slow_query_sql_test.go index f9fc328759dad..732ef26c4a309 100644 --- a/pkg/executor/slow_query_sql_test.go +++ b/pkg/executor/slow_query_sql_test.go @@ -459,6 +459,16 @@ func TestWarningsInSlowQuery(t *testing.T) { } } +// checkStorageEngines polls slow_query because the prior MustExec's slow-log +// write isn't always immediately visible to the read here under CI load +// (issue #66727). 
+func checkStorageEngines(t *testing.T, tk *testkit.TestKit, where, expected string) { + t.Helper() + tk.EventuallyMustQueryAndCheck( + "select storage_from_kv, storage_from_mpp from information_schema.slow_query where "+where, + nil, testkit.Rows(expected), 2*time.Second, 50*time.Millisecond) +} + func TestStorageEnginesInSlowQuery(t *testing.T) { originCfg := config.GetGlobalConfig() newCfg := *originCfg @@ -466,11 +476,19 @@ func TestStorageEnginesInSlowQuery(t *testing.T) { require.NoError(t, err) newCfg.Log.SlowQueryFile = f.Name() config.StoreGlobalConfig(&newCfg) - defer func() { + t.Cleanup(func() { + if t.Failed() { + // On failure, dump the slow log to disambiguate a missing entry from one + // that's present but doesn't match the expected pattern (issue #66727). + if data, err := os.ReadFile(f.Name()); err == nil { + t.Logf("slow log contents (%d bytes):\n%s", len(data), data) + } + } + config.StoreGlobalConfig(originCfg) require.NoError(t, f.Close()) require.NoError(t, os.Remove(newCfg.Log.SlowQueryFile)) - }() + }) require.NoError(t, logutil.InitLogger(newCfg.Log.ToLogConfig())) store, dom := testkit.CreateMockStoreAndDomain(t, mockstore.WithMockTiFlash(2)) tk := testkit.NewTestKit(t, store) @@ -483,16 +501,12 @@ func TestStorageEnginesInSlowQuery(t *testing.T) { // Query that doesn't read from any storage engines tk.MustExec("select 1") - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query = 'select 1;'"). - Check(testkit.Rows("0 0")) + checkStorageEngines(t, tk, "query = 'select 1;'", "0 0") // Query that only reads from TiKV tk.MustExec("create table t_tikv (a int)") tk.MustExec("select /*+ read_from_storage(tikv[t_tikv]) */ a from t_tikv") - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%t_tikv;'"). 
- Check(testkit.Rows("1 0")) + checkStorageEngines(t, tk, "query like 'select%t_tikv;'", "1 0") // Query that only reads from TiFlash tk.MustExec("create table t_tiflash (a int)") @@ -500,57 +514,43 @@ func TestStorageEnginesInSlowQuery(t *testing.T) { tb := external.GetTableByName(t, tk, "test", "t_tiflash") require.NoError(t, dom.DDLExecutor().UpdateTableReplicaInfo(tk.Session(), tb.Meta().ID, true)) tk.MustExec("select /*+ read_from_storage(tiflash[t_tiflash]) */ a from t_tiflash;") - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%t_tiflash;'"). - Check(testkit.Rows("0 1")) + checkStorageEngines(t, tk, "query like 'select%t_tiflash;'", "0 1") // Query that reads from both TiKV and TiFlash tk.MustExec("select /*+ read_from_storage(tikv[t_tikv]) */ t_tikv.a, /*+ read_from_storage(tiflash[t_tiflash]) */ t_tiflash.a from t_tikv, t_tiflash") - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%t_tikv, t_tiflash;'"). - Check(testkit.Rows("1 1")) + checkStorageEngines(t, tk, "query like 'select%t_tikv, t_tiflash;'", "1 1") // Point get queries should register as reading from TiKV tk.MustExec("create table t_pointget (a int primary key)") query := "select a from t_pointget where a = 1" tk.MustHavePlan(query, "Point_Get") tk.MustExec(query) - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%t_pointget%;'"). 
- Check(testkit.Rows("1 0")) + checkStorageEngines(t, tk, "query like 'select%t_pointget%;'", "1 0") // Index readers should register as reading from TiKV tk.MustExec("create table t_index_reader (a int, key (a))") query = "select a from t_index_reader where a = 1" tk.MustHavePlan(query, "IndexReader") tk.MustQuery(query) - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%t_index_reader%;'"). - Check(testkit.Rows("1 0")) + checkStorageEngines(t, tk, "query like 'select%t_index_reader%;'", "1 0") // Index lookups should register as reading from TiKV tk.MustExec("create table t_index_lookup (a int, b int, index (a))") tk.MustIndexLookup("select a, b from t_index_lookup where a = 1") - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%t_index_lookup%;'"). - Check(testkit.Rows("1 0")) + checkStorageEngines(t, tk, "query like 'select%t_index_lookup%;'", "1 0") // Index merge readers should register as reading from TiKV tk.MustExec("create table t_index_merge(a int, b int, primary key (a), unique key (b))") query = "select /*+ use_index_merge(t_index_merge, a, b) */ * from t_index_merge where a = 1 or b = 1" tk.MustHavePlan(query, "IndexMerge") tk.MustExec(query) - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%t_index_merge%;'"). - Check(testkit.Rows("1 0")) + checkStorageEngines(t, tk, "query like 'select%t_index_merge%;'", "1 0") // TABLESAMPLE queries should register as reading from TiKV query = "select * from t_tikv tablesample regions();" tk.MustHavePlan(query, "TableSample") tk.MustExec(query) - tk.MustQuery("select storage_from_kv, storage_from_mpp from information_schema.slow_query " + - "where query like 'select%tablesample%;'"). 
- Check(testkit.Rows("1 0")) + checkStorageEngines(t, tk, "query like 'select%tablesample%;'", "1 0") } func TestSessionConnectAttrsInSlowQuery(t *testing.T) { diff --git a/pkg/lightning/backend/local/job_worker.go b/pkg/lightning/backend/local/job_worker.go index 21dc25ccfef47..1b25153a89317 100644 --- a/pkg/lightning/backend/local/job_worker.go +++ b/pkg/lightning/backend/local/job_worker.go @@ -16,6 +16,7 @@ package local import ( "context" + goerrors "errors" "io" "strings" "sync" @@ -197,7 +198,7 @@ func (w *regionJobBaseWorker) runJob(ctx context.Context, job *regionJob) error job.convertStageTo(needRescan) return nil } - res, err := w.writeFn(ctx, job) + res, err := w.writeWithTimeout(ctx, job) err = injectfailpoint.DXFRandomErrorWithOnePercentWrapper(err) if err != nil { if !w.isRetryableImportTiKVError(err) { @@ -266,6 +267,34 @@ func (w *regionJobBaseWorker) runJob(ctx context.Context, job *regionJob) error } } +func (w *regionJobBaseWorker) writeWithTimeout( + ctx context.Context, + job *regionJob, +) (ret *tikvWriteResult, err error) { + // set a timeout for the write operation, if it takes too long, we will + // return with common.ErrWriteTooSlow and let caller retry the whole job + // instead of being stuck forever. 
+ timeout := 15 * time.Minute + failpoint.Inject("shortWaitNTimeout", func(val failpoint.Value) { + ms := val.(int) + timeout = time.Duration(ms) * time.Millisecond + }) + ctx, cancel := context.WithTimeoutCause(ctx, timeout, common.ErrWriteTooSlow) + defer cancel() + + ret, err = w.writeFn(ctx, job) + if err == nil { + return ret, nil + } + if errors.Cause(err) == context.DeadlineExceeded { + if cause := context.Cause(ctx); goerrors.Is(cause, common.ErrWriteTooSlow) { + tidblogutil.Logger(ctx).Info("experiencing a wait timeout while writing to TiKV") + err = errors.Trace(cause) + } + } + return ret, err +} + func (*regionJobBaseWorker) isRetryableImportTiKVError(err error) bool { err = errors.Cause(err) // io.EOF is not retryable in normal case diff --git a/pkg/lightning/backend/local/job_worker_test.go b/pkg/lightning/backend/local/job_worker_test.go index bd2acbf884cb7..43851dc26ea5b 100644 --- a/pkg/lightning/backend/local/job_worker_test.go +++ b/pkg/lightning/backend/local/job_worker_test.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/pkg/ingestor/errdef" "github.com/pingcap/tidb/pkg/ingestor/ingestcli" ingestclimock "github.com/pingcap/tidb/pkg/ingestor/ingestcli/mock" + "github.com/pingcap/tidb/pkg/lightning/common" "github.com/pingcap/tidb/pkg/resourcemanager/pool/workerpool" rcmgrutil "github.com/pingcap/tidb/pkg/resourcemanager/util" "github.com/pingcap/tidb/pkg/testkit/testfailpoint" @@ -305,6 +306,25 @@ func TestCloudRegionJobWorker(t *testing.T) { require.True(t, ctrl.Satisfied()) }) + t.Run("timeout while creating ingest client should be handled by runJob wrapper", func(t *testing.T) { + job := ®ionJob{ + keyRange: engineapi.Range{Start: []byte("a"), End: []byte("z")}, + stage: regionScanned, + ingestData: mockIngestData{{[]byte("a"), []byte("a")}}, + region: &split.RegionInfo{Region: &metapb.Region{ + Id: 1, Peers: []*metapb.Peer{{StoreId: 1}}, + }, Leader: &metapb.Peer{StoreId: 1}}, + } + mockIngestCli.EXPECT().WriteClient(gomock.Any(), 
gomock.Any()).Return(nil, context.DeadlineExceeded) + ctx, cancel := context.WithTimeoutCause(context.Background(), 0, common.ErrWriteTooSlow) + defer cancel() + err := cloudW.runJob(ctx, job) + require.NoError(t, err) + require.Equal(t, needRescan, job.stage) + require.ErrorIs(t, job.lastRetryableErr, common.ErrWriteTooSlow) + require.True(t, ctrl.Satisfied()) + }) + t.Run("failed to write data", func(t *testing.T) { job := ®ionJob{ keyRange: engineapi.Range{Start: []byte("a"), End: []byte("z")}, diff --git a/pkg/lightning/backend/local/region_job.go b/pkg/lightning/backend/local/region_job.go index bd610786078fc..ac9a48888aa22 100644 --- a/pkg/lightning/backend/local/region_job.go +++ b/pkg/lightning/backend/local/region_job.go @@ -339,34 +339,6 @@ func (local *Backend) doWrite(ctx context.Context, j *regionJob) (ret *tikvWrite failpoint.Return(front.write.result, err) }) - var cancel context.CancelFunc - // set a timeout for the write operation, if it takes too long, we will return with common.ErrWriteTooSlow and let caller retry the whole job instead of being stuck forever. - timeout := 15 * time.Minute - ctx, cancel = context.WithTimeoutCause(ctx, timeout, common.ErrWriteTooSlow) - defer cancel() - - // A defer function to handle all DeadlineExceeded errors that may occur - // during the write operation using this context with 15 minutes timeout. - // When the error is "context deadline exceeded", we will check if the cause - // is common.ErrWriteTooSlow and return the common.ErrWriteTooSlow instead so - // our caller would be able to retry this doWrite operation. By doing this - // defer we are hoping to handle all DeadlineExceeded error during this - // write, either from gRPC stream or write limiter WaitN operation. 
- wctx := ctx - defer func() { - if err == nil { - return - } - if errors.Cause(err) == context.DeadlineExceeded { - if cause := context.Cause(wctx); goerrors.Is(cause, common.ErrWriteTooSlow) { - tidblogutil.Logger(ctx).Info("Experiencing a wait timeout while writing to tikv", - zap.Int("store-write-bwlimit", local.BackendConfig.StoreWriteBWLimit), - zap.Int("limit-size", local.writeLimiter.Limit())) - err = errors.Trace(cause) // return the common.ErrWriteTooSlow instead to let caller retry it - } - } - }() - apiVersion := local.tikvCodec.GetAPIVersion() clientFactory := local.importClientFactory kvBatchSize := local.KVWriteBatchSize @@ -485,19 +457,6 @@ func (local *Backend) doWrite(ctx context.Context, j *regionJob) (ret *tikvWrite regionMaxSize = j.regionSplitSize * 4 / 3 } - // preparation work for the write timeout fault injection, only enabled if the following failpoint is enabled - wcancel := func() {} - failpoint.Inject("shortWaitNTimeout", func(val failpoint.Value) { - var innerTimeout time.Duration - // GO_FAILPOINTS action supplies the duration in - ms, _ := val.(int) - innerTimeout = time.Duration(ms) * time.Millisecond - tidblogutil.Logger(ctx).Info("Injecting a timeout to write context.") - wctx, wcancel = context.WithTimeoutCause( - ctx, innerTimeout, common.ErrWriteTooSlow) - }) - defer wcancel() - flushKVs := func() error { req.Chunk.(*sst.WriteRequest_Batch).Batch.Pairs = pairs[:count] preparedMsg := &grpc.PreparedMsg{} @@ -506,11 +465,15 @@ func (local *Backend) doWrite(ctx context.Context, j *regionJob) (ret *tikvWrite if err := preparedMsg.Encode(clients[0], req); err != nil { return err } - + failpoint.Inject("shortWaitNTimeout", func(val failpoint.Value) { + // add 20 to make sure the ctx is timed out. 
+ ms := val.(int) + 20 + time.Sleep(time.Duration(ms) * time.Millisecond) + }) for i := range clients { // original ctx would be used when failpoint is not enabled // that new context would be used when failpoint is enabled - err := writeLimiter.WaitN(wctx, allPeers[i].StoreId, int(size)) + err := writeLimiter.WaitN(ctx, allPeers[i].StoreId, int(size)) if err != nil { // We expect to encounter two types of errors here: // 1. context.DeadlineExceeded — occurs when the calculated delay is @@ -520,13 +483,8 @@ func (local *Backend) doWrite(ctx context.Context, j *regionJob) (ret *tikvWrite // path triggered when the delay already exceeds the remaining // time for context before sleeping. // - // Unfortunately, we cannot precisely control when the context will - // expire, so both scenarios are valid and expected. - // Fortunately, the "rate: Wait" error is already treated as - // retryable, so we only need to explicitly handle - // context.DeadlineExceeded here. - // We rely on the defer function at the top of doWrite to handle it - // for us in general. + // "rate: Wait" error is already treated as retryable, + // writeWithTimeout will handle the context.DeadlineExceeded. return errors.Trace(err) } if err := clients[i].SendMsg(preparedMsg); err != nil { diff --git a/pkg/meta/model/index.go b/pkg/meta/model/index.go index 82e3a618291d3..bd646a0b00be5 100644 --- a/pkg/meta/model/index.go +++ b/pkg/meta/model/index.go @@ -19,6 +19,7 @@ import ( "strings" "sync/atomic" + "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/config/kerneltype" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/ast" @@ -284,6 +285,51 @@ type IndexInfo struct { // 2=v2 with partition ID in key only (TODO). GlobalIndexVersion uint8 `json:"global_index_version,omitempty"` RegionSplitPolicy *RegionSplitPolicy `json:"region_split_policy,omitempty"` // RegionSplitPolicy is the persistent split policy. 
+ // Version is a monotonic feature fence for the index metadata format. + // 0 = legacy; all features representable in older TiDB versions. + // 1 = uses per-column descending order (any Columns[i].Desc == true). + // An older TiDB reading an index with Version greater than maxKnownIndexVersion + // must refuse to serve queries against it to avoid returning wrong results. + Version uint8 `json:"version,omitempty"` +} + +// maxKnownIndexVersion is the highest IndexInfo.Version this TiDB binary understands. +// Bump this when adding index-level features that older binaries cannot safely ignore. +const maxKnownIndexVersion uint8 = 1 + +// IsServable reports whether this TiDB binary can safely serve queries against +// the index. It returns false if the index metadata uses a newer format version +// than this binary knows about — the caller should surface a clear error rather +// than risk returning wrong rows. +func (index *IndexInfo) IsServable() bool { + return index.Version <= maxKnownIndexVersion +} + +// UnservableErr returns the error to surface when an unservable index is +// encountered during query planning or DDL. The message names the index and +// the version mismatch so an operator can tell whether to upgrade TiDB or +// drop the index before downgrading. +func (index *IndexInfo) UnservableErr(tableName string) error { + return errors.Errorf( + "index `%s`%s uses metadata version %d, which is newer than this TiDB binary supports (max %d); upgrade TiDB or DROP INDEX to roll back", + index.Name.O, formatTableQualifier(tableName), index.Version, maxKnownIndexVersion) +} + +func formatTableQualifier(tableName string) string { + if tableName == "" { + return "" + } + return " on table `" + tableName + "`" +} + +// HasDescColumn reports whether any column of the index is stored in descending order. 
+func (index *IndexInfo) HasDescColumn() bool { + for _, col := range index.Columns { + if col.Desc { + return true + } + } + return false } // Hash64 implement HashEquals interface. @@ -498,6 +544,10 @@ type IndexColumn struct { Length int `json:"length"` // Whether this index column use changing type UseChangingType bool `json:"using_changing_type,omitempty"` + // Desc indicates that this index column is stored in descending order. + // False (the zero value) means ascending order; this preserves backward + // compatibility with schemas written before descending indexes were supported. + Desc bool `json:"desc,omitempty"` } // Clone clones IndexColumn. diff --git a/pkg/meta/model/index_test.go b/pkg/meta/model/index_test.go index 546975050a640..e340392d257cf 100644 --- a/pkg/meta/model/index_test.go +++ b/pkg/meta/model/index_test.go @@ -15,6 +15,7 @@ package model import ( + "encoding/json" "fmt" "testing" @@ -76,3 +77,68 @@ func TestGlobalIndexV1SupportedForNextGen(t *testing.T) { require.True(t, GetGlobalIndexV1Supported()) } } + +func TestIndexColumnDescRoundTrip(t *testing.T) { + // Ascending is the zero value and must be omitted from JSON for backward + // compatibility with schemas written before descending indexes were supported. + ascCol := IndexColumn{Name: ast.NewCIStr("a"), Offset: 0, Length: -1} + ascJSON, err := json.Marshal(ascCol) + require.NoError(t, err) + require.NotContains(t, string(ascJSON), "desc") + + descCol := IndexColumn{Name: ast.NewCIStr("b"), Offset: 1, Length: -1, Desc: true} + descJSON, err := json.Marshal(descCol) + require.NoError(t, err) + require.Contains(t, string(descJSON), `"desc":true`) + + // Unknown fields from a newer TiDB must decode cleanly on an older binary. + var decoded IndexColumn + require.NoError(t, json.Unmarshal(descJSON, &decoded)) + require.True(t, decoded.Desc) + + // Clone preserves Desc. 
+ require.True(t, descCol.Clone().Desc) +} + +func TestIndexInfoHasDescColumn(t *testing.T) { + c0 := newColumnForTest(0, 0) + c1 := newColumnForTest(1, 1) + + idx := newIndexForTest(10, c0, c1) + require.False(t, idx.HasDescColumn()) + + idx.Columns[1].Desc = true + require.True(t, idx.HasDescColumn()) +} + +func TestIndexInfoIsServable(t *testing.T) { + idx := newIndexForTest(1, newColumnForTest(0, 0)) + + // Version 0 (legacy) and 1 (introduces Desc) are both serviceable today. + require.True(t, idx.IsServable()) + idx.Version = 1 + require.True(t, idx.IsServable()) + + // A newer-than-known version must be rejected so old binaries refuse to + // serve queries against indexes whose semantics they do not understand. + idx.Version = 255 + require.False(t, idx.IsServable()) +} + +func TestIndexInfoUnservableErr(t *testing.T) { + idx := newIndexForTest(7, newColumnForTest(0, 0)) + idx.Version = 99 + + err := idx.UnservableErr("orders") + require.Error(t, err) + // The message must name the index, the table, and the version mismatch + // so an operator can decide whether to upgrade or DROP INDEX. + msg := err.Error() + require.Contains(t, msg, idx.Name.O) + require.Contains(t, msg, "orders") + require.Contains(t, msg, "99") + require.Contains(t, msg, "DROP INDEX") + + // Empty table name is allowed (callers without context just leave it off). + require.NotPanics(t, func() { _ = idx.UnservableErr("") }) +} diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index 2ab5584966976..91c99fe127c70 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -1080,10 +1080,12 @@ func matchProperty(ds *logicalop.DataSource, path *util.AccessPath, prop *proper } // Match SortItems physical property. - all, _ := prop.AllSameOrder() - // When the prop is empty or `all` is false, `matchProperty` is better to be `PropNotMatched` because - // it needs not to keep order for index scan. 
- if prop.IsSortItemEmpty() || !all { + // We no longer reject mixed-direction sort items up-front (pingcap/tidb#2519): + // the per-item loop below decides whether a forward or reverse scan can + // satisfy the request, and rejects the path only when the per-column + // mismatch pattern is itself mixed (some columns naturally satisfied, + // others requiring a flipped scan — unsatisfiable by any single scan). + if prop.IsSortItemEmpty() { return property.PropNotMatched } @@ -1153,11 +1155,40 @@ func matchProperty(ds *logicalop.DataSource, path *util.AccessPath, prop *proper matchResult := property.PropMatched groupByColIdxs := make([]int, 0) colIdx := 0 + // matchedScanDescSet/matchedScanDesc record whether a forward or a reverse + // cop scan satisfies the property. For each (sortItem, idxCol) pair the + // "natural" direction is forward iff sortItem.Desc == idxCol.Desc; + // otherwise the scan would have to be reversed for that single column. + // All matched pairs must agree — if some need forward and others reverse, + // no single scan direction can satisfy the property and we reject the + // path. This is the unlock for mixed-direction composites such as + // INDEX(a ASC, b DESC) satisfying ORDER BY a ASC, b DESC with a forward + // scan, or INDEX(a, b) (both ASC) satisfying ORDER BY a DESC, b DESC + // with a reverse scan. See pingcap/tidb#2519. + var matchedScanDesc bool + matchedScanDescSet := false for _, sortItem := range prop.SortItems { found := false for ; colIdx < len(idxCols); colIdx++ { // Case 1: this sort item is satisfied by the index column, go to match the next sort item. if idxColLens[colIdx] == types.UnspecifiedLength && sortItem.Col.EqualColumn(idxCols[colIdx]) { + // Index columns at colIdx >= len(path.Index.Columns) are the + // appended CommonHandle PK suffix. 
DDL rejects DESC on the + // columns of a clustered PRIMARY KEY (see BuildIndexInfo in + // pkg/ddl/index.go), so those suffix columns are guaranteed + // to be ascending and the default below is safe. If that + // guard is ever loosened, this code must be updated to read + // per-column direction from the primary index metadata. + thisIdxDesc := false + if path.Index != nil && colIdx < len(path.Index.Columns) { + thisIdxDesc = path.Index.Columns[colIdx].Desc + } + thisScanDesc := sortItem.Desc != thisIdxDesc + if matchedScanDescSet && thisScanDesc != matchedScanDesc { + return property.PropNotMatched + } + matchedScanDesc = thisScanDesc + matchedScanDescSet = true found = true colIdx++ break @@ -1220,9 +1251,13 @@ func matchProperty(ds *logicalop.DataSource, path *util.AccessPath, prop *proper if len(groups) > 0 { path.GroupedRanges = groups path.GroupByColIdxs = groupByColIdxs + path.MatchedScanDesc = matchedScanDesc return property.PropMatchedNeedMergeSort } } + if matchResult == property.PropMatched { + path.MatchedScanDesc = matchedScanDesc + } return matchResult } @@ -2978,7 +3013,10 @@ func convertToBatchPointGet(ds *logicalop.DataSource, prop *property.PhysicalPro } if !prop.IsSortItemEmpty() { batchPointGetPlan.KeepOrder = true - batchPointGetPlan.Desc = prop.SortItems[0].Desc + // matchProperty already encoded "do we need to flip the byte + // scan?" into MatchedScanDesc, accounting for any descending + // columns in the index. Just consume that decision. 
+ batchPointGetPlan.Desc = candidate.path.MatchedScanDesc } if candidate.path.IsSingleScan { batchPointGetPlan.SetAccessCols(candidate.path.IdxCols) diff --git a/pkg/planner/core/operator/physicalop/physical_index_scan.go b/pkg/planner/core/operator/physicalop/physical_index_scan.go index 0b5f17b8f1016..6317fe2a06732 100644 --- a/pkg/planner/core/operator/physicalop/physical_index_scan.go +++ b/pkg/planner/core/operator/physicalop/physical_index_scan.go @@ -696,7 +696,18 @@ func GetOriginalPhysicalIndexScan(ds *logicalop.DataSource, prop *property.Physi } // Index scan should maintain order (true for both normal sorting via SortItems and partial order via PartialOrderInfo) if prop.NeedKeepOrder() { - is.Desc = prop.GetSortDescForKeepOrder() + // matchProperty already determined whether the byte scan must flow + // forward or in reverse to satisfy this property, including the + // per-column XOR with descending storage directions. Consume that + // decision directly. PartialOrderInfo cases (TopN with prefix + // ordering) still rely on the uniform-direction reverse-scan path + // because matchPartialOrderProperty does not yet record per-column + // directions; fall back to the legacy formula there. + if path != nil && len(prop.SortItems) > 0 { + is.Desc = path.MatchedScanDesc + } else { + is.Desc = prop.GetSortDescForKeepOrder() + } is.KeepOrder = true } return is diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index b03c46464a0a8..46ab6fcccde83 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -1271,6 +1271,34 @@ func checkAutoForceIndexLookUpPushDown(ctx base.PlanContext, tblInfo *model.Tabl return checkIndexLookUpPushDownSupported(ctx, tblInfo, index, true) } +// checkAllIndicesServable returns an error if tableInfo carries any +// writable index whose metadata format is newer than this TiDB binary +// understands. 
Used by DML builders (INSERT / REPLACE / UPDATE / DELETE +// / LOAD DATA / IMPORT INTO) to fail fast before reaching table-level +// mutation paths that would silently mis-maintain the index. +// +// "Writable" here matches `tables.IsIndexWritable`, i.e. every state +// except StateDeleteOnly / StateDeleteReorganization. Restricting to +// StatePublic is wrong: StateWriteOnly and StateWriteReorganization +// indexes (mid-DDL) are also written by `addRowIndices` and +// `rebuildUpdateRecordIndices` in pkg/table/tables/tables.go via the +// same IsIndexWritable predicate. An older TiDB binary running DML +// against a table whose new DESC index is in WriteOnly would otherwise +// pass the planner gate and corrupt the index in the mutation path. +func checkAllIndicesServable(tableInfo *model.TableInfo) error { + for _, idx := range tableInfo.Indices { + // Mirror tables.IsIndexWritable on the bare IndexInfo, so the + // fence stays in lockstep with the actual write predicate. + if idx.State == model.StateDeleteOnly || idx.State == model.StateDeleteReorganization { + continue + } + if !idx.IsServable() { + return idx.UnservableErr(tableInfo.Name.O) + } + } + return nil +} + func getPossibleAccessPaths(ctx base.PlanContext, tableHints *hint.PlanHints, indexHints []*ast.IndexHint, tbl table.Table, dbName, tblName ast.CIStr, check bool, hasFlagPartitionProcessor bool) ([]*util.AccessPath, error) { tblInfo := tbl.Meta() publicPaths := make([]*util.AccessPath, 0, len(tblInfo.Indices)+2) @@ -1346,6 +1374,18 @@ func getPossibleAccessPaths(ctx base.PlanContext, tableHints *hint.PlanHints, in continue } } + // Refuse to plan against an index whose metadata format is newer + // than this binary understands (pingcap/tidb#2519). Silently + // skipping would risk wrong results — surface a clear error so + // the operator upgrades TiDB or drops the index. 
+ // + // This check fires AFTER latest-index reconciliation so a snapshot + // copy of an index that the latest schema has dropped or moved out + // of StatePublic gets skipped before we error on its (now + // irrelevant) format version. + if !index.IsServable() { + return nil, index.UnservableErr(tblName.O) + } if index.InvertedInfo != nil { invertedIndexes[index.Name.L] = struct{}{} continue @@ -4079,6 +4119,14 @@ func (b *PlanBuilder) buildInsert(ctx context.Context, insert *ast.InsertStmt) ( } return nil, err } + // Refuse INSERT/REPLACE if any of the table's indexes uses a metadata + // version newer than this TiDB binary understands — maintaining such an + // index would risk wrong rows or mismatched encoding (pingcap/tidb#2519). + // SELECT/UPDATE/DELETE plans are guarded by getPossibleAccessPaths, but + // INSERT VALUES never enumerates access paths, so check explicitly here. + if err := checkAllIndicesServable(tableInfo); err != nil { + return nil, err + } // Build Schema with DBName otherwise ColumnRef with DBName cannot match any Column in Schema. schema, names, err := expression.TableInfo2SchemaAndNames(b.ctx.GetExprCtx(), tn.Schema, tableInfo) if err != nil { @@ -4490,6 +4538,14 @@ func (b *PlanBuilder) buildLoadData(ctx context.Context, ld *ast.LoadDataStmt) ( options = append(options, &loadDataOpt) } tnW := b.resolveCtx.GetTableName(ld.Table) + // Same servability fence as INSERT (pingcap/tidb#2519): LOAD DATA writes + // every secondary index of the target table, so an unservable index + // would be silently mis-maintained without this guard. 
+ if tnW != nil && tnW.TableInfo != nil { + if err := checkAllIndicesServable(tnW.TableInfo); err != nil { + return nil, err + } + } p := LoadData{ FileLocRef: ld.FileLocRef, OnDuplicate: ld.OnDuplicate, @@ -4791,6 +4847,15 @@ func (b *PlanBuilder) buildImportInto(ctx context.Context, ld *ast.ImportIntoStm db := b.ctx.GetSessionVars().CurrentDB return nil, infoschema.ErrTableNotExists.FastGenByArgs(db, tableInfo.Name.O) } + // Same servability fence as INSERT (pingcap/tidb#2519): IMPORT INTO + // writes every secondary index of the target table. Run it against + // the same latest-schema snapshot the rest of buildImportInto uses + // — `tnW.TableInfo` can be stale here, so checking it would let an + // older TiDB miss a writable newer-format index that exists only in + // the latest schema. + if err := checkAllIndicesServable(tableInPlan.Meta()); err != nil { + return nil, err + } schema, names, err := expression.TableInfo2SchemaAndNames(b.ctx.GetExprCtx(), ast.NewCIStr(""), tableInfo) if err != nil { return nil, err diff --git a/pkg/planner/util/path.go b/pkg/planner/util/path.go index 9a315ff65ac85..8ff55ad2f1f9d 100644 --- a/pkg/planner/util/path.go +++ b/pkg/planner/util/path.go @@ -132,6 +132,19 @@ type AccessPath struct { // IsIntHandlePath indicates whether this path is table path. IsIntHandlePath bool IsCommonHandlePath bool + + // MatchedScanDesc records whether the cop scan must be reversed in order + // to satisfy the PhysicalProperty matched by matchProperty. False means a + // forward scan over the encoded keyspace already produces rows in the + // requested order; true means the consumer should set IndexScan.Desc=true + // so TiKV streams keys in reverse byte order. Meaningful only when + // matchProperty returns PropMatched or PropMatchedNeedMergeSort. 
+ // + // Computing this in matchProperty (per (sortItem, idxCol) pair) lets us + // support indexes whose columns mix ASC and DESC storage directions + // (pingcap/tidb#2519): so long as the per-column mismatch pattern is + // uniform, the scan direction is well-defined. + MatchedScanDesc bool // Forced means this path is generated by `use/force index()`. Forced bool ForceKeepOrder bool @@ -193,6 +206,7 @@ func (path *AccessPath) Clone() *AccessPath { MinAccessCondsForDNFCond: path.MinAccessCondsForDNFCond, IsIntHandlePath: path.IsIntHandlePath, IsCommonHandlePath: path.IsCommonHandlePath, + MatchedScanDesc: path.MatchedScanDesc, Forced: path.Forced, ForceKeepOrder: path.ForceKeepOrder, ForceNoKeepOrder: path.ForceNoKeepOrder, diff --git a/pkg/planner/util/path_test.go b/pkg/planner/util/path_test.go index 8187a9d61fdc1..dfa47a8d330fd 100644 --- a/pkg/planner/util/path_test.go +++ b/pkg/planner/util/path_test.go @@ -121,3 +121,21 @@ func TestOnlyPointRange(t *testing.T) { indexPath.Ranges = []*ranger.Range{&onePointRange} require.False(t, indexPath.OnlyPointRange(tc)) } + +func TestAccessPathCloneCopiesMatchedScanDesc(t *testing.T) { + // Regression for pingcap/tidb#2519: AccessPath.Clone() must propagate + // MatchedScanDesc, otherwise plan rebuilds and index-merge alternative + // cloning silently reset the field to false and a path that needed a + // reverse cop scan ends up doing a forward scan instead, producing + // rows in the wrong order. + original := &util.AccessPath{ + IsCommonHandlePath: true, + MatchedScanDesc: true, + } + cloned := original.Clone() + require.True(t, cloned.MatchedScanDesc, "MatchedScanDesc must survive Clone()") + + // Mutating the clone must not leak back. 
+ cloned.MatchedScanDesc = false + require.True(t, original.MatchedScanDesc) +} diff --git a/pkg/session/BUILD.bazel b/pkg/session/BUILD.bazel index 8730e6e9ea553..d5471d01ec16f 100644 --- a/pkg/session/BUILD.bazel +++ b/pkg/session/BUILD.bazel @@ -160,6 +160,7 @@ go_test( "main_test.go", "session_test.go", "tidb_test.go", + "upgrade_backfill_test.go", "upgrade_test.go", ], embed = [":session"], diff --git a/pkg/session/upgrade_backfill_test.go b/pkg/session/upgrade_backfill_test.go new file mode 100644 index 0000000000000..7c511a5c8c866 --- /dev/null +++ b/pkg/session/upgrade_backfill_test.go @@ -0,0 +1,91 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package session + +import ( + "context" + "fmt" + "testing" + + "github.com/pingcap/tidb/pkg/config/kerneltype" + "github.com/pingcap/tidb/pkg/meta" + "github.com/pingcap/tidb/pkg/sessionctx/vardef" + "github.com/stretchr/testify/require" +) + +func TestUpgradeToVer259BackfillsIgnoreInlistPlanDigest(t *testing.T) { + if kerneltype.IsNextGen() { + t.Skip("Skip this case because there is no upgrade in the first release of next-gen kernel") + } + + ctx := context.Background() + store, dom := CreateStoreAndBootstrap(t) + defer func() { require.NoError(t, store.Close()) }() + + ver258 := version258 + seV258 := CreateSessionAndSetID(t, store) + txn, err := store.Begin() + require.NoError(t, err) + m := meta.NewMutator(txn) + err = m.FinishBootstrap(int64(ver258)) + require.NoError(t, err) + RevertVersionAndVariables(t, seV258, ver258) + + // Simulate a cluster upgraded through the old path where the variable existed in code + // but its row was never backfilled into mysql.global_variables. 
+ MustExec(t, seV258, fmt.Sprintf( + "delete from mysql.GLOBAL_VARIABLES where variable_name='%s'", + vardef.TiDBIgnoreInlistPlanDigest, + )) + err = txn.Commit(ctx) + require.NoError(t, err) + store.SetOption(StoreBootstrappedKey, nil) + + res := MustExecToRecodeSet(t, seV258, fmt.Sprintf( + "select * from mysql.GLOBAL_VARIABLES where variable_name='%s'", + vardef.TiDBIgnoreInlistPlanDigest, + )) + chk := res.NewChunk(nil) + require.NoError(t, res.Next(ctx, chk)) + require.Equal(t, 0, chk.NumRows()) + require.NoError(t, res.Close()) + + dom.Close() + domCurVer, err := BootstrapSession(store) + require.NoError(t, err) + defer domCurVer.Close() + + seCurVer := CreateSessionAndSetID(t, store) + ver, err := GetBootstrapVersion(seCurVer) + require.NoError(t, err) + require.Equal(t, currentBootstrapVersion, ver) + + res = MustExecToRecodeSet(t, seCurVer, fmt.Sprintf( + "select * from mysql.GLOBAL_VARIABLES where variable_name='%s'", + vardef.TiDBIgnoreInlistPlanDigest, + )) + chk = res.NewChunk(nil) + require.NoError(t, res.Next(ctx, chk)) + require.Equal(t, 1, chk.NumRows()) + require.Equal(t, "OFF", chk.GetRow(0).GetString(1)) + require.NoError(t, res.Close()) + + res = MustExecToRecodeSet(t, seCurVer, "select @@global.tidb_ignore_inlist_plan_digest") + chk = res.NewChunk(nil) + require.NoError(t, res.Next(ctx, chk)) + require.Equal(t, 1, chk.NumRows()) + require.Equal(t, int64(0), chk.GetRow(0).GetInt64(0)) + require.NoError(t, res.Close()) +} diff --git a/pkg/session/upgrade_def.go b/pkg/session/upgrade_def.go index 66cc914c435f5..6d7cf8f137d69 100644 --- a/pkg/session/upgrade_def.go +++ b/pkg/session/upgrade_def.go @@ -493,6 +493,12 @@ const ( // Add the default value management for `tidb_analyze_distsql_scan_concurrency`. // If the cluster is upgraded from a version that has no such variable, we set it to the global.tidb_distsql_scan_concurrency value. 
version258 = 258 + + // version259 + // Backfill tidb_ignore_inlist_plan_digest for upgraded clusters where the row in + // mysql.global_variables was never materialized when the variable was introduced. + // Use the current sysvar default when the row is missing. + version259 = 259 ) // versionedUpgradeFunction is a struct that holds the upgrade function related @@ -506,7 +512,7 @@ type versionedUpgradeFunction struct { // currentBootstrapVersion is defined as a variable, so we can modify its value for testing. // please make sure this is the largest version -var currentBootstrapVersion int64 = version258 +var currentBootstrapVersion int64 = version259 var ( // this list must be ordered by version in ascending order, and the function @@ -689,6 +695,7 @@ var ( {version: version256, fn: upgradeToVer256}, {version: version257, fn: upgradeToVer257}, {version: version258, fn: upgradeToVer258}, + {version: version259, fn: upgradeToVer259}, } ) @@ -2109,3 +2116,7 @@ func upgradeToVer258(s sessionapi.Session, _ int64) { } initGlobalVariableIfNotExists(s, vardef.TiDBAnalyzeDistSQLScanConcurrency, rows[0].GetString(0)) } + +func upgradeToVer259(s sessionapi.Session, _ int64) { + initGlobalVariableIfNotExists(s, vardef.TiDBIgnoreInlistPlanDigest, vardef.Off) +} diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index 008fee847b823..aa821d05a86a6 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -995,6 +995,14 @@ const ( // TODO(crazycs520): remove this after foreign key GA. TiDBEnableForeignKey = "tidb_enable_foreign_key" + // TiDBEnableDescendingIndex controls whether CREATE INDEX ... (col DESC) is + // honored. When off (the default), the DESC keyword is silently ignored to + // preserve historical behavior and allow migration scripts written for MySQL + // 5.7 to continue working. 
When on, the descending order is persisted into + // IndexColumn and (once supporting TiKV is available) used for physical + // encoding. See pingcap/tidb#2519. + TiDBEnableDescendingIndex = "tidb_enable_descending_index" + // TiDBForeignKeyCheckInSharedLock indicates whether to use shared lock for foreign key check. TiDBForeignKeyCheckInSharedLock = "tidb_foreign_key_check_in_shared_lock" @@ -1584,6 +1592,7 @@ const ( DefTiDBEnableCollectExecutionInfo = true DefTiDBAllowAutoRandExplicitInsert = false DefTiDBEnableClusteredIndex = ClusteredIndexDefModeOn + DefTiDBEnableDescendingIndex = false DefTiDBRedactLog = Off DefTiDBRestrictedReadOnly = false DefTiDBSuperReadOnly = false @@ -1889,8 +1898,11 @@ var ( // DDLDiskQuota is the temporary variable for set disk quota for lightning DDLDiskQuota = atomic.NewUint64(DefTiDBDDLDiskQuota) // EnableForeignKey indicates whether to enable foreign key feature. - EnableForeignKey = atomic.NewBool(true) - EnableRCReadCheckTS = atomic.NewBool(false) + EnableForeignKey = atomic.NewBool(true) + // EnableDescendingIndex gates persisting the DESC flag on index columns at + // CREATE INDEX / CREATE TABLE time. See TiDBEnableDescendingIndex. + EnableDescendingIndex = atomic.NewBool(DefTiDBEnableDescendingIndex) + EnableRCReadCheckTS = atomic.NewBool(false) // EnableRowLevelChecksum indicates whether to append checksum to row values. 
EnableRowLevelChecksum = atomic.NewBool(DefTiDBEnableRowLevelChecksum) LowResolutionTSOUpdateInterval = atomic.NewUint32(DefTiDBLowResolutionTSOUpdateInterval) diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index dfa195593eb6d..54539f039c8b5 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -2002,6 +2002,12 @@ var defaultSysVars = []*SysVar{ }, GetGlobal: func(_ context.Context, s *SessionVars) (string, error) { return BoolToOnOff(vardef.EnableForeignKey.Load()), nil }}, + {Scope: vardef.ScopeGlobal, Name: vardef.TiDBEnableDescendingIndex, Value: BoolToOnOff(vardef.DefTiDBEnableDescendingIndex), Type: vardef.TypeBool, SetGlobal: func(_ context.Context, s *SessionVars, val string) error { + vardef.EnableDescendingIndex.Store(TiDBOptOn(val)) + return nil + }, GetGlobal: func(_ context.Context, s *SessionVars) (string, error) { + return BoolToOnOff(vardef.EnableDescendingIndex.Load()), nil + }}, {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.CollationDatabase, Value: mysql.DefaultCollationName, skipInit: true, Validation: func(vars *SessionVars, normalizedValue string, originalValue string, scope vardef.ScopeFlag) (string, error) { return checkCollation(vars, normalizedValue, originalValue, scope) }, SetSession: func(s *SessionVars, val string) error { diff --git a/pkg/table/tables/mutation_checker.go b/pkg/table/tables/mutation_checker.go index b4a65422b5623..58df654bc826f 100644 --- a/pkg/table/tables/mutation_checker.go +++ b/pkg/table/tables/mutation_checker.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/pkg/table" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/collate" "github.com/pingcap/tidb/pkg/util/dbterror" "github.com/pingcap/tidb/pkg/util/logutil" @@ -272,6 +273,26 @@ func checkIndexKeys( loc := tc.Location() for i, v := range decodedIndexValues { 
fieldType := t.Columns[indexInfo.Columns[i].Offset].FieldType.ArrayType() + // DecodeIndexKV returns column bytes whose origin depends on the + // index encoding version: for some columns the bytes were lifted + // from the key (and therefore bitwise-complemented when the + // column is DESC), for others they came from the index value + // (the restored-data path used for _bin collations and clustered + // v1 indexes), where the bytes are never complemented regardless + // of the column direction. + // + // Auto-detect rather than blindly trust IndexColumn.Desc: only + // un-complement when the leading flag byte is recognisably a + // DESC marker. ASC bytes (whether from key or value) decode + // directly. Restored bytes for a DESC column appear as ASC here + // and are correctly left untouched. + if indexInfo.Columns[i].Desc && len(v) > 0 && codec.IsDescFlag(v[0]) { + cp := make([]byte, len(v)) + for j := range v { + cp[j] = v[j] ^ 0xFF + } + v = cp + } datum, err := tablecodec.DecodeColumnValue(v, fieldType, loc) if err != nil { return errors.Trace(err) @@ -351,7 +372,10 @@ func collectTableMutationsFromBufferStage(t *TableCommon, memBuffer kv.MemBuffer } } } else { - _, m.indexID, _, err = tablecodec.DecodeIndexKey(m.key) + // Only the index ID is consumed downstream. Avoid DecodeIndexKey, + // which also parses every column value as a string and therefore + // fails on descending-complemented bytes. 
+ _, m.indexID, _, err = tablecodec.DecodeKeyHead(m.key) if err != nil { err = errors.Trace(err) } diff --git a/pkg/tablecodec/tablecodec.go b/pkg/tablecodec/tablecodec.go index d6ef6d97e21fd..796ed409e41fc 100644 --- a/pkg/tablecodec/tablecodec.go +++ b/pkg/tablecodec/tablecodec.go @@ -1247,7 +1247,18 @@ func GenIndexKey(loc *time.Location, tblInfo *model.TableInfo, idxInfo *model.In key = GetIndexKeyBuf(buf, RecordRowKeyLen+len(indexedValues)*9+9) key = appendTableIndexPrefix(key, phyTblID) key = codec.EncodeInt(key, idxInfo.ID) - key, err = codec.EncodeKey(loc, key, indexedValues...) + // For all-ascending indexes (the overwhelmingly common case) the fast + // path through EncodeKey produces byte-identical output to the DESC-aware + // encoder, so stay on it to avoid building a []bool per insert. + if idxInfo.HasDescColumn() { + desc := make([]bool, len(idxInfo.Columns)) + for i, c := range idxInfo.Columns { + desc[i] = c.Desc + } + key, err = codec.EncodeKeyWithDesc(loc, key, desc, indexedValues...) + } else { + key, err = codec.EncodeKey(loc, key, indexedValues...) + } if err != nil { return nil, false, err } diff --git a/pkg/tablecodec/tablecodec_test.go b/pkg/tablecodec/tablecodec_test.go index b63438a2fae48..58f243826eb85 100644 --- a/pkg/tablecodec/tablecodec_test.go +++ b/pkg/tablecodec/tablecodec_test.go @@ -375,6 +375,89 @@ func TestCutKeyNew(t *testing.T) { require.Equal(t, types.NewIntDatum(100), handleVal) } +func TestGenIndexKeyWithDescColumns(t *testing.T) { + tblInfo := &model.TableInfo{ + ID: 1, + Name: ast.NewCIStr("t"), + Columns: []*model.ColumnInfo{ + {ID: 1, Name: ast.NewCIStr("a"), Offset: 0, FieldType: *types.NewFieldType(mysql.TypeLonglong)}, + {ID: 2, Name: ast.NewCIStr("b"), Offset: 1, FieldType: *types.NewFieldType(mysql.TypeLonglong)}, + }, + } + // Ascending-only index: the DESC-aware code path must produce byte-for- + // byte identical output to the ordinary path so existing indexes read by + // older TiDB binaries keep working. 
+ ascIdx := &model.IndexInfo{ + ID: 10, + Name: ast.NewCIStr("idx_asc"), + Columns: []*model.IndexColumn{ + {Name: ast.NewCIStr("a"), Offset: 0, Length: types.UnspecifiedLength}, + {Name: ast.NewCIStr("b"), Offset: 1, Length: types.UnspecifiedLength}, + }, + } + // Mixed composite: a ASC, b DESC. The second column's bytes must be + // bitwise-complemented relative to the ascending encoding so a forward + // RocksDB iterator yields (a ASC, b DESC) row order. + mixedIdx := &model.IndexInfo{ + ID: 11, + Name: ast.NewCIStr("idx_mixed"), + Columns: []*model.IndexColumn{ + {Name: ast.NewCIStr("a"), Offset: 0, Length: types.UnspecifiedLength, Desc: false}, + {Name: ast.NewCIStr("b"), Offset: 1, Length: types.UnspecifiedLength, Desc: true}, + }, + Version: 1, + } + + loc := time.UTC + vals := []types.Datum{types.NewIntDatum(5), types.NewIntDatum(7)} + + ascKey, _, err := GenIndexKey(loc, tblInfo, ascIdx, tblInfo.ID, vals, kv.IntHandle(1), nil) + require.NoError(t, err) + + // Recreate an all-ascending key using EncodeKeyWithDesc(nil) to assert + // the fast path and the DESC-aware path agree bit-for-bit. + explicitAsc, _, err := GenIndexKey(loc, tblInfo, &model.IndexInfo{ + ID: ascIdx.ID, Name: ascIdx.Name, Columns: []*model.IndexColumn{ + {Name: ast.NewCIStr("a"), Offset: 0, Length: types.UnspecifiedLength, Desc: false}, + {Name: ast.NewCIStr("b"), Offset: 1, Length: types.UnspecifiedLength, Desc: false}, + }, + }, tblInfo.ID, vals, kv.IntHandle(1), nil) + require.NoError(t, err) + require.Equal(t, ascKey, explicitAsc, "all-ASC desc flags must match EncodeKey's output") + + mixedKey, _, err := GenIndexKey(loc, tblInfo, mixedIdx, tblInfo.ID, vals, kv.IntHandle(1), nil) + require.NoError(t, err) + + // The two keys share the table/index prefix plus column a's encoding, + // and diverge on column b: the mixed key has b's bytes complemented. 
+ require.NotEqual(t, ascKey, mixedKey, "mixed-direction index must produce different bytes") + + // Re-encode the column region ourselves and compare against the + // trailing portion of each generated key (the prefix is identical + // because we supplied the same tblInfo/idxID). + ascCols, err := codec.EncodeKey(loc, nil, vals...) + require.NoError(t, err) + mixedCols, err := codec.EncodeKeyWithDesc(loc, nil, []bool{false, true}, vals...) + require.NoError(t, err) + // Locate each column region inside the generated keys: the prefix length is + // identical for both because we supplied the same tblInfo/idxID. + require.True(t, len(ascKey) >= len(ascCols)) + require.True(t, len(mixedKey) >= len(mixedCols)) + require.Equal(t, ascCols, ascKey[len(ascKey)-len(ascCols)-9:len(ascKey)-9], + "ASC index column bytes must match EncodeKey output (9 bytes trailing handle suffix)") + require.Equal(t, mixedCols, mixedKey[len(mixedKey)-len(mixedCols)-9:len(mixedKey)-9], + "mixed index column bytes must match EncodeKeyWithDesc output") + + // Decoding round-trip: cut past the column bytes using DESC-aware cutter. + colRegion := mixedKey[len(mixedKey)-len(mixedCols)-9 : len(mixedKey)-9] + rem, d0, err := codec.DecodeOneWithDesc(colRegion, false) + require.NoError(t, err) + require.Equal(t, int64(5), d0.GetInt64()) + _, d1, err := codec.DecodeOneWithDesc(rem, true) + require.NoError(t, err) + require.Equal(t, int64(7), d1.GetInt64()) +} + func TestCutKey(t *testing.T) { colIDs := []int64{1, 2, 3} values := []types.Datum{types.NewIntDatum(1), types.NewBytesDatum([]byte("abc")), types.NewFloat64Datum(5.5)} diff --git a/pkg/util/codec/codec.go b/pkg/util/codec/codec.go index 6974393011437..296eeb3e7f7f3 100644 --- a/pkg/util/codec/codec.go +++ b/pkg/util/codec/codec.go @@ -57,6 +57,26 @@ const ( // IntHandleFlag is only used to encode int handle key. 
const IntHandleFlag = intFlag +// IsDescFlag reports whether b is a leading flag byte from an EncodeKeyWithDesc +// descending-column encoding (the bitwise complement of one of the ascending +// flag bytes supported by peek/DecodeOne). Callers that received column bytes +// from a heterogeneous source (e.g. tablecodec.DecodeIndexKV mixes key-derived +// and value-derived bytes) can use this to decide whether the bytes need to be +// un-complemented before being interpreted as a regular memcomparable datum. +// See pingcap/tidb#2519. +func IsDescFlag(b byte) bool { + switch b { + case ^NilFlag, ^bytesFlag, ^compactBytesFlag, ^intFlag, ^uintFlag, + ^floatFlag, ^decimalFlag, ^durationFlag: + return true + } + return false +} + +// isDescFlag is the unexported alias kept for in-package call sites that +// predate IsDescFlag. +func isDescFlag(b byte) bool { return IsDescFlag(b) } + const ( sizeUint64 = unsafe.Sizeof(uint64(0)) sizeUint8 = unsafe.Sizeof(uint8(0)) @@ -307,6 +327,103 @@ func EncodeKey(loc *time.Location, b []byte, v ...types.Datum) ([]byte, error) { return encode(loc, b, v, true) } +// EncodeKeyWithDesc is like EncodeKey, but bitwise-complements the encoded +// bytes of every datum whose corresponding entry in desc is true. Complementing +// a memcomparable-encoded datum reverses its byte comparison order, so a +// forward iterator over keys built this way yields descending order for those +// columns. Columns whose desc entry is false (or whose index is past len(desc)) +// are encoded identically to EncodeKey, preserving mixed-direction composite +// indexes like INDEX(a ASC, b DESC). See pingcap/tidb#2519. +// +// Returns an error if any DESC column has a Kind whose underlying encoding is +// not memcomparable in the sense that "complement of encoded bytes" preserves +// reverse order — currently KindMysqlJSON and KindVectorFloat32. 
Those types +// aren't usable as ordinary index columns anyway (json/vector indexes go +// through their own pipeline), but the explicit rejection prevents a +// caller from accidentally producing keys whose comparison semantics would +// be undefined. +func EncodeKeyWithDesc(loc *time.Location, b []byte, desc []bool, v ...types.Datum) ([]byte, error) { + for i := range v { + isDesc := i < len(desc) && desc[i] + if isDesc { + switch v[i].Kind() { + case types.KindMysqlJSON, types.KindVectorFloat32: + return b, errors.Errorf( + "descending-order encoding is not supported for column kind %v; this kind has no memcomparable form whose bitwise complement preserves reverse order", + v[i].Kind()) + } + } + start := len(b) + var err error + b, err = encode(loc, b, v[i:i+1], true) + if err != nil { + return b, err + } + if isDesc { + for j := start; j < len(b); j++ { + b[j] ^= 0xFF + } + } + } + return b, nil +} + +// DecodeOneWithDesc decodes a single datum from a byte slice produced by +// EncodeKeyWithDesc. When desc is false it is equivalent to DecodeOne; when +// true it inverts the encoded bytes and dispatches through the existing +// decoder, so all types supported by DecodeOne work unchanged. +// +// This allocates a scratch buffer to hold the complemented prefix. Callers on +// hot paths that decode many consecutive descending columns should consider a +// streaming variant; none of the current callsites are hot enough to justify +// the added complexity. +func DecodeOneWithDesc(b []byte, desc bool) (remain []byte, d types.Datum, err error) { + if !desc { + return DecodeOne(b) + } + if len(b) < 1 { + return nil, d, errors.New("invalid encoded key") + } + // peek() inspects the flag byte and computes the encoded length, so we + // complement b into a scratch buffer first. A full copy keeps the logic + // simple — DecodeOne handles every type uniformly on the scratch copy. 
+ cp := make([]byte, len(b)) + for i := range b { + cp[i] = b[i] ^ 0xFF + } + rem, d, err := DecodeOne(cp) + if err != nil { + return nil, d, errors.Trace(err) + } + consumed := len(b) - len(rem) + return b[consumed:], d, nil +} + +// CutOneWithDesc cuts the first encoded datum from b, where the column may be +// stored with descending-complemented bytes (desc=true). It is equivalent to +// CutOne when desc is false. Used by index-key decoders that only need to +// advance past a column without materialising its value. +func CutOneWithDesc(b []byte, desc bool) (data []byte, remain []byte, err error) { + if !desc { + return CutOne(b) + } + if len(b) < 1 { + return nil, nil, errors.New("invalid encoded key") + } + // peek() in cmpl-space gives the encoded length. We only need one byte of + // scratch (the flag) plus up to the encoded length for variable-length + // types, so take the minimum slice needed. + cp := make([]byte, len(b)) + for i := range b { + cp[i] = b[i] ^ 0xFF + } + l, err := peek(cp) + if err != nil { + return nil, nil, errors.Trace(err) + } + return b[:l], b[l:], nil +} + // EncodeValue appends the encoded values to byte slice b, returning the appended // slice. It does not guarantee the order for comparison. func EncodeValue(loc *time.Location, b []byte, v ...types.Datum) ([]byte, error) { @@ -1511,6 +1628,34 @@ func peek(b []byte) (length int, err error) { l, err = types.PeekBytesAsJSON(b) case vectorFloat32Flag: l, err = types.PeekBytesAsVectorFloat32(b) + // Descending-order flags (pingcap/tidb#2519). EncodeKeyWithDesc writes + // bitwise-complemented bytes, so the leading flag becomes ^original. We + // compute the length of the column exactly as for the ascending variant + // because complement preserves byte count. Variable-length types need to + // walk group markers in complement space — we do so by complementing a + // scratch copy of the body. + case ^NilFlag: + // NULL in descending encoding has no body bytes. 
+ case ^intFlag, ^uintFlag, ^floatFlag, ^durationFlag: + l = 8 + case ^bytesFlag: + tmp := make([]byte, len(b)) + for i := range b { + tmp[i] = b[i] ^ 0xFF + } + l, err = peekBytes(tmp) + case ^compactBytesFlag: + tmp := make([]byte, len(b)) + for i := range b { + tmp[i] = b[i] ^ 0xFF + } + l, err = peekCompactBytes(tmp) + case ^decimalFlag: + tmp := make([]byte, len(b)) + for i := range b { + tmp[i] = b[i] ^ 0xFF + } + l, err = types.DecimalPeak(tmp) default: return 0, errors.Errorf("invalid encoded key flag %v", flag) } @@ -1584,6 +1729,10 @@ type Decoder struct { // buf is only used for DecodeBytes to avoid the cost of makeslice. buf []byte + // descBuf is a reusable scratch buffer for the descending-column + // auto-detect path so a per-row index scan does not allocate a fresh + // slice for every DESC column it decodes (pingcap/tidb#2519). + descBuf []byte } // NewDecoder creates a Decoder. @@ -1595,10 +1744,45 @@ func NewDecoder(chk *chunk.Chunk, timezone *time.Location) *Decoder { } // DecodeOne decodes one value to chunk and returns the remained bytes. +// +// Descending-order columns (pingcap/tidb#2519) are written as bitwise- +// complemented memcomparable bytes. Their leading flag byte is the bit- +// complement of the ascending flag (e.g. intFlag=0x03 becomes 0xFC, NilFlag +// =0x00 becomes 0xFF). When the input begins with one of those complemented +// flags we transparently invert just the bytes this column consumes and fall +// through to the ascending switch — callers need no schema awareness. The +// 0xFA collision between ^floatFlag and maxFlag is not observable on stored +// rows because maxFlag only appears in range sentinels, which never reach +// this decoder. 
func (decoder *Decoder) DecodeOne(b []byte, colIdx int, ft *types.FieldType) (remain []byte, err error) { if len(b) < 1 { return nil, errors.New("invalid encoded key") } + if isDescFlag(b[0]) { + length, err := peek(b) + if err != nil { + return nil, errors.Trace(err) + } + // Reuse decoder.descBuf instead of allocating a fresh scratch slice + // per DESC column. This is on the per-row decode hot path; for a + // composite DESC index every row would otherwise drop a few-hundred- + // byte allocation. The buffer's contents are not referenced after + // the recursive DecodeOne call returns, so reuse is safe even for + // variable-length types whose decoded form (Bytes) gets copied into + // the chunk inside the ASC branch. + if cap(decoder.descBuf) < length { + decoder.descBuf = make([]byte, length) + } else { + decoder.descBuf = decoder.descBuf[:length] + } + for i := range length { + decoder.descBuf[i] = b[i] ^ 0xFF + } + if _, err := decoder.DecodeOne(decoder.descBuf, colIdx, ft); err != nil { + return nil, errors.Trace(err) + } + return b[length:], nil + } chk := decoder.chk flag := b[0] b = b[1:] diff --git a/pkg/util/codec/codec_test.go b/pkg/util/codec/codec_test.go index 88f5c6d677ecf..23b47b7dce69e 100644 --- a/pkg/util/codec/codec_test.go +++ b/pkg/util/codec/codec_test.go @@ -113,6 +113,220 @@ func estimateValuesSize(typeCtx types.Context, vals []types.Datum) (int, error) return size, nil } +func TestEncodeKeyWithDescRoundTrip(t *testing.T) { + // Every datum kind we care about for an index key. Passing nil for desc + // or all-false must be identical to EncodeKey so the encoding is a strict + // superset of the ascending path. + typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + vals := types.MakeDatums(int64(-5), uint64(42), float64(3.14), []byte("hello"), "world", nil) + + asc, err := EncodeKey(typeCtx.Location(), nil, vals...) + require.NoError(t, err) + same, err := EncodeKeyWithDesc(typeCtx.Location(), nil, nil, vals...) 
+ require.NoError(t, err) + require.Equal(t, asc, same, "nil desc slice must match EncodeKey byte-for-byte") + + // Every column descending: each column's encoded prefix is bitwise- + // complemented compared to the ascending form. We decode one datum at a + // time with DecodeOneWithDesc and check we recover the original. + desc := []bool{true, true, true, true, true, true} + allDesc, err := EncodeKeyWithDesc(typeCtx.Location(), nil, desc, vals...) + require.NoError(t, err) + remain := allDesc + for i, v := range vals { + var got types.Datum + remain, got, err = DecodeOneWithDesc(remain, true) + require.NoError(t, err, "column %d", i) + switch v.Kind() { + case types.KindInt64: + require.Equal(t, types.KindInt64, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetInt64(), got.GetInt64(), "column %d", i) + case types.KindUint64: + require.Equal(t, types.KindUint64, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetUint64(), got.GetUint64(), "column %d", i) + case types.KindFloat64: + require.Equal(t, types.KindFloat64, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetFloat64(), got.GetFloat64(), "column %d", i) + case types.KindBytes, types.KindString: + // Strings are encoded via bytesFlag and decoded as KindBytes — + // this mirrors the ascending DecodeOne behaviour. + require.Equal(t, types.KindBytes, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetBytes(), got.GetBytes(), "column %d", i) + case types.KindNull: + require.Equal(t, types.KindNull, got.Kind(), "column %d kind", i) + } + } + require.Empty(t, remain, "every byte should have been consumed") + + // Mixed composite: (a ASC, b DESC, c ASC). The ASC columns' bytes must + // match EncodeKey's output and only the DESC column's bytes should be + // complemented. + mixedVals := types.MakeDatums(int64(7), int64(9), int64(11)) + mixed, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{false, true, false}, mixedVals...) 
+ require.NoError(t, err) + // First column bytes: same as EncodeKey of just int64(7). + prefix, err := EncodeKey(typeCtx.Location(), nil, mixedVals[0]) + require.NoError(t, err) + require.Equal(t, prefix, mixed[:len(prefix)]) + // Decoding round-trips with per-column desc flags. + rem := mixed + rem, d0, err := DecodeOneWithDesc(rem, false) + require.NoError(t, err) + require.Equal(t, int64(7), d0.GetInt64()) + rem, d1, err := DecodeOneWithDesc(rem, true) + require.NoError(t, err) + require.Equal(t, int64(9), d1.GetInt64()) + rem, d2, err := DecodeOneWithDesc(rem, false) + require.NoError(t, err) + require.Equal(t, int64(11), d2.GetInt64()) + require.Empty(t, rem) +} + +func TestEncodeKeyWithDescOrdering(t *testing.T) { + // The core property: for a DESC column, user value a < b must produce + // encoded(a) > encoded(b) in byte order. A forward scan over the encoded + // keyspace then yields descending user order. + typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + enc := func(v int64, desc bool) []byte { + b, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{desc}, types.NewIntDatum(v)) + require.NoError(t, err) + return b + } + // Ascending sanity: a < b → encoded(a) < encoded(b). + require.Less(t, bytes.Compare(enc(-100, false), enc(-1, false)), 0) + require.Less(t, bytes.Compare(enc(-1, false), enc(0, false)), 0) + require.Less(t, bytes.Compare(enc(0, false), enc(1, false)), 0) + require.Less(t, bytes.Compare(enc(1, false), enc(1<<30, false)), 0) + + // Descending: byte order inverts. + require.Greater(t, bytes.Compare(enc(-100, true), enc(-1, true)), 0) + require.Greater(t, bytes.Compare(enc(-1, true), enc(0, true)), 0) + require.Greater(t, bytes.Compare(enc(0, true), enc(1, true)), 0) + require.Greater(t, bytes.Compare(enc(1, true), enc(1<<30, true)), 0) + + // NULL on a DESC column must sort AFTER real values (MySQL 8.0's + // "NULLS LAST" default for ORDER BY ... DESC). 
Encoded as NilFlag=0x00 + // complemented to 0xFF, it's the largest possible first byte, so a + // forward scan reaches it last. + nullDesc, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{true}, types.NewDatum(nil)) + require.NoError(t, err) + require.Greater(t, bytes.Compare(nullDesc, enc(1<<62, true)), 0, + "NULL must sort after any real value on a DESC column") +} + +func TestEncodeKeyWithDescRangeSentinels(t *testing.T) { + // Range builders place MinNotNullDatum and MaxValueDatum at infinity-side + // boundaries. When such a sentinel hits a DESC column its encoded bytes + // are bitwise-complemented along with the rest, and the relative byte + // order between sentinels, real values, and NULL must still produce a + // well-formed scan range. + // + // The semantics we lock down here: + // * NULL on a DESC column sorts LAST in byte order (MySQL "DESC NULLS + // LAST" default), so a forward scan reaches it after every real value. + // * MaxValueDatum on a DESC column sorts FIRST (it's the user-visible + // +∞, so "largest first" comes first in DESC order). + // * MinNotNullDatum on a DESC column sorts BETWEEN real values and NULL + // (it's the user-visible "smallest non-null", so it comes just before + // NULL in DESC order). + typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + + encDesc := func(d types.Datum) []byte { + out, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{true}, d) + require.NoError(t, err) + return out + } + + nullBytes := encDesc(types.NewDatum(nil)) + maxBytes := encDesc(types.MaxValueDatum()) + minBytes := encDesc(types.MinNotNullDatum()) + smallVal := encDesc(types.NewIntDatum(1)) + largeVal := encDesc(types.NewIntDatum(1 << 30)) + + // MaxValue ≤ every real value (in user terms +∞ ≥ x; in DESC byte order + // that flips to "comes first"). 
+ require.Less(t, bytes.Compare(maxBytes, smallVal), 0, + "DESC MaxValueDatum must sort before any real value") + require.Less(t, bytes.Compare(maxBytes, largeVal), 0) + + // Real values still sort largest→smallest among themselves. + require.Less(t, bytes.Compare(largeVal, smallVal), 0, + "DESC: larger user value -> smaller byte encoding") + + // MinNotNull ≥ every real value (it represents -∞ on the non-null side). + require.Greater(t, bytes.Compare(minBytes, smallVal), 0, + "DESC MinNotNullDatum must sort after every real value") + require.Greater(t, bytes.Compare(minBytes, largeVal), 0) + + // NULL sorts strictly after MinNotNull. + require.Greater(t, bytes.Compare(nullBytes, minBytes), 0, + "DESC NULL must sort after MinNotNullDatum") + + // And after every real value (i.e. NULLs come last in DESC order). + require.Greater(t, bytes.Compare(nullBytes, smallVal), 0) + require.Greater(t, bytes.Compare(nullBytes, largeVal), 0) + + // FullRange [Null, MaxValue] in user terms, after the swap that the + // distsql layer performs, gives a well-ordered byte range: + // byteLow = encode_DESC(MaxValue) (smaller) + // byteHigh = encode_DESC(Null) (larger) + // Walking the keyspace forward from byteLow to byteHigh hits every real + // value plus NULL. + require.Less(t, bytes.Compare(maxBytes, nullBytes), 0, + "forward walk from MaxValue to NULL must be a non-empty byte range") +} + +func TestEncodeKeyWithDescRejectsUnsupportedKinds(t *testing.T) { + // JSON and VECTOR_FLOAT32 don't have a memcomparable form whose bitwise + // complement preserves reverse order, so EncodeKeyWithDesc must refuse + // rather than silently producing a key whose comparison semantics are + // undefined. ASC encoding of these kinds remains unaffected. 
+ typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + + jsonDatum := types.NewDatum(types.CreateBinaryJSON(map[string]any{"k": "v"})) + _, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{true}, jsonDatum) + require.Error(t, err, "DESC encoding of KindMysqlJSON must be rejected") + require.Contains(t, err.Error(), "descending-order encoding is not supported") + + // ASC encoding of the same datum still works. + _, err = EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{false}, jsonDatum) + require.NoError(t, err, "ASC encoding of JSON must keep working") + + // Mixed: an ASC JSON column followed by a DESC int column is fine. + mixed, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{false, true}, + jsonDatum, types.NewIntDatum(7)) + require.NoError(t, err) + require.NotEmpty(t, mixed) +} + +func TestCutOneWithDesc(t *testing.T) { + // CutOneWithDesc must advance past a DESC-encoded column by exactly the + // same number of bytes the encoder wrote — otherwise subsequent columns + // of a composite key would be misaligned. + typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + composite, err := EncodeKeyWithDesc( + typeCtx.Location(), nil, + []bool{false, true, false}, + types.NewIntDatum(1), types.NewBytesDatum([]byte("middle")), types.NewIntDatum(3), + ) + require.NoError(t, err) + + rem := composite + part0, rem, err := CutOneWithDesc(rem, false) + require.NoError(t, err) + require.NotEmpty(t, part0) + part1, rem, err := CutOneWithDesc(rem, true) + require.NoError(t, err) + require.NotEmpty(t, part1) + part2, rem, err := CutOneWithDesc(rem, false) + require.NoError(t, err) + require.NotEmpty(t, part2) + require.Empty(t, rem) + + // The three parts must concatenate back to the original key. 
+ require.Equal(t, composite, append(append(append([]byte{}, part0...), part1...), part2...)) +} + func TestCodecKeyCompare(t *testing.T) { table := []struct { Left []types.Datum diff --git a/pkg/util/signal/exit.go b/pkg/util/signal/exit.go index 827dbd69bcdc2..534eec32c87dc 100644 --- a/pkg/util/signal/exit.go +++ b/pkg/util/signal/exit.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !windows + package signal import ( diff --git a/pkg/util/signal/signal_posix.go b/pkg/util/signal/signal_posix.go index f975b47b21bd5..721644d75d66b 100644 --- a/pkg/util/signal/signal_posix.go +++ b/pkg/util/signal/signal_posix.go @@ -71,7 +71,7 @@ func SetupUSR1Handler() { } // SetupSignalHandler setup signal handler for TiDB Server -func SetupSignalHandler(shutdownFunc func()) { +func SetupSignalHandler(shutdownFunc func(sig os.Signal)) { closeSignalChan := make(chan os.Signal, 1) signal.Notify(closeSignalChan, syscall.SIGHUP, @@ -82,6 +82,6 @@ func SetupSignalHandler(shutdownFunc func()) { go func() { sig := <-closeSignalChan logutil.BgLogger().Info("got signal to exit", zap.Stringer("signal", sig)) - shutdownFunc() + shutdownFunc(sig) }() } diff --git a/pkg/util/signal/signal_wasm.go b/pkg/util/signal/signal_wasm.go index 02ac7b75f2cd7..f1f4d455b1048 100644 --- a/pkg/util/signal/signal_wasm.go +++ b/pkg/util/signal/signal_wasm.go @@ -14,9 +14,11 @@ package signal +import "os" + // SetupUSR1Handler sets up a signal handler for SIGUSR1. func SetupUSR1Handler() {} -// SetupSignalHandler setup signal handler for TiDB Server -func SetupSignalHandler(shutdownFunc func(bool)) { +// SetupSignalHandler is a no-op on WASM and never invokes shutdownFunc. 
+func SetupSignalHandler(shutdownFunc func(sig os.Signal)) { } diff --git a/pkg/util/signal/signal_windows.go b/pkg/util/signal/signal_windows.go index 01ea9d7027676..0b0b9b82587c6 100644 --- a/pkg/util/signal/signal_windows.go +++ b/pkg/util/signal/signal_windows.go @@ -28,7 +28,7 @@ import ( func SetupUSR1Handler() {} // SetupSignalHandler setup signal handler for TiDB Server -func SetupSignalHandler(shutdownFunc func()) { +func SetupSignalHandler(shutdownFunc func(sig os.Signal)) { //todo deal with dump goroutine stack on windows closeSignalChan := make(chan os.Signal, 1) signal.Notify(closeSignalChan, @@ -40,6 +40,16 @@ func SetupSignalHandler(shutdownFunc func()) { go func() { sig := <-closeSignalChan logutil.BgLogger().Info("got signal to exit", zap.Stringer("signal", sig)) - shutdownFunc() + shutdownFunc(sig) }() } + +// TiDBExit sends a signal to the current process. +func TiDBExit(sig syscall.Signal) { + p, err := os.FindProcess(os.Getpid()) + if err != nil { + return + } + // Best effort; Windows does not support POSIX signal shutdown semantics. + _ = p.Signal(sig) +} diff --git a/tests/desc_index_e2e.sql b/tests/desc_index_e2e.sql new file mode 100644 index 0000000000000..12d34f867f7ff --- /dev/null +++ b/tests/desc_index_e2e.sql @@ -0,0 +1,170 @@ +-- End-to-end smoke test for descending-order indexes (pingcap/tidb#2519). +-- +-- Run via the wrapper at tests/run_desc_index_e2e.sh, which boots a tiup +-- playground cluster against caller-supplied TiDB and TiKV binaries: +-- +-- TIDB_BIN=/path/to/bin/tidb-server \ +-- TIKV_BIN=/path/to/target/release/tikv-server \ +-- ./tests/run_desc_index_e2e.sh +-- +-- Or directly against any cluster that exposes 127.0.0.1:4000: +-- +-- mysql -h 127.0.0.1 -P 4000 -u root --force < tests/desc_index_e2e.sql +-- +-- Each section runs a small SQL block with the expected behaviour annotated +-- above it. 
A failure (wrong row order, missing rows, unexpected error) +-- indicates the wire format between TiDB and TiKV is inconsistent for that +-- scenario. Section 3 deliberately exercises a DDL that must be rejected, +-- so use --force when running interactively. + +-- ============================================================================ +-- 0. Setup +-- ============================================================================ +CREATE DATABASE IF NOT EXISTS test; +USE test; + +-- Drop any leftover state from a previous run. +DROP TABLE IF EXISTS desc_e2e_single; +DROP TABLE IF EXISTS desc_e2e_mixed; +DROP TABLE IF EXISTS desc_e2e_pk_clustered_should_fail; +DROP TABLE IF EXISTS desc_e2e_pk_nonclustered; +DROP TABLE IF EXISTS desc_e2e_strings; +DROP TABLE IF EXISTS desc_e2e_expression; + +-- The feature is opt-in. +SELECT @@global.tidb_enable_descending_index AS before_set; +SET @@global.tidb_enable_descending_index = ON; +SELECT @@global.tidb_enable_descending_index AS after_set; + +-- ============================================================================ +-- 1. Single descending column: INDEX(b DESC) +-- ============================================================================ +CREATE TABLE desc_e2e_single (a INT, b INT, INDEX idx_b (b DESC)); +INSERT INTO desc_e2e_single VALUES (1, 10), (2, 20), (3, 5), (4, 30), (5, 15); + +-- Metadata round-trip +SHOW CREATE TABLE desc_e2e_single; +SELECT collation FROM information_schema.statistics WHERE table_name='desc_e2e_single' AND index_name='idx_b'; +-- Expected: D + +-- Forward scan satisfies ORDER BY b DESC: no Sort, no 'desc' flag. +EXPLAIN FORMAT='brief' SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected order: 30, 20, 15, 10, 5 + +-- Reverse scan satisfies ORDER BY b ASC: 'keep order:true, desc' in plan. 
+EXPLAIN FORMAT='brief' SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b ASC; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b ASC; +-- Expected order: 5, 10, 15, 20, 30 + +-- Point lookup on the DESC column. +EXPLAIN FORMAT='brief' SELECT a FROM desc_e2e_single USE INDEX(idx_b) WHERE b = 20; +SELECT a FROM desc_e2e_single USE INDEX(idx_b) WHERE b = 20; +-- Expected: 2 + +SELECT a FROM desc_e2e_single USE INDEX(idx_b) WHERE b = 999; +-- Expected: empty result + +-- Range scan +EXPLAIN FORMAT='brief' SELECT b FROM desc_e2e_single USE INDEX(idx_b) WHERE b BETWEEN 10 AND 25 ORDER BY b DESC; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) WHERE b BETWEEN 10 AND 25 ORDER BY b DESC; +-- Expected: 20, 15, 10 + +-- ============================================================================ +-- 2. Mixed-direction composite: INDEX(a, b DESC) — MySQL 8.0 flagship case +-- ============================================================================ +CREATE TABLE desc_e2e_mixed (a INT, b INT, INDEX idx_ab (a, b DESC)); +INSERT INTO desc_e2e_mixed VALUES (1, 10), (1, 5), (1, 15), (2, 20), (2, 8), (2, 12); + +-- Forward scan satisfies ORDER BY a ASC, b DESC (column-by-column match). +EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a ASC, b DESC; +SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a ASC, b DESC; +-- Expected: (1,15)(1,10)(1,5)(2,20)(2,12)(2,8) + +-- Reverse scan satisfies ORDER BY a DESC, b ASC (bitwise complement). +EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a DESC, b ASC; +SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a DESC, b ASC; +-- Expected: (2,8)(2,12)(2,20)(1,5)(1,10)(1,15) + +-- Unsatisfiable by either direction: planner inserts Sort. 
+EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a, b; +SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a, b; +-- Expected: (1,5)(1,10)(1,15)(2,8)(2,12)(2,20) + +-- ============================================================================ +-- 3. PRIMARY KEY direction guards +-- ============================================================================ + +-- Clustered PK with DESC must be rejected at DDL time. +-- Expected: error "DESC is not supported on the columns of a clustered PRIMARY KEY" +CREATE TABLE desc_e2e_pk_clustered_should_fail (a INT, b INT, PRIMARY KEY (a, b DESC) CLUSTERED); + +-- NONCLUSTERED PK with DESC is allowed (encoded as a unique secondary index). +CREATE TABLE desc_e2e_pk_nonclustered (a INT, b INT, PRIMARY KEY (a DESC) NONCLUSTERED); +INSERT INTO desc_e2e_pk_nonclustered VALUES (10, 1), (5, 2), (20, 3), (1, 4); +SELECT a FROM desc_e2e_pk_nonclustered USE INDEX(`PRIMARY`) ORDER BY a DESC; +-- Expected: 20, 10, 5, 1 + +-- ============================================================================ +-- 4. String column DESC +-- ============================================================================ +CREATE TABLE desc_e2e_strings (id INT, name VARCHAR(32), INDEX idx_name (name DESC)); +INSERT INTO desc_e2e_strings VALUES (1,'apple'),(2,'banana'),(3,'cherry'),(4,'date'),(5,'elderberry'); + +EXPLAIN FORMAT='brief' SELECT name FROM desc_e2e_strings USE INDEX(idx_name) ORDER BY name DESC; +SELECT name FROM desc_e2e_strings USE INDEX(idx_name) ORDER BY name DESC; +-- Expected: elderberry, date, cherry, banana, apple + +SELECT id FROM desc_e2e_strings USE INDEX(idx_name) WHERE name = 'cherry'; +-- Expected: 3 + +-- ============================================================================ +-- 5. 
Expression index with DESC +-- ============================================================================ +CREATE TABLE desc_e2e_expression (a INT, b INT, INDEX idx_expr ((a + b) DESC)); +INSERT INTO desc_e2e_expression VALUES (1,10),(2,20),(3,5),(4,30),(5,15); + +SHOW CREATE TABLE desc_e2e_expression; +SELECT collation FROM information_schema.statistics WHERE table_name='desc_e2e_expression'; +-- Expected: D + +EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_expression USE INDEX(idx_expr) ORDER BY (a + b) DESC; +SELECT a, b FROM desc_e2e_expression USE INDEX(idx_expr) ORDER BY (a + b) DESC; +-- Expected: rows with a+b in descending order: (4,30)=34, (2,20)=22, (5,15)=20, (1,10)=11, (3,5)=8 + +-- ============================================================================ +-- 6. UPDATE / DELETE against DESC indexes +-- ============================================================================ +UPDATE desc_e2e_single SET a = a + 100 WHERE b = 20; +SELECT * FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected: row (102, 20) shows updated a + +DELETE FROM desc_e2e_single WHERE b = 5; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected: 30, 20, 15, 10 (5 is gone) + +-- ============================================================================ +-- 7. Sysvar create-time-only semantics +-- ============================================================================ +SET @@global.tidb_enable_descending_index = OFF; +-- Existing DESC tables continue to work after the flip. +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected: 30, 20, 15, 10 + +-- New CREATE INDEX silently drops DESC when the gate is off. 
+CREATE TABLE desc_e2e_off_new (x INT, INDEX idx_x (x DESC)); +SELECT collation FROM information_schema.statistics WHERE table_name='desc_e2e_off_new'; +-- Expected: A (DESC was dropped) +DROP TABLE desc_e2e_off_new; + +-- ============================================================================ +-- 8. Cleanup +-- ============================================================================ +SET @@global.tidb_enable_descending_index = DEFAULT; +DROP TABLE IF EXISTS desc_e2e_single; +DROP TABLE IF EXISTS desc_e2e_mixed; +DROP TABLE IF EXISTS desc_e2e_pk_nonclustered; +DROP TABLE IF EXISTS desc_e2e_strings; +DROP TABLE IF EXISTS desc_e2e_expression; + +SELECT 'e2e test complete' AS status; diff --git a/tests/run_desc_index_e2e.sh b/tests/run_desc_index_e2e.sh new file mode 100755 index 0000000000000..cb17b697efee1 --- /dev/null +++ b/tests/run_desc_index_e2e.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# End-to-end smoke test runner for descending-order indexes +# (pingcap/tidb#2519). +# +# Boots a tiup playground cluster with caller-supplied TiDB and TiKV +# binaries, runs tests/desc_index_e2e.sql against it via the mysql +# client, and tears the cluster down on exit. Output is split into +# "playground.log" (TiDB / TiKV / PD logs) and "e2e.out" (SQL output +# you'll want to diff against the expectations annotated in the SQL +# file). +# +# Usage: +# TIDB_BIN=/path/to/bin/tidb-server \ +# TIKV_BIN=/path/to/target/release/tikv-server \ +# ./tests/run_desc_index_e2e.sh +# +# Optional environment overrides: +# SQL_FILE path to the SQL script (default: alongside this script) +# TIUP_TAG playground tag (default: desc-e2e) +# WAIT_SECS seconds to wait for TiDB to come up (default: 120) +# OUT_DIR where to write playground.log + e2e.out (default: mktemp) +# +# Requires: tiup, mysql client, plus the two binaries above. The TiKV +# binary must include the descending-order coprocessor changes from +# tikv/tikv#19558. 
+ +set -euo pipefail + +: "${TIDB_BIN:?set TIDB_BIN to the path of the desc-index TiDB binary}" +: "${TIKV_BIN:?set TIKV_BIN to the path of the desc-index TiKV binary}" +SQL_FILE="${SQL_FILE:-$(dirname "$0")/desc_index_e2e.sql}" +TIUP_TAG="${TIUP_TAG:-desc-e2e}" +WAIT_SECS="${WAIT_SECS:-120}" +OUT_DIR="${OUT_DIR:-$(mktemp -d -t desc-e2e-XXXXXX)}" + +for f in "$TIDB_BIN" "$TIKV_BIN" "$SQL_FILE"; do + if [[ ! -f "$f" ]]; then + echo "missing: $f" >&2 + exit 1 + fi +done + +echo ">>> output directory: $OUT_DIR" +echo ">>> tidb-server: $TIDB_BIN" +echo ">>> tikv-server: $TIKV_BIN" +echo ">>> sql file: $SQL_FILE" + +# Kick off tiup playground; record its PID so we can tear down cleanly. +# Flag names: --db / --kv / --pd (not --tidb/--tikv); binpath flags +# follow the same convention. +tiup playground nightly \ + --kv 1 --db 1 --pd 1 \ + --kv.binpath "$TIKV_BIN" \ + --db.binpath "$TIDB_BIN" \ + --tag "$TIUP_TAG" \ + --without-monitor \ + > "$OUT_DIR/playground.log" 2>&1 & +PLAYGROUND_PID=$! + +cleanup() { + echo + echo ">>> tearing down playground (pid=$PLAYGROUND_PID)" + # tiup playground installs a signal handler that closes the children. + kill "$PLAYGROUND_PID" 2>/dev/null || true + wait "$PLAYGROUND_PID" 2>/dev/null || true + echo ">>> logs: $OUT_DIR/playground.log" + echo ">>> output: $OUT_DIR/e2e.out" +} +trap cleanup EXIT + +# Wait for TiDB to start accepting connections (port 4000). +echo ">>> waiting up to ${WAIT_SECS}s for TiDB to come up on 127.0.0.1:4000" +for _ in $(seq 1 "$WAIT_SECS"); do + if mysql -h 127.0.0.1 -P 4000 -u root -e "SELECT 1" >/dev/null 2>&1; then + echo ">>> TiDB is up" + break + fi + sleep 1 +done + +if ! mysql -h 127.0.0.1 -P 4000 -u root -e "SELECT 1" >/dev/null 2>&1; then + echo "TiDB did not come up within ${WAIT_SECS}s; see $OUT_DIR/playground.log" >&2 + exit 1 +fi + +# Run the SQL file. 
Use --table for human-readable output (with column
+# headers), --comments so the section banners survive into the log, and
+# --force so the deliberate error case in Section 3 (CLUSTERED PRIMARY
+# KEY with DESC must be rejected) does not abort the rest of the script.
+# Pipe via stdin rather than `-e "source $SQL_FILE"` so every statement
+# goes through normal batch mode and --force applies uniformly. Both
+# stdout and stderr are tee'd so expected-error lines show up inline
+# next to the matching SQL statement.
+echo ">>> running $SQL_FILE"
+mysql -h 127.0.0.1 -P 4000 -u root --table --comments --force \
+  --default-character-set=utf8mb4 \
+  < "$SQL_FILE" 2>&1 \
+  | tee "$OUT_DIR/e2e.out"
+
+echo
+echo ">>> e2e run finished — review $OUT_DIR/e2e.out for unexpected rows."