Skip to content

Commit e7e34e2

Browse files
committed
Add failing test for retry + abort issue
1 parent 8f274f7 commit e7e34e2

1 file changed

Lines changed: 99 additions & 0 deletions

File tree

go/logic/migrator_test.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,3 +1210,102 @@ func TestCheckAbort_DetectsContextCancellation(t *testing.T) {
12101210
t.Fatal("Expected checkAbort to return error when context is cancelled")
12111211
}
12121212
}
1213+
1214+
func (suite *MigratorTestSuite) TestPanicOnWarningsDuplicateDuringCutoverWithHighRetries() {
1215+
ctx := context.Background()
1216+
1217+
// Create table with email column (no unique constraint initially)
1218+
_, err := suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY AUTO_INCREMENT, email VARCHAR(100))", getTestTableName()))
1219+
suite.Require().NoError(err)
1220+
1221+
// Insert initial rows with unique email values - passes pre-flight validation
1222+
_, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (email) VALUES ('user1@example.com')", getTestTableName()))
1223+
suite.Require().NoError(err)
1224+
_, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (email) VALUES ('user2@example.com')", getTestTableName()))
1225+
suite.Require().NoError(err)
1226+
_, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (email) VALUES ('user3@example.com')", getTestTableName()))
1227+
suite.Require().NoError(err)
1228+
1229+
// Verify we have 3 rows
1230+
var count int
1231+
err = suite.db.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", getTestTableName())).Scan(&count)
1232+
suite.Require().NoError(err)
1233+
suite.Require().Equal(3, count)
1234+
1235+
// Create postpone flag file
1236+
tmpDir, err := os.MkdirTemp("", "gh-ost-postpone-test")
1237+
suite.Require().NoError(err)
1238+
defer os.RemoveAll(tmpDir)
1239+
postponeFlagFile := filepath.Join(tmpDir, "postpone.flag")
1240+
err = os.WriteFile(postponeFlagFile, []byte{}, 0644)
1241+
suite.Require().NoError(err)
1242+
1243+
// Start migration in goroutine
1244+
done := make(chan error, 1)
1245+
go func() {
1246+
connectionConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer)
1247+
if err != nil {
1248+
done <- err
1249+
return
1250+
}
1251+
1252+
migrationContext := newTestMigrationContext()
1253+
migrationContext.ApplierConnectionConfig = connectionConfig
1254+
migrationContext.InspectorConnectionConfig = connectionConfig
1255+
migrationContext.SetConnectionConfig("innodb")
1256+
migrationContext.AlterStatementOptions = "ADD UNIQUE KEY unique_email_idx (email)"
1257+
migrationContext.HeartbeatIntervalMilliseconds = 100
1258+
migrationContext.PostponeCutOverFlagFile = postponeFlagFile
1259+
migrationContext.PanicOnWarnings = true
1260+
1261+
// High retry count + exponential backoff means retries will take a long time and fail the test if not properly aborted
1262+
migrationContext.SetDefaultNumRetries(30)
1263+
migrationContext.CutOverExponentialBackoff = true
1264+
migrationContext.SetExponentialBackoffMaxInterval(128)
1265+
1266+
migrator := NewMigrator(migrationContext, "0.0.0")
1267+
1268+
//nolint:contextcheck
1269+
done <- migrator.Migrate()
1270+
}()
1271+
1272+
// Wait for migration to reach postponed state
1273+
// TODO replace this with an actual check for postponed state
1274+
time.Sleep(3 * time.Second)
1275+
1276+
// Now insert a duplicate email value while migration is postponed
1277+
// This simulates data arriving during migration that would violate the unique constraint
1278+
_, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (email) VALUES ('user1@example.com')", getTestTableName()))
1279+
suite.Require().NoError(err)
1280+
1281+
// Verify we now have 4 rows (including the duplicate)
1282+
err = suite.db.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", getTestTableName())).Scan(&count)
1283+
suite.Require().NoError(err)
1284+
suite.Require().Equal(4, count)
1285+
1286+
// Unpostpone the migration - gh-ost will now try to apply binlog events with the duplicate
1287+
err = os.Remove(postponeFlagFile)
1288+
suite.Require().NoError(err)
1289+
1290+
// Wait for Migrate() to return - with timeout to detect if it hangs
1291+
select {
1292+
case migrateErr := <-done:
1293+
// Success - Migrate() returned
1294+
// It should return an error due to the duplicate
1295+
suite.Require().Error(migrateErr, "Expected migration to fail due to duplicate key violation")
1296+
suite.Require().Contains(migrateErr.Error(), "Duplicate entry", "Error should mention duplicate entry")
1297+
case <-time.After(5 * time.Minute):
1298+
suite.FailNow("Migrate() hung and did not return within 5 minutes - failure to abort on warnings in retry loop")
1299+
}
1300+
1301+
// Verify all 4 rows are still in the original table (no silent data loss)
1302+
err = suite.db.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", getTestTableName())).Scan(&count)
1303+
suite.Require().NoError(err)
1304+
suite.Require().Equal(4, count, "Original table should still have all 4 rows")
1305+
1306+
// Verify both user1@example.com entries still exist
1307+
var duplicateCount int
1308+
err = suite.db.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s WHERE email = 'user1@example.com'", getTestTableName())).Scan(&duplicateCount)
1309+
suite.Require().NoError(err)
1310+
suite.Require().Equal(2, duplicateCount, "Should have 2 duplicate email entries")
1311+
}

0 commit comments

Comments
 (0)