@@ -1210,3 +1210,102 @@ func TestCheckAbort_DetectsContextCancellation(t *testing.T) {
12101210 t .Fatal ("Expected checkAbort to return error when context is cancelled" )
12111211 }
12121212}
1213+
1214+ func (suite * MigratorTestSuite ) TestPanicOnWarningsDuplicateDuringCutoverWithHighRetries () {
1215+ ctx := context .Background ()
1216+
1217+ // Create table with email column (no unique constraint initially)
1218+ _ , err := suite .db .ExecContext (ctx , fmt .Sprintf ("CREATE TABLE %s (id INT PRIMARY KEY AUTO_INCREMENT, email VARCHAR(100))" , getTestTableName ()))
1219+ suite .Require ().NoError (err )
1220+
1221+ // Insert initial rows with unique email values - passes pre-flight validation
1222+ _ , err = suite .db .ExecContext (ctx , fmt .Sprintf ("INSERT INTO %s (email) VALUES ('user1@example.com')" , getTestTableName ()))
1223+ suite .Require ().NoError (err )
1224+ _ , err = suite .db .ExecContext (ctx , fmt .Sprintf ("INSERT INTO %s (email) VALUES ('user2@example.com')" , getTestTableName ()))
1225+ suite .Require ().NoError (err )
1226+ _ , err = suite .db .ExecContext (ctx , fmt .Sprintf ("INSERT INTO %s (email) VALUES ('user3@example.com')" , getTestTableName ()))
1227+ suite .Require ().NoError (err )
1228+
1229+ // Verify we have 3 rows
1230+ var count int
1231+ err = suite .db .QueryRowContext (ctx , fmt .Sprintf ("SELECT COUNT(*) FROM %s" , getTestTableName ())).Scan (& count )
1232+ suite .Require ().NoError (err )
1233+ suite .Require ().Equal (3 , count )
1234+
1235+ // Create postpone flag file
1236+ tmpDir , err := os .MkdirTemp ("" , "gh-ost-postpone-test" )
1237+ suite .Require ().NoError (err )
1238+ defer os .RemoveAll (tmpDir )
1239+ postponeFlagFile := filepath .Join (tmpDir , "postpone.flag" )
1240+ err = os .WriteFile (postponeFlagFile , []byte {}, 0644 )
1241+ suite .Require ().NoError (err )
1242+
1243+ // Start migration in goroutine
1244+ done := make (chan error , 1 )
1245+ go func () {
1246+ connectionConfig , err := getTestConnectionConfig (ctx , suite .mysqlContainer )
1247+ if err != nil {
1248+ done <- err
1249+ return
1250+ }
1251+
1252+ migrationContext := newTestMigrationContext ()
1253+ migrationContext .ApplierConnectionConfig = connectionConfig
1254+ migrationContext .InspectorConnectionConfig = connectionConfig
1255+ migrationContext .SetConnectionConfig ("innodb" )
1256+ migrationContext .AlterStatementOptions = "ADD UNIQUE KEY unique_email_idx (email)"
1257+ migrationContext .HeartbeatIntervalMilliseconds = 100
1258+ migrationContext .PostponeCutOverFlagFile = postponeFlagFile
1259+ migrationContext .PanicOnWarnings = true
1260+
1261+ // High retry count + exponential backoff means retries will take a long time and fail the test if not properly aborted
1262+ migrationContext .SetDefaultNumRetries (30 )
1263+ migrationContext .CutOverExponentialBackoff = true
1264+ migrationContext .SetExponentialBackoffMaxInterval (128 )
1265+
1266+ migrator := NewMigrator (migrationContext , "0.0.0" )
1267+
1268+ //nolint:contextcheck
1269+ done <- migrator .Migrate ()
1270+ }()
1271+
1272+ // Wait for migration to reach postponed state
1273+ // TODO replace this with an actual check for postponed state
1274+ time .Sleep (3 * time .Second )
1275+
1276+ // Now insert a duplicate email value while migration is postponed
1277+ // This simulates data arriving during migration that would violate the unique constraint
1278+ _ , err = suite .db .ExecContext (ctx , fmt .Sprintf ("INSERT INTO %s (email) VALUES ('user1@example.com')" , getTestTableName ()))
1279+ suite .Require ().NoError (err )
1280+
1281+ // Verify we now have 4 rows (including the duplicate)
1282+ err = suite .db .QueryRowContext (ctx , fmt .Sprintf ("SELECT COUNT(*) FROM %s" , getTestTableName ())).Scan (& count )
1283+ suite .Require ().NoError (err )
1284+ suite .Require ().Equal (4 , count )
1285+
1286+ // Unpostpone the migration - gh-ost will now try to apply binlog events with the duplicate
1287+ err = os .Remove (postponeFlagFile )
1288+ suite .Require ().NoError (err )
1289+
1290+ // Wait for Migrate() to return - with timeout to detect if it hangs
1291+ select {
1292+ case migrateErr := <- done :
1293+ // Success - Migrate() returned
1294+ // It should return an error due to the duplicate
1295+ suite .Require ().Error (migrateErr , "Expected migration to fail due to duplicate key violation" )
1296+ suite .Require ().Contains (migrateErr .Error (), "Duplicate entry" , "Error should mention duplicate entry" )
1297+ case <- time .After (5 * time .Minute ):
1298+ suite .FailNow ("Migrate() hung and did not return within 5 minutes - failure to abort on warnings in retry loop" )
1299+ }
1300+
1301+ // Verify all 4 rows are still in the original table (no silent data loss)
1302+ err = suite .db .QueryRowContext (ctx , fmt .Sprintf ("SELECT COUNT(*) FROM %s" , getTestTableName ())).Scan (& count )
1303+ suite .Require ().NoError (err )
1304+ suite .Require ().Equal (4 , count , "Original table should still have all 4 rows" )
1305+
1306+ // Verify both user1@example.com entries still exist
1307+ var duplicateCount int
1308+ err = suite .db .QueryRowContext (ctx , fmt .Sprintf ("SELECT COUNT(*) FROM %s WHERE email = 'user1@example.com'" , getTestTableName ())).Scan (& duplicateCount )
1309+ suite .Require ().NoError (err )
1310+ suite .Require ().Equal (2 , duplicateCount , "Should have 2 duplicate email entries" )
1311+ }
0 commit comments