Keep UTF-16 surrogate pairs intact in the diff-match-patch helpers.

* diff_commonPrefix: when the first mismatch lands on a low surrogate (in
  either string), drop the already-counted high surrogate so the prefix does
  not end in the middle of a pair. A mismatch on a high surrogate needs no
  adjustment, because the prefix already ends on a character boundary.
* diff_commonSuffix: mirror image of the above. Only a mismatch on a high
  surrogate means the counted suffix starts with a dangling low surrogate.
  The adjusted length is clamped with MAX so it can never go negative.
* diff_cleanupSemanticScore: surrogate halves no longer earn the
  "non-alphanumeric" boundary point; each flag is computed from its own
  character.

diff --git a/External/diffmatchpatch/DiffMatchPatchCFUtilities.c b/External/diffmatchpatch/DiffMatchPatchCFUtilities.c
index 588f3d78..d4515b72 100755
--- a/External/diffmatchpatch/DiffMatchPatchCFUtilities.c
+++ b/External/diffmatchpatch/DiffMatchPatchCFUtilities.c
@@ -112,6 +112,11 @@ CFIndex diff_commonPrefix(CFStringRef text1, CFStringRef text2) {
     char2 = CFStringGetCharacterFromInlineBuffer(&text2_inlineBuffer, i);
 
     if (char1 != char2) {
+      // A mismatch on a low surrogate means the high surrogate at i - 1 was counted
+      // as common; drop it so the prefix never ends inside a surrogate pair.
+      if (CFStringIsSurrogateLowCharacter(char1) || CFStringIsSurrogateLowCharacter(char2)) {
+        i = MAX(i - 1, 0);
+      }
       return i;
     }
   }
@@ -142,7 +147,13 @@ CFIndex diff_commonSuffix(CFStringRef text1, CFStringRef text2) {
     char2 = CFStringGetCharacterFromInlineBuffer(&text2_inlineBuffer, (text2_length - i));
 
     if (char1 != char2) {
-      return i - 1;
+      // A mismatch on a high surrogate means the low surrogate after it was counted
+      // as common; drop it (never going below zero) so the suffix starts on a boundary.
+      if (CFStringIsSurrogateHighCharacter(char1) || CFStringIsSurrogateHighCharacter(char2)) {
+        return MAX(i - 2, 0);
+      }
+
+      return i - 1;
     }
   }
   return n;
@@ -652,6 +663,12 @@ CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) {
       CFStringGetCharacterAtIndex(one, (CFStringGetLength(one) - 1));
   UniChar char2 =
       CFStringGetCharacterAtIndex(two, 0);
+  // A surrogate half on either side of the boundary must not earn the
+  // non-alphanumeric point below, so flag each boundary character separately.
+  Boolean char1IsSurrogate =
+      CFStringIsSurrogateLowCharacter(char1) || CFStringIsSurrogateHighCharacter(char1);
+  Boolean char2IsSurrogate =
+      CFStringIsSurrogateLowCharacter(char2) || CFStringIsSurrogateHighCharacter(char2);
   Boolean nonAlphaNumeric1 =
       !CFCharacterSetIsCharacterMember(alphaNumericSet, char1);
   Boolean nonAlphaNumeric2 =
@@ -681,7 +698,7 @@ CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) {
   } else if (whitespace1 || whitespace2) {
     // Two points for whitespace.
     return 2;
-  } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {
+  } else if ((nonAlphaNumeric1 && !char1IsSurrogate) || (nonAlphaNumeric2 && !char2IsSurrogate)) {
     // One point for non-alphanumeric.
     return 1;
   }
diff --git a/SimperiumTests/DiffMatchPatchTest.m b/SimperiumTests/DiffMatchPatchTest.m
index fbc325c2..abf42a2f 100755
--- a/SimperiumTests/DiffMatchPatchTest.m
+++ b/SimperiumTests/DiffMatchPatchTest.m
@@ -48,6 +48,17 @@ - (void)testDiffCommonPrefixTest {
   XCTAssertEqual((NSUInteger)4, [dmp diff_commonPrefixOfFirstString:@"1234" andSecondString:@"1234xyz"], @"Common suffix whole case failed.");
 }
 
+- (void)testDiffCommonPrefixDoesntSplitSurrogatePairs {
+  DiffMatchPatch *dmp = [DiffMatchPatch new];
+
+  NSString *pristine = @"☺️🖖🏿";
+  NSString *edited = @"☺️😃🖖🏿";
+  NSString *common = @"☺️";
+
+  NSUInteger prefix = [dmp diff_commonPrefixOfFirstString:pristine andSecondString:edited];
+  XCTAssertEqual(prefix, common.length, @"Common Prefix should match the common emoji's length");
+}
+
 - (void)testDiffCommonSuffixTest {
   DiffMatchPatch *dmp = [DiffMatchPatch new];
 
@@ -62,6 +73,19 @@ - (void)testDiffCommonSuffixTest {
   XCTAssertEqual((NSUInteger)4, [dmp diff_commonSuffixOfFirstString:@"1234" andSecondString:@"xyz1234"], @"Detect any common suffix. Whole case.");
 }
 
+- (void)testDiffCommonSuffixDoesntSplitSurrogatePairs {
+  DiffMatchPatch *dmp = [DiffMatchPatch new];
+
+  NSString *pristine = @"☺️🖖🏿";
+  NSString *edited = @"☺️😃🖖🏿";
+  NSString *expected = @"🖖🏿";
+
+  NSUInteger suffix = [dmp diff_commonSuffixOfFirstString:pristine andSecondString:edited];
+  NSString *common = [pristine substringFromIndex:pristine.length - suffix];
+
+  XCTAssertEqualObjects(expected, common, @"Common Suffix should match the last emoji");
+}
+
 - (void)testDiffCommonOverlapTest {
   DiffMatchPatch *dmp = [DiffMatchPatch new];
 
@@ -388,6 +412,21 @@ - (void)testDiffCleanupSemanticLosslessTest {
   XCTAssertEqualObjects(expectedResult, diffs, @"Sentence boundaries.");
 }
 
+- (void)testDiffCleanupSemanticLosslessDoesNotTamperWithSurrogatePairsTest {
+  NSArray *expected = @[
+    [Diff diffWithOperation:DIFF_EQUAL andText:@"☺️"],
+    [Diff diffWithOperation:DIFF_INSERT andText:@"😃"],
+    [Diff diffWithOperation:DIFF_EQUAL andText:@"🖖🏿"]
+  ];
+
+  NSMutableArray *diffs = [expected mutableCopy];
+
+  DiffMatchPatch *dmp = [DiffMatchPatch new];
+  [dmp diff_cleanupSemanticLossless:diffs];
+
+  XCTAssertEqualObjects(diffs, expected, @"The result should match the input!");
+}
+
 - (void)testDiffCleanupSemanticTest {
   DiffMatchPatch *dmp = [DiffMatchPatch new];
   NSMutableArray *expectedResult = nil;