-
Notifications
You must be signed in to change notification settings - Fork 4k
Expand file tree
/
Copy pathsanitize.go
More file actions
56 lines (51 loc) · 1.32 KB
/
sanitize.go
File metadata and controls
56 lines (51 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package sanitize
// FilterInvisibleCharacters returns a copy of input with every rune that
// shouldRemoveRune flags dropped, so that invisible or control characters do
// not end up in user-facing titles or bodies. The removed code points are:
//   - Unicode tag characters: U+E0001, U+E0020–U+E007F
//   - BiDi control characters: U+202A–U+202E, U+2066–U+2069
//   - Hidden modifier characters: U+200B, U+200C, U+200E, U+200F, U+00AD,
//     U+FEFF, U+180E, U+2060–U+2064
//
// An empty input is returned unchanged.
func FilterInvisibleCharacters(input string) string {
	if len(input) == 0 {
		return input
	}

	// Decode the string once, then compact the surviving runes toward the
	// front of the same backing array. The write position never passes the
	// read position, so no unread rune is overwritten.
	decoded := []rune(input)
	kept := decoded[:0]
	for _, r := range decoded {
		if shouldRemoveRune(r) {
			continue
		}
		kept = append(kept, r)
	}
	return string(kept)
}
// shouldRemoveRune reports whether r is one of the invisible or control code
// points to strip: a handful of individual hidden characters, the BiDi
// control and isolate ranges, the U+2060–U+2064 hidden modifiers, and the
// Unicode tag characters.
func shouldRemoveRune(r rune) bool {
	switch {
	// Individual code points.
	case r == 0x00AD, // SOFT HYPHEN
		r == 0x180E,  // MONGOLIAN VOWEL SEPARATOR
		r == 0x200B,  // ZERO WIDTH SPACE
		r == 0x200C,  // ZERO WIDTH NON-JOINER
		r == 0x200E,  // LEFT-TO-RIGHT MARK
		r == 0x200F,  // RIGHT-TO-LEFT MARK
		r == 0xFEFF,  // ZERO WIDTH NO-BREAK SPACE
		r == 0xE0001: // TAG
		return true

	// Contiguous ranges, both ends inclusive.
	case 0x202A <= r && r <= 0x202E: // BiDi controls
		return true
	case 0x2060 <= r && r <= 0x2064: // Hidden modifiers
		return true
	case 0x2066 <= r && r <= 0x2069: // BiDi isolates
		return true
	case 0xE0020 <= r && r <= 0xE007F: // Unicode tags
		return true
	}
	return false
}