|
24 | 24 | // </summary> |
25 | 25 | // -------------------------------------------------------------------------------------------------------------------- |
26 | 26 |
|
27 | | -using System.Collections.Generic; |
| 27 | +using System; |
28 | 28 |
|
29 | 29 | namespace HttpMultipartParser |
30 | 30 | { |
@@ -53,54 +53,50 @@ public static int Search(byte[] haystack, byte[] needle) |
53 | 53 | return Search(haystack, needle, haystack.Length); |
54 | 54 | } |
55 | 55 |
|
/// <summary>Finds if a sequence exists within another sequence.</summary>
/// <param name="haystack">The sequence to search.</param>
/// <param name="needle">The sequence to look for.</param>
/// <param name="haystackLength">The length of the haystack to consider for searching.</param>
/// <returns>The start position of the found sequence or -1 if nothing was found.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="haystackLength"/> is less than 1 or greater than the length of
/// <paramref name="haystack"/>. A null or empty haystack or needle is not an error and yields -1.
/// </exception>
/// <remarks>Inspired by https://stackoverflow.com/a/39021296/153084 .</remarks>
public static int Search(byte[] haystack, byte[] needle, int haystackLength)
{
    const int SequenceNotFound = -1;

    // A null or empty input can never contain a match: report "not found" rather than throwing.
    // These checks run before the length validation, so they win over an invalid haystackLength.
    if (haystack == null || haystack.Length == 0)
    {
        return SequenceNotFound;
    }

    if (needle == null || needle.Length == 0)
    {
        return SequenceNotFound;
    }

    if (needle.Length > haystack.Length)
    {
        return SequenceNotFound;
    }

    if (haystackLength > haystack.Length || haystackLength < 1)
    {
        throw new ArgumentException("Length must be between 1 and the length of the haystack.", nameof(haystackLength));
    }

    // Last start index at which the needle still fits inside the considered window.
    // It is negative when the needle is longer than the window, so the loop never runs.
    int lastCandidate = haystackLength - needle.Length;
    byte firstByte = needle[0];

    for (int start = 0; start <= lastCandidate; start++)
    {
        // Cheap rejection: only positions holding the needle's first byte can start a match.
        if (haystack[start] != firstByte)
        {
            continue;
        }

        // Compare the remaining bytes. start + matched never exceeds haystackLength - 1
        // because start <= lastCandidate and matched < needle.Length.
        int matched = 1;
        while (matched < needle.Length && haystack[start + matched] == needle[matched])
        {
            matched++;
        }

        if (matched == needle.Length)
        {
            return start;
        }
    }

    // The whole window was scanned without finding the needle.
    return SequenceNotFound;
}
105 | 101 |
|
106 | 102 | #endregion |
|
0 commit comments