Skip to content

Commit fd3d463

Browse files
committed
Replace the logic of the SubsequenceFinder.Search method with the FindSequence algorithm
Resolves #98
1 parent 225eea0 commit fd3d463

1 file changed

Lines changed: 32 additions & 36 deletions

File tree

Source/HttpMultipartParser/SubsequenceFinder.cs

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
// </summary>
2525
// --------------------------------------------------------------------------------------------------------------------
2626

27-
using System.Collections.Generic;
27+
using System;
2828

2929
namespace HttpMultipartParser
3030
{
@@ -53,54 +53,50 @@ public static int Search(byte[] haystack, byte[] needle)
5353
return Search(haystack, needle, haystack.Length);
5454
}
5555

56-
/// <summary>
57-
/// Finds if a sequence exists within another sequence.
58-
/// </summary>
59-
/// <param name="haystack">
60-
/// The sequence to search.
61-
/// </param>
62-
/// <param name="needle">
63-
/// The sequence to look for.
64-
/// </param>
65-
/// <param name="haystackLength">
66-
/// The length of the haystack to consider for searching.
67-
/// </param>
68-
/// <returns>
69-
/// The start position of the found sequence or -1 if nothing was found.
70-
/// </returns>
56+
/// <summary>Finds the first occurrence of a sequence within another sequence.</summary>
57+
/// <param name="haystack">The sequence to search.</param>
58+
/// <param name="needle">The sequence to look for.</param>
59+
/// <param name="haystackLength">The length of the haystack to consider for searching.</param>
60+
/// <returns>The start position of the found sequence or -1 if nothing was found.</returns>
61+
/// <remarks>Inspired by https://stackoverflow.com/a/39021296/153084 .</remarks>
7162
public static int Search(byte[] haystack, byte[] needle, int haystackLength)
7263
{
73-
var charactersInNeedle = new HashSet<byte>(needle);
64+
const int SEQUENCE_NOT_FOUND = -1;
65+
66+
// Validate the parameters
67+
if (haystack == null || haystack.Length == 0) return SEQUENCE_NOT_FOUND;
68+
if (needle == null || needle.Length == 0) return SEQUENCE_NOT_FOUND;
69+
if (needle.Length > haystack.Length) return SEQUENCE_NOT_FOUND;
70+
if (haystackLength > haystack.Length || haystackLength < 1) throw new ArgumentException("Length must be between 1 and the length of the haystack.");
7471

75-
var length = needle.Length;
76-
var index = 0;
77-
while (index + length <= haystackLength)
72+
int currentIndex = 0;
73+
int end = haystackLength - needle.Length; // past here no match is possible
74+
byte firstByte = needle[0]; // cached to tell compiler there's no aliasing
75+
76+
while (currentIndex <= end)
7877
{
79-
// Worst case scenario: Go back to character-by-character parsing until we find a non-match
80-
// or we find the needle.
81-
if (charactersInNeedle.Contains(haystack[index + length - 1]))
78+
// Compare only the first byte here; keeping this check simple is intended to be compiler-friendly.
79+
if (haystack[currentIndex] == firstByte)
8280
{
83-
var needleIndex = 0;
84-
while (haystack[index + needleIndex] == needle[needleIndex])
81+
// scan for rest of sequence
82+
for (int offset = 1; ; ++offset)
8583
{
86-
if (needleIndex == needle.Length - 1)
84+
if (offset == needle.Length)
85+
{ // full sequence matched?
86+
return currentIndex;
87+
}
88+
else if (haystack[currentIndex + offset] != needle[offset])
8789
{
88-
// Found our match!
89-
return index;
90+
break;
9091
}
91-
92-
needleIndex += 1;
9392
}
94-
95-
index += 1;
96-
index += needleIndex;
97-
continue;
9893
}
9994

100-
index += length;
95+
++currentIndex;
10196
}
10297

103-
return -1;
98+
// reached the end of the searchable range (haystackLength - needle.Length) without finding a match
99+
return SEQUENCE_NOT_FOUND;
104100
}
105101

106102
#endregion

0 commit comments

Comments
 (0)