Skip to content

Commit db8a1d1

Browse files
committed
(GH-98) Benchmark several possible solutions
1 parent 7f631f6 commit db8a1d1

3 files changed

Lines changed: 317 additions & 2 deletions

File tree

Source/HttpMultipartParser.Benchmark/HttpMultipartParser.Benchmark.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
</PropertyGroup>
77

88
<ItemGroup>
9-
<PackageReference Include="BenchmarkDotNet" Version="0.12.1" />
9+
<PackageReference Include="BenchmarkDotNet" Version="0.13.0" />
1010
</ItemGroup>
1111

1212
<ItemGroup>

Source/HttpMultipartParser.Benchmark/Program.cs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@ static void Main(string[] args)
1212
// To debug
1313
// config = new DebugInProcessConfig();
1414

15-
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config);
15+
var types = new[]
16+
{
17+
typeof(SubsequenceFinderBenchmark),
18+
typeof(MultipartFormDataParserBenchmark)
19+
};
20+
21+
BenchmarkSwitcher.FromTypes(types).Run(args, config);
1622
}
1723
}
1824
}
Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
using BenchmarkDotNet.Attributes;
2+
using System;
3+
using System.Collections.Generic;
4+
5+
namespace HttpMultipartParser.Benchmark
6+
{
7+
[MemoryDiagnoser]
8+
[HtmlExporter]
9+
[JsonExporter]
10+
[MarkdownExporter]
11+
public class SubsequenceFinderBenchmark
12+
{
13+
// This is the default buffer size defined in StreamingMultipartFormDataParser.cs
14+
private const int DefaultBufferSize = 4096;
15+
16+
private readonly byte[] haystack = new byte[DefaultBufferSize];
17+
private readonly byte[] needle = new byte[2];
18+
19+
/// <summary>
/// Fills the haystack and the needle with pseudo-random bytes.
/// </summary>
/// <remarks>
/// A fixed seed is used so that every instance of this class is populated with
/// exactly the same bytes. With an unseeded generator each instance (and therefore,
/// potentially, each benchmarked method and each run) would search different data,
/// which makes the measurements neither reproducible nor comparable.
/// </remarks>
public SubsequenceFinderBenchmark()
{
    // Arbitrary constant; only its stability matters, not its value.
    const int seed = 20210701;

    // Populate the byte arrays with reproducible pseudo-random data.
    // The haystack is filled first, then the needle, from the same generator.
    var random = new Random(seed);
    random.NextBytes(haystack);
    random.NextBytes(needle);
}
26+
27+
/// <summary>
/// Baseline benchmark: runs the legacy search over the whole haystack.
/// </summary>
/// <returns>The index reported by the legacy search, or -1 when nothing is found.</returns>
[Benchmark(Baseline = true)]
public int OldSearch() => OldSearch(haystack, needle, haystack.Length);
32+
33+
/// <summary>
/// Benchmarks the first-byte-scan search against the shared haystack and needle.
/// </summary>
/// <returns>The index reported by the search, or -1 when nothing is found.</returns>
[Benchmark]
public int FindSequence() => FindSequence(haystack, needle);
38+
39+
/// <summary>
/// Benchmarks the position-by-position <c>Locate</c> search against the shared haystack and needle.
/// </summary>
/// <returns>The index reported by the search, or -1 when nothing is found.</returns>
[Benchmark]
public int Locate() => Locate(haystack, needle);
44+
45+
/// <summary>
/// Benchmarks the Boyer-Moore search against the shared haystack and needle.
/// </summary>
/// <remarks>
/// A new searcher is constructed on every invocation, so the measurement includes
/// the cost of building its lookup tables as well as the search itself.
/// </remarks>
/// <returns>The index reported by the search, or -1 when nothing is found.</returns>
[Benchmark]
public int BoyerMoore()
{
    return new BoyerMooreClass(needle).Search(haystack);
}
51+
52+
/// <summary>
/// Benchmarks the byte-by-byte search against the shared haystack and needle,
/// starting at the beginning of the haystack.
/// </summary>
/// <returns>The index reported by the search, or -1 when nothing is found.</returns>
[Benchmark]
public int ByteSearch() => ByteSearch(haystack, needle);
57+
58+
// This is the logic that was in HttpMultipartParser until July 2021.
59+
// As explained in this [GitHub issue](https://github.com/Http-Multipart-Data-Parser/Http-Multipart-Data-Parser/issues/98),
60+
// it contains a bug when the haystack contains a subset of the needle immediately followed by the needle.
61+
/// <summary>
/// Legacy search kept as the benchmark baseline. It is intentionally left as-is,
/// including its known flaw: after a partial match the scan position jumps past
/// the bytes that were compared, so a needle that starts inside those bytes is missed.
/// </summary>
/// <param name="haystack">The bytes to search in.</param>
/// <param name="needle">The byte sequence to look for.</param>
/// <param name="haystackLength">Number of leading bytes of <paramref name="haystack"/> to consider.</param>
/// <returns>The index of the match that was found, or -1 when no match was found.</returns>
private static int OldSearch(byte[] haystack, byte[] needle, int haystackLength)
{
    var needleBytes = new HashSet<byte>(needle);
    var windowSize = needle.Length;

    for (var position = 0; position + windowSize <= haystackLength;)
    {
        // If the last byte of the current window does not occur anywhere in the
        // needle, the whole window is skipped in one step.
        if (!needleBytes.Contains(haystack[position + windowSize - 1]))
        {
            position += windowSize;
            continue;
        }

        // Otherwise compare byte by byte until a mismatch or a full match.
        var matchedCount = 0;
        while (haystack[position + matchedCount] == needle[matchedCount])
        {
            if (matchedCount == needle.Length - 1)
            {
                // Every byte of the needle matched.
                return position;
            }

            matchedCount++;
        }

        // Resume after the bytes that were compared (this is the step that can
        // jump over a real match).
        position += matchedCount + 1;
    }

    return -1;
}
95+
96+
// From: https://stackoverflow.com/a/39021296/153084
97+
/// <summary>
/// Finds the first occurrence of <paramref name="needle"/> in <paramref name="haystack"/>
/// by scanning for the needle's first byte and then verifying the remaining bytes.
/// </summary>
/// <param name="haystack">The bytes to search in.</param>
/// <param name="needle">The byte sequence to look for.</param>
/// <returns>
/// The index of the first occurrence, or -1 when the needle does not occur,
/// is longer than the haystack, or is empty.
/// </returns>
private static int FindSequence(byte[] haystack, byte[] needle)
{
    // An empty needle has no first byte to scan for. Report "not found", as the
    // other search helpers in this class do, instead of failing on needle[0].
    if (needle.Length == 0)
    {
        return -1;
    }

    int currentIndex = 0;
    int end = haystack.Length - needle.Length; // past here no match is possible
    byte firstByte = needle[0]; // cached so the hot loop compares against a local

    while (currentIndex <= end)
    {
        // Scan for the first byte only; the full comparison runs just for candidates.
        if (haystack[currentIndex] == firstByte)
        {
            // Verify the rest of the sequence. The loop exits either by returning
            // (all bytes matched) or by breaking on the first mismatch.
            for (int offset = 1; ; ++offset)
            {
                if (offset == needle.Length)
                {
                    return currentIndex;
                }

                if (haystack[currentIndex + offset] != needle[offset])
                {
                    break;
                }
            }
        }

        ++currentIndex;
    }

    // End of array reached without a match.
    return -1;
}
128+
129+
// This is the solution proposed by @succeun in the GitHub issue
130+
/// <summary>
/// Finds the first position in <paramref name="self"/> at which
/// <paramref name="candidate"/> matches, testing every position in turn.
/// </summary>
/// <param name="self">The bytes to search in.</param>
/// <param name="candidate">The byte sequence to look for.</param>
/// <returns>
/// The index of the first match, or -1 when there is no match or when
/// <c>IsEmptyLocate</c> rejects the inputs.
/// </returns>
private static int Locate(byte[] self, byte[] candidate)
{
    if (IsEmptyLocate(self, candidate))
    {
        return -1;
    }

    var position = 0;
    while (position < self.Length)
    {
        if (IsMatch(self, position, candidate))
        {
            return position;
        }

        position++;
    }

    return -1;
}
146+
147+
/// <summary>
/// Checks whether <paramref name="candidate"/> occurs in <paramref name="array"/>
/// starting exactly at <paramref name="position"/>.
/// </summary>
/// <param name="array">The bytes to compare against.</param>
/// <param name="position">The index in <paramref name="array"/> where the comparison starts.</param>
/// <param name="candidate">The byte sequence expected at that position.</param>
/// <returns>
/// <c>true</c> when every byte of the candidate equals the corresponding byte of the
/// array; <c>false</c> on the first mismatch or when too few bytes remain.
/// </returns>
private static bool IsMatch(byte[] array, int position, byte[] candidate)
{
    // Not enough bytes left after 'position' to hold the whole candidate.
    var remaining = array.Length - position;
    if (remaining < candidate.Length)
    {
        return false;
    }

    var offset = 0;
    while (offset < candidate.Length)
    {
        if (candidate[offset] != array[position + offset])
        {
            return false;
        }

        offset++;
    }

    return true;
}
158+
159+
/// <summary>
/// Tells whether a search can be skipped because the inputs cannot produce a match.
/// </summary>
/// <param name="array">The bytes that would be searched.</param>
/// <param name="candidate">The byte sequence that would be looked for.</param>
/// <returns>
/// <c>true</c> when either argument is null or empty, or when the candidate is
/// longer than the array; otherwise <c>false</c>.
/// </returns>
private static bool IsEmptyLocate(byte[] array, byte[] candidate)
{
    if (array == null || candidate == null)
    {
        return true;
    }

    if (array.Length == 0 || candidate.Length == 0)
    {
        return true;
    }

    return array.Length < candidate.Length;
}
167+
168+
// From: https://stackoverflow.com/a/37500883/153084
169+
/// <summary>
/// Boyer-Moore style searcher for a fixed needle. The two shift tables are built
/// once in the constructor and reused by every call to <see cref="Search"/>.
/// </summary>
private sealed class BoyerMooreClass
{
    private readonly byte[] needle;

    // Shift indexed by the haystack byte that is under the cursor after a mismatch.
    private readonly int[] charTable;

    // Shift indexed by the number of needle bytes matched before the mismatch.
    private readonly int[] offsetTable;

    /// <summary>
    /// Creates a searcher for <paramref name="needle"/> and precomputes its shift tables.
    /// </summary>
    /// <param name="needle">The byte sequence to look for.</param>
    public BoyerMooreClass(byte[] needle)
    {
        this.needle = needle;
        charTable = BuildByteTable(needle);
        offsetTable = BuildOffsetTable(needle);
    }

    /// <summary>
    /// Searches <paramref name="haystack"/> for the needle, comparing from the
    /// needle's last byte backwards.
    /// </summary>
    /// <param name="haystack">The bytes to search in.</param>
    /// <returns>The index where the needle was found, or -1 when it was not found or the needle is empty.</returns>
    public int Search(byte[] haystack)
    {
        var lastNeedleIndex = needle.Length - 1;
        if (lastNeedleIndex < 0)
        {
            return -1;
        }

        var cursor = lastNeedleIndex;
        while (cursor < haystack.Length)
        {
            // Walk backwards through needle and haystack while the bytes agree.
            var needleIndex = lastNeedleIndex;
            while (needle[needleIndex] == haystack[cursor])
            {
                if (needleIndex == 0)
                {
                    // The whole needle matched; cursor is on its first byte.
                    return cursor;
                }

                cursor--;
                needleIndex--;
            }

            // Advance by the larger of the two precomputed shifts.
            var matchedCount = lastNeedleIndex - needleIndex;
            cursor += Math.Max(offsetTable[matchedCount], charTable[haystack[cursor]]);
        }

        return -1;
    }

    /// <summary>
    /// Builds the per-byte shift table: the needle length for bytes that do not occur
    /// in the needle (ignoring its last byte), otherwise the distance from the byte's
    /// last such occurrence to the end of the needle.
    /// </summary>
    private static int[] BuildByteTable(byte[] needle)
    {
        const int AlphabetSize = 256;
        var shifts = new int[AlphabetSize];
        var lastIndex = needle.Length - 1;

        for (var value = 0; value < shifts.Length; value++)
        {
            shifts[value] = needle.Length;
        }

        // The last needle byte is deliberately excluded from this pass.
        for (var position = 0; position < lastIndex; position++)
        {
            shifts[needle[position]] = lastIndex - position;
        }

        return shifts;
    }

    /// <summary>
    /// Builds the shift table indexed by the number of bytes that matched before a mismatch.
    /// </summary>
    private static int[] BuildOffsetTable(byte[] needle)
    {
        var length = needle.Length;
        var shifts = new int[length];
        var lastPrefixPosition = length;

        // First pass: shifts derived from needle suffixes that are also prefixes.
        for (var suffixStart = length; suffixStart >= 1; suffixStart--)
        {
            if (IsPrefix(needle, suffixStart))
            {
                lastPrefixPosition = suffixStart;
            }

            shifts[length - suffixStart] = lastPrefixPosition - suffixStart + length;
        }

        // Second pass: overwrite entries using suffix repetitions inside the needle.
        for (var position = 0; position < length - 1; position++)
        {
            var suffixLength = SuffixLength(needle, position);
            shifts[suffixLength] = length - 1 - position + suffixLength;
        }

        return shifts;
    }

    /// <summary>
    /// Tells whether the part of the needle starting at <paramref name="p"/> equals
    /// the needle's beginning of the same length.
    /// </summary>
    private static bool IsPrefix(byte[] needle, int p)
    {
        var tailLength = needle.Length - p;
        for (var k = 0; k < tailLength; k++)
        {
            if (needle[p + k] != needle[k])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns how many bytes, walking backwards from <paramref name="p"/>, equal the
    /// bytes walking backwards from the end of the needle.
    /// </summary>
    private static int SuffixLength(byte[] needle, int p)
    {
        var count = 0;
        while (p - count >= 0 && needle[p - count] == needle[needle.Length - 1 - count])
        {
            count++;
        }

        return count;
    }
}
260+
261+
// From: https://boncode.blogspot.com/2011/02/net-c-find-pattern-in-byte-array.html
262+
/// <summary>
/// Finds the first occurrence of <paramref name="searchBytes"/> in
/// <paramref name="searchIn"/> at or after <paramref name="start"/>.
/// </summary>
/// <remarks>
/// The original version special-cased "single byte" searches by testing
/// <c>searchIn.Length &gt; 1</c> (the haystack) rather than the length of the
/// pattern, which left a misleading, almost unreachable branch. A single
/// comparison path handles patterns of every length, so the special case is gone;
/// results are the same for all inputs.
/// </remarks>
/// <param name="searchIn">The bytes to search in.</param>
/// <param name="searchBytes">The byte sequence to look for.</param>
/// <param name="start">The index in <paramref name="searchIn"/> where the search begins.</param>
/// <returns>
/// The index of the first occurrence, or -1 when there is none, when either array
/// is empty, when the pattern is longer than the input, or when
/// <paramref name="start"/> leaves too few bytes for a match.
/// </returns>
private static int ByteSearch(byte[] searchIn, byte[] searchBytes, int start = 0)
{
    // Last index at which the pattern could still fit; negative if it never fits.
    var lastCandidate = searchIn.Length - searchBytes.Length;

    // Only search when both arrays are populated and the start leaves room for a match.
    if (searchIn.Length == 0 || searchBytes.Length == 0 || lastCandidate < 0 || start > lastCandidate)
    {
        return -1;
    }

    for (var i = start; i <= lastCandidate; i++)
    {
        // Cheap rejection on the first byte before comparing the rest.
        if (searchIn[i] != searchBytes[0])
        {
            continue;
        }

        // Compare the remaining bytes; for a one-byte pattern this loop is empty
        // and the first-byte match alone is the result.
        var matched = true;
        for (var y = 1; y < searchBytes.Length; y++)
        {
            if (searchIn[i + y] != searchBytes[y])
            {
                matched = false;
                break;
            }
        }

        if (matched)
        {
            return i;
        }
    }

    return -1;
}
308+
}
309+
}

0 commit comments

Comments
 (0)