Skip to content

Commit 4a88939

Browse files
committed
Simplify the new BOM filtering logic
1 parent 1ff08fc commit 4a88939

2 files changed

Lines changed: 54 additions & 39 deletions

File tree

Source/HttpMultipartParser/Extensions.cs

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,24 +49,5 @@ public static IEnumerable<string> GetParameterValues(this IMultipartFormDataPars
4949
.Where(p => p.Name.Equals(name, comparisonType))
5050
.Select(p => p.Data);
5151
}
52-
53-
/// <summary>
54-
/// Determines if the source byte array starts with the specified pattern.
55-
/// </summary>
56-
/// <param name="src">The source byte array.</param>
57-
/// <param name="pattern">The pattern.</param>
58-
/// <returns>True if the source byte array starts with the specified pattern, false otherwise.</returns>
59-
internal static bool StartsWith(this byte[] src, byte[] pattern)
60-
{
61-
if (src == null || pattern == null) return false;
62-
if (src.Length < pattern.Length) return false;
63-
64-
for (int i = 0; i < pattern.Length - 1; i++)
65-
{
66-
if (src[i] != pattern[i]) return false;
67-
}
68-
69-
return true;
70-
}
7152
}
7253
}

Source/HttpMultipartParser/RebufferableBinaryReader.cs

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
// --------------------------------------------------------------------------------------------------------------------
2727

2828
using System.IO;
29-
using System.Linq;
3029
using System.Text;
3130
using System.Threading;
3231
using System.Threading.Tasks;
@@ -63,9 +62,9 @@ public class RebufferableBinaryReader
6362
private readonly BinaryStreamStack streamStack;
6463

6564
/// <summary>
66-
/// The BOM (AKA preamble) for the encoding.
65+
/// Counts the number of chunks read from the underlying stream.
6766
/// </summary>
68-
private readonly byte[] preamble;
67+
private int processedChunkCounter;
6968

7069
#endregion
7170

@@ -101,7 +100,7 @@ public RebufferableBinaryReader(Stream input, Encoding encoding, int bufferSize
101100
streamStack = new BinaryStreamStack(encoding);
102101
this.encoding = encoding;
103102
this.bufferSize = bufferSize;
104-
this.preamble = encoding.GetPreamble();
103+
processedChunkCounter = 0;
105104
}
106105

107106
#endregion
@@ -291,10 +290,7 @@ public byte[] ReadByteLine()
291290
public string ReadLine()
292291
{
293292
byte[] data = ReadByteLine();
294-
295-
if (data == null) return null;
296-
else if (data.StartsWith(preamble)) return encoding.GetString(data.Skip(preamble.Length).ToArray());
297-
else return encoding.GetString(data);
293+
return data == null ? null : encoding.GetString(data);
298294
}
299295

300296
/// <summary>
@@ -457,16 +453,38 @@ public async Task<byte[]> ReadByteLineAsync(CancellationToken cancellationToken
457453
public async Task<string> ReadLineAsync(CancellationToken cancellationToken = default)
458454
{
459455
byte[] data = await ReadByteLineAsync(cancellationToken).ConfigureAwait(false);
460-
461-
if (data == null) return null;
462-
else if (data.StartsWith(preamble)) return encoding.GetString(data.Skip(preamble.Length).ToArray());
463-
else return encoding.GetString(data);
456+
return data == null ? null : encoding.GetString(data);
464457
}
465458

466459
#endregion
467460

468461
#region Methods
469462

463+
/// <summary>
464+
/// Determines the byte order mark (BOM) offset (if any) from the
465+
/// given buffer.
466+
/// </summary>
467+
/// <param name="buffer">
468+
/// The buffer to examine.
469+
/// </param>
470+
/// <returns>
471+
/// The <see cref="int" /> representing the length of the byte order mark, or 0 if the buffer does not start with one.
472+
/// </returns>
473+
private int GetBomOffset(byte[] buffer)
474+
{
475+
byte[] bom = encoding.GetPreamble();
476+
bool usesBom = true;
477+
for (int i = 0; i < bom.Length; ++i)
478+
{
479+
if (bom[i] != buffer[i])
480+
{
481+
usesBom = false;
482+
}
483+
}
484+
485+
return usesBom ? bom.Length : 0;
486+
}
487+
470488
/// <summary>
471489
/// Reads more data from the stream into the stream stack.
472490
/// </summary>
@@ -518,14 +536,30 @@ private async Task<int> StreamDataAsync(CancellationToken cancellationToken = de
518536
private void PushToStack(byte[] buffer, int amountRead)
519537
{
520538
/*
521-
The logic in this method until August 2025 would eliminate the BOM (also called the encoding preamble).
522-
However, it's important to preserve the BOM when the data is binary, such as the content of a file.
523-
That's why we no longer eliminate the BOM in this method.
524-
The BOM is now eliminated in `ReadLine` and `ReadLineAsync` because we want to get rid of it
525-
when processing string data.
526-
*/
527-
528-
if (amountRead > 0) streamStack.Push(buffer, 0, amountRead);
539+
The logic in this method until August 2025 would eliminate the BOM (also called the encoding preamble)
540+
if it was present at the beginning of each and every buffer read from the stream.
541+
542+
However, we only need to remove the BOM if present at the very beginning of the stream.
543+
In other words: only remove the BOM from the first chunk.
544+
*/
545+
546+
if (amountRead > 0)
547+
{
548+
if (processedChunkCounter == 0)
549+
{
550+
int bomOffset = GetBomOffset(buffer);
551+
if (amountRead - bomOffset > 0)
552+
{
553+
streamStack.Push(buffer, bomOffset, amountRead - bomOffset);
554+
}
555+
}
556+
else
557+
{
558+
streamStack.Push(buffer, 0, amountRead);
559+
}
560+
561+
processedChunkCounter++;
562+
}
529563
}
530564

531565
#endregion

0 commit comments

Comments
 (0)