|
26 | 26 | // -------------------------------------------------------------------------------------------------------------------- |
27 | 27 |
|
28 | 28 | using System.IO; |
29 | | -using System.Linq; |
30 | 29 | using System.Text; |
31 | 30 | using System.Threading; |
32 | 31 | using System.Threading.Tasks; |
@@ -63,9 +62,9 @@ public class RebufferableBinaryReader |
63 | 62 | private readonly BinaryStreamStack streamStack; |
64 | 63 |
|
65 | 64 | /// <summary> |
66 | | - /// The BOM (AKA preamble) for the encoding. |
| 65 | + /// Counts the number of chunks read from the underlying stream. |
67 | 66 | /// </summary> |
68 | | - private readonly byte[] preamble; |
| 67 | + private int processedChunkCounter; |
69 | 68 |
|
70 | 69 | #endregion |
71 | 70 |
|
@@ -101,7 +100,7 @@ public RebufferableBinaryReader(Stream input, Encoding encoding, int bufferSize |
101 | 100 | streamStack = new BinaryStreamStack(encoding); |
102 | 101 | this.encoding = encoding; |
103 | 102 | this.bufferSize = bufferSize; |
104 | | - this.preamble = encoding.GetPreamble(); |
| 103 | + processedChunkCounter = 0; |
105 | 104 | } |
106 | 105 |
|
107 | 106 | #endregion |
@@ -291,10 +290,7 @@ public byte[] ReadByteLine() |
public string ReadLine()
{
    // Fetch the raw bytes of the next line first; decoding happens only if there is something to decode.
    byte[] lineBytes = ReadByteLine();

    // A null byte line is passed straight through as a null string.
    if (lineBytes == null)
    {
        return null;
    }

    // Otherwise decode the bytes with the encoding this reader was constructed with.
    return encoding.GetString(lineBytes);
}
299 | 295 |
|
300 | 296 | /// <summary> |
@@ -457,16 +453,38 @@ public async Task<byte[]> ReadByteLineAsync(CancellationToken cancellationToken |
public async Task<string> ReadLineAsync(CancellationToken cancellationToken = default)
{
    // Fetch the raw bytes of the next line, forwarding the caller's cancellation token.
    byte[] lineBytes = await ReadByteLineAsync(cancellationToken).ConfigureAwait(false);

    // A null byte line is passed straight through as a null string.
    if (lineBytes == null)
    {
        return null;
    }

    // Otherwise decode the bytes with the encoding this reader was constructed with.
    return encoding.GetString(lineBytes);
}
465 | 458 |
|
466 | 459 | #endregion |
467 | 460 |
|
468 | 461 | #region Methods |
469 | 462 |
|
/// <summary>
/// Determines the length of the byte order mark (BOM, i.e. the encoding preamble)
/// found at the start of the given buffer, if any.
/// </summary>
/// <param name="buffer">
/// The buffer to examine. Only its leading bytes are compared against the preamble.
/// </param>
/// <returns>
/// The length of the encoding's preamble when the buffer starts with it; otherwise
/// <c>0</c>. This includes the cases where the encoding has no preamble, or the
/// buffer is <c>null</c> or shorter than the preamble.
/// </returns>
private int GetBomOffset(byte[] buffer)
{
    byte[] bom = encoding.GetPreamble();

    // A buffer shorter than the preamble cannot start with it. Bailing out here also
    // guarantees the loop below never indexes past the end of the buffer.
    if (buffer == null || buffer.Length < bom.Length)
    {
        return 0;
    }

    for (int i = 0; i < bom.Length; ++i)
    {
        if (bom[i] != buffer[i])
        {
            // The first mismatch settles the question; no need to inspect the rest.
            return 0;
        }
    }

    // Every preamble byte matched (or the preamble is empty, in which case this is 0).
    return bom.Length;
}
| 487 | + |
470 | 488 | /// <summary> |
471 | 489 | /// Reads more data from the stream into the stream stack. |
472 | 490 | /// </summary> |
@@ -518,14 +536,30 @@ private async Task<int> StreamDataAsync(CancellationToken cancellationToken = de |
private void PushToStack(byte[] buffer, int amountRead)
{
    /*
        Pushes the bytes just read from the underlying stream onto the stream stack.

        The logic in this method until August 2025 would eliminate the BOM (also called the
        encoding preamble) if it was present at the beginning of each and every buffer read
        from the stream.

        However, we only need to remove the BOM if present at the very beginning of the stream.
        In other words: only remove the BOM from the first chunk.
    */

    // Nothing was read, so there is nothing to push and the chunk is not counted.
    if (amountRead <= 0)
    {
        return;
    }

    if (processedChunkCounter == 0)
    {
        // Only the first `amountRead` bytes of the buffer belong to this chunk. Look for a
        // BOM only when the chunk is long enough to contain the whole preamble; otherwise
        // bytes past `amountRead` would take part in the comparison and a false match
        // would make the whole chunk be dropped.
        int preambleLength = encoding.GetPreamble().Length;
        int bomOffset = amountRead >= preambleLength ? GetBomOffset(buffer) : 0;

        // Push whatever follows the BOM; a chunk consisting solely of the BOM pushes nothing.
        if (amountRead - bomOffset > 0)
        {
            streamStack.Push(buffer, bomOffset, amountRead - bomOffset);
        }
    }
    else
    {
        streamStack.Push(buffer, 0, amountRead);
    }

    // Saturate rather than overflow: wrapping back around to 0 would make a later chunk
    // be treated as the first one again.
    if (processedChunkCounter < int.MaxValue)
    {
        processedChunkCounter++;
    }
}
530 | 564 |
|
531 | 565 | #endregion |
|
0 commit comments