-
-
Notifications
You must be signed in to change notification settings - Fork 119
Expand file tree
/
Copy pathBunitHtmlParser.cs
More file actions
203 lines (172 loc) · 5.77 KB
/
BunitHtmlParser.cs
File metadata and controls
203 lines (172 loc) · 5.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
using System.Collections;
using System.Diagnostics;
using AngleSharp;
using AngleSharp.Css;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Bunit.Diffing;
namespace Bunit.Rendering;
/// <summary>
/// An AngleSharp-based HTML parser that turns markup strings
/// into an <see cref="INodeList"/>.
/// </summary>
public sealed class BunitHtmlParser : IDisposable
{
    // Tag names (upper case) used to decide which parent element a fragment
    // must be parsed inside of, or whether it must be parsed as a full document.
    private const string TbodySubElements = "TR";
    private const string ColgroupSubElement = "COL";
    private static readonly string[] TableSubElements = { "CAPTION", "COLGROUP", "TBODY", "TFOOT", "THEAD" };
    private static readonly string[] TrSubElements = { "TD", "TH" };
    private static readonly string[] SpecialHtmlElements = { "HTML", "HEAD", "BODY", "!DOCTYPE" };

    private readonly IBrowsingContext context;
    private readonly IHtmlParser htmlParser;

    // Every document opened by Parse is remembered here so Dispose can release it.
    private readonly List<IDocument> documents = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="BunitHtmlParser"/> class
    /// whose AngleSharp context has CSS support and a default <see cref="HtmlComparer"/>
    /// registered, but no test context or <see cref="TestRenderer"/>.
    /// </summary>
    public BunitHtmlParser()
        : this(Configuration.Default.WithCss().With(new HtmlComparer()))
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="BunitHtmlParser"/> class
    /// whose AngleSharp context has CSS support and has the given comparer,
    /// the test context, and the test context's renderer registered.
    /// </summary>
    /// <param name="htmlComparer">The comparer to register in the AngleSharp configuration.</param>
    /// <param name="testContext">The test context to register, together with its renderer.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="htmlComparer"/> or <paramref name="testContext"/> is <see langword="null"/>.
    /// </exception>
    public BunitHtmlParser(HtmlComparer htmlComparer, TestContextBase testContext)
        : this(CreateConfiguration(htmlComparer, testContext))
    {
    }

    // Shared initialization: registers this parser instance and a 1920x1080 render
    // device in the configuration, then creates the browsing context and the HTML
    // parser (which keeps source references) on top of it.
    private BunitHtmlParser(IConfiguration angleSharpConfiguration)
    {
        var withParser = angleSharpConfiguration.With(this);
        var renderDevice = new DefaultRenderDevice
        {
            ViewPortWidth = 1920,
            ViewPortHeight = 1080,
        };

        context = BrowsingContext.New(withParser.WithRenderDevice(renderDevice));

        var options = new HtmlParserOptions { IsKeepingSourceReferences = true };
        htmlParser = new HtmlParser(options, context);
    }

    /// <summary>
    /// Parses a markup HTML string using AngleSharp's HTML5 parser.
    /// </summary>
    /// <remarks>
    /// Markup starting with an <c>html</c>, <c>head</c>, <c>body</c>, or <c>!doctype</c> tag
    /// is parsed as a complete document; all other markup is parsed as a fragment
    /// inside a context element picked from its first tag.
    /// </remarks>
    /// <param name="markup">The markup to parse.</param>
    /// <returns>The <see cref="INodeList"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="markup"/> is <see langword="null"/>.</exception>
    public INodeList Parse([StringSyntax("Html")] string markup)
    {
        if (markup is null)
        {
            throw new ArgumentNullException(nameof(markup));
        }

        // The public API is synchronous, so the async document creation is awaited by blocking.
        var freshDocument = GetNewDocumentAsync().GetAwaiter().GetResult();
        var (contextElement, specialTag) = GetParseContext(markup, freshDocument);

        // No context element together with a matched tag means a "special" element
        // was found, which has to go through the full-document parser.
        if (contextElement is null && specialTag is not null)
        {
            return ParseSpecial(markup, specialTag);
        }

        return htmlParser.ParseFragment(markup, contextElement!);
    }

    // Builds the AngleSharp configuration used by the public two-argument constructor.
    // The comparer is validated before the test context.
    private static IConfiguration CreateConfiguration(HtmlComparer htmlComparer, TestContextBase testContext)
    {
        if (htmlComparer is null)
        {
            throw new ArgumentNullException(nameof(htmlComparer));
        }

        if (testContext is null)
        {
            throw new ArgumentNullException(nameof(testContext));
        }

        return Configuration.Default
            .WithCss()
            .With(htmlComparer)
            .With(testContext)
            .With(testContext.Renderer);
    }

    // Parses the markup as a whole document and returns the part of that document
    // that corresponds to the matched special element.
    private INodeList ParseSpecial(string markup, string matchedElement)
    {
        var parsed = htmlParser.ParseDocument(markup);

        switch (matchedElement)
        {
            case "HTML":
                return new SingleNodeNodeList(parsed.Body?.ParentElement);
            case "HEAD":
                return new SingleNodeNodeList(parsed.Head);
            case "BODY":
                return new SingleNodeNodeList(parsed.Body);
            case "!DOCTYPE":
                return parsed.ChildNodes;
            default:
                throw new InvalidOperationException($"{matchedElement} should not be parsed by {nameof(ParseSpecial)}.");
        }
    }

    // Picks the element that the markup should be parsed inside of. Markup whose first
    // non-whitespace character is not a tag start is parsed in the document body.
    private static (IElement? Context, string? MatchedElement) GetParseContext(
        ReadOnlySpan<char> markup,
        IDocument document)
    {
        var firstCharIndex = markup.IndexOfFirstNonWhitespaceChar();

        if (markup.Length == 0 || !markup[firstCharIndex].IsTagStart())
        {
            return (Context: document.Body, MatchedElement: null);
        }

        return GetParseContextFromTag(markup, firstCharIndex, document);
    }

    // Inspects the first tag of the markup. Table-related tags get the parent structure
    // they need (a table, optionally with a tr/tbody/colgroup inside) appended to the
    // document body; special elements yield no context element; anything else uses the body.
    private static (IElement? Context, string? MatchedElement) GetParseContextFromTag(
        ReadOnlySpan<char> markup,
        int startIndex,
        IDocument document)
    {
#if !NET9_0_OR_GREATER
        Debug.Assert(document.Body is not null, "Body of the document should never be null at this point.");
#else
        Debug.Assert(document.Body is not null);
#endif

        if (markup.StartsWithElements(TableSubElements, startIndex, out var matchedElement))
        {
            return (Context: CreateTable(), MatchedElement: matchedElement);
        }

        if (markup.StartsWithElements(TrSubElements, startIndex, out matchedElement))
        {
            var row = CreateTable().AppendElement(document.CreateElement("tr"));
            return (Context: row, MatchedElement: matchedElement);
        }

        if (markup.StartsWithElement(TbodySubElements, startIndex))
        {
            var tableBody = CreateTable().AppendElement(document.CreateElement("tbody"));
            return (Context: tableBody, MatchedElement: TbodySubElements);
        }

        if (markup.StartsWithElement(ColgroupSubElement, startIndex))
        {
            var columnGroup = CreateTable().AppendElement(document.CreateElement("colgroup"));
            return (Context: columnGroup, MatchedElement: ColgroupSubElement);
        }

        if (markup.StartsWithElements(SpecialHtmlElements, startIndex, out matchedElement))
        {
            // Special elements are parsed as a full document, so there is no context element.
            return (Context: null, MatchedElement: matchedElement);
        }

        return (Context: document.Body, MatchedElement: matchedElement);

        // Appends a new table element to the document body and returns it.
        IElement CreateTable() => document.Body.AppendElement(document.CreateElement("table"));
    }

    // Opens a new document in the browsing context and records it for later disposal.
    private async Task<IDocument> GetNewDocumentAsync()
    {
        var opened = await context.OpenNewAsync().ConfigureAwait(false);
        documents.Add(opened);
        return opened;
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        context.Dispose();
        documents.ForEach(document => document.Dispose());
    }

    // A node list that always contains exactly one node.
    private sealed class SingleNodeNodeList : INodeList, IReadOnlyList<INode>
    {
        private readonly INode node;

        public SingleNodeNodeList(INode? node)
        {
            this.node = node ?? throw new ArgumentNullException(nameof(node));
        }

        public int Length => 1;

        public int Count => 1;

        // Only index 0 is valid.
        public INode this[int index] => index == 0 ? node : throw new IndexOutOfRangeException();

        public IEnumerator<INode> GetEnumerator()
        {
            yield return node;
        }

        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

        // Serialization is delegated to the single wrapped node.
        public void ToHtml(TextWriter writer, IMarkupFormatter formatter) => node.ToHtml(writer, formatter);
    }
}