Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions xml/_pipeline.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2018-2026 the Deno authors. MIT license.
// This module is browser compatible.

/**
* Internal helper for constructing the tokenizer + parser pair that drives
* both {@linkcode parseXmlStream} and {@linkcode parseXmlRecords}.
*
* @module
*/

import type { ParseStreamOptions, XmlEventCallbacks } from "./types.ts";
import { XmlTokenizer } from "./_tokenizer.ts";
import { XmlEventParser } from "./_parser.ts";

/**
* A configured tokenizer paired with the parser it feeds.
*
* Instances are produced by {@linkcode createXmlPipeline}. Both members are
* `readonly`: callers drive the pair but do not swap either half out.
*/
export interface XmlPipeline {
/** Tokenizer that consumes raw XML chunks. */
readonly tokenizer: XmlTokenizer;
/** Event parser that receives tokens and invokes user callbacks. */
readonly parser: XmlEventParser;
}

/**
 * Builds the tokenizer/parser pair shared by the public streaming APIs from
 * {@linkcode ParseStreamOptions} and {@linkcode XmlEventCallbacks}.
 *
 * Defaults applied here when an option is absent: `trackPosition` is
 * `false`, `disallowDoctype` is `true`, and XML 1.1 handling is enabled only
 * when `xmlVersion` is exactly `"1.1"`.
 *
 * @param options Stream parse options.
 * @param callbacks Event callbacks invoked by the parser.
 * @returns The configured tokenizer and parser.
 */
export function createXmlPipeline(
  options: ParseStreamOptions,
  callbacks: XmlEventCallbacks,
): XmlPipeline {
  // Resolve the tokenizer settings once so the tokenizer and the parser are
  // guaranteed to agree on the XML version flag.
  const tokenizerOptions = {
    trackPosition: options.trackPosition ?? false,
    disallowDoctype: options.disallowDoctype ?? true,
    xml11: options.xmlVersion === "1.1",
  };

  const tokenizer = new XmlTokenizer(tokenizerOptions);
  // The parser receives the caller's options object as-is, plus the resolved
  // XML 1.1 flag.
  const parser = new XmlEventParser(
    callbacks,
    options,
    tokenizerOptions.xml11,
  );

  return { tokenizer, parser };
}
7 changes: 4 additions & 3 deletions xml/deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"version": "0.1.1",
"exports": {
".": "./mod.ts",
"./types": "./types.ts",
"./parse-stream": "./parse_stream.ts",
"./parse": "./parse.ts",
"./stringify": "./stringify.ts"
"./parse-records": "./parse_records.ts",
"./parse-stream": "./parse_stream.ts",
"./stringify": "./stringify.ts",
"./types": "./types.ts"
}
}
4 changes: 3 additions & 1 deletion xml/mod.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
*
* ## Parsing APIs
*
* Two parsing APIs are provided for different use cases:
* Three parsing APIs are provided for different use cases:
*
* | API | Use Case | Output |
* |-----|----------|--------|
* | {@linkcode parse} | Parse a complete XML string | Document tree |
* | {@linkcode parseXmlStream} | Streaming with maximum throughput | Direct callbacks |
* | {@linkcode parseXmlRecords} | Streaming records assembled in callbacks | `AsyncGenerator<T>` |
*
* ## Quick Examples
*
Expand Down Expand Up @@ -82,4 +83,5 @@
export * from "./types.ts";
export * from "./parse_stream.ts";
export * from "./parse.ts";
export * from "./parse_records.ts";
export * from "./stringify.ts";
106 changes: 106 additions & 0 deletions xml/parse_records.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright 2018-2026 the Deno authors. MIT license.
// This module is browser compatible.

/**
* Async-generator adapter that turns an XML chunk source into a stream of
* application-defined records assembled inside SAX-style event callbacks.
*
* @module
*/

import type { ParseStreamOptions, XmlEventCallbacks } from "./types.ts";
import { createXmlPipeline } from "./_pipeline.ts";

/**
 * Turns an async iterable of XML chunks into an async stream of records that
 * are built inside SAX-style event callbacks.
 *
 * Chunks are handled one at a time: a chunk is parsed synchronously, every
 * record emitted while parsing it is collected, and only then are those
 * records yielded one by one as the consumer pulls. Backpressure therefore
 * applies per record, while the amount buffered at any moment is limited to
 * what a single chunk produced.
 *
 * Errors are fail-fast. If parsing throws (an XML syntax error, or an
 * exception raised by a user callback), iteration rejects right away and any
 * records collected from the failing chunk are dropped. Records that were
 * already yielded from earlier chunks are unaffected.
 *
 * To use the result with `pipeThrough`, wrap it with
 * {@linkcode ReadableStream.from}.
 *
 * @typeParam T The type of records yielded by the generator.
 *
 * @param source An async iterable of XML string chunks (e.g. a
 * `ReadableStream<Uint8Array>` piped through {@linkcode TextDecoderStream}).
 *
 * @param createCallbacks Factory called exactly once with an `emit` function.
 * It returns the SAX-style callbacks that assemble records and call `emit`
 * for each completed record.
 *
 * @param options Parser options forwarded to the underlying tokenizer/parser.
 *
 * @returns An async generator that yields records as the document is parsed.
 *
 * @example Parse items from an XML feed
 * ```ts
 * import { parseXmlRecords } from "@std/xml/parse-records";
 * import { assertEquals } from "@std/assert";
 *
 * const xml = "<feed><item>First</item><item>Second</item></feed>";
 *
 * const titles: string[] = [];
 * for await (
 *   const title of parseXmlRecords<string>(
 *     ReadableStream.from([xml]),
 *     (emit) => {
 *       let inside = false;
 *       let text = "";
 *       return {
 *         onStartElement(name) {
 *           if (name === "item") {
 *             inside = true;
 *             text = "";
 *           }
 *         },
 *         onText(t) {
 *           if (inside) text += t;
 *         },
 *         onEndElement(name) {
 *           if (name === "item") {
 *             emit(text);
 *             inside = false;
 *           }
 *         },
 *       };
 *     },
 *   )
 * ) {
 *   titles.push(title);
 * }
 *
 * assertEquals(titles, ["First", "Second"]);
 * ```
 */
export async function* parseXmlRecords<T>(
  source: AsyncIterable<string>,
  createCallbacks: (emit: (record: T) => void) => XmlEventCallbacks,
  options: ParseStreamOptions = {},
): AsyncGenerator<T> {
  // Records emitted by the user callbacks and not yet handed to the consumer.
  const pending: T[] = [];
  const emit = (record: T) => pending.push(record);
  const pipeline = createXmlPipeline(options, createCallbacks(emit));
  const { tokenizer, parser } = pipeline;

  // Fail-fast by design: an exception from `process` / `finalize` escapes
  // before the drain below runs, so records collected from the failing chunk
  // are never yielded. Do not "fix" this with `try { ... } finally { drain }`:
  // if the consumer `break`s during such a drain, the resulting
  // `iter.return()` silently swallows the pending exception per ECMAScript
  // semantics.
  for await (const chunk of source) {
    tokenizer.process(chunk, parser);
    // Drain everything this chunk produced, then reset for the next chunk.
    for (const record of pending) yield record;
    pending.length = 0;
  }

  // End of input: flush the tokenizer first, then the parser, and drain any
  // records emitted during finalization.
  tokenizer.finalize(parser);
  parser.finalize();
  for (const record of pending) yield record;
  pending.length = 0;
}
Loading
Loading