Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,19 @@ Cloud storage URLs in virtual chunk references are automatically translated:
- `abfs://container@account.dfs.core.windows.net/path` →
`https://account.blob.core.windows.net/container/path`

For S3, addressing follows the repo's virtual-chunk-container config (region,
endpoint, path-style), mirroring the Rust implementation. Buckets whose name
contains a dot must use path-style addressing; when the container config records
a region (repos written by Icechunk do), reads go straight to that regional
endpoint — which serves CORS and works in both Node and the browser, with no
extra configuration.

If no region is known, dotted-name buckets fall back to the global endpoint,
whose region redirect (for buckets outside `us-east-1`) is resolved
automatically in Node but not in browsers — the cross-origin redirect carries no
CORS headers. Supply a [`fetchClient`](#virtual-chunk-authentication) that routes
to the regional endpoint or a CORS-enabled proxy in that case.

### Repository

For direct access to branches, tags, and checkouts.
Expand Down
83 changes: 65 additions & 18 deletions src/format/flatbuffers/repo-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,41 @@ import {

const SUPPORTED_SPEC_VERSION = 2;

/**
* S3 store config carried by a virtual chunk container, mirroring Rust's
* `S3Options`. Drives endpoint/addressing when translating `s3://` chunk
* locations to HTTPS — notably `region`, which lets dotted-name buckets
* address their regional endpoint directly (required for browser CORS, since
* the global endpoint's region redirect carries no CORS headers).
*/
export interface S3ContainerConfig {
region?: string;
endpointUrl?: string;
forcePathStyle?: boolean;
}

/**
* A virtual chunk container from the repo config, mirroring Rust's
* `VirtualChunkContainer`. `name` resolves `vcc://name/...` references;
* `urlPrefix` matches absolute chunk locations (longest prefix wins).
*/
export interface VirtualChunkContainer {
name: string | null;
urlPrefix: string;
s3?: S3ContainerConfig;
}

/** Parsed repo file with cached metadata */
export interface ParsedRepo {
repo: FbsRepo;
specVersion: number;
snapshotsLength: number; // Cached for bounds checking
/**
* Map of Virtual Chunk Container name → `url_prefix` for resolving
* relative `vcc://name/path` chunk locations. Empty when the repo has
* no named containers (unnamed/legacy entries are excluded).
* Virtual chunk containers from the repo config, sorted by descending
* `urlPrefix` length so the most specific container matches an absolute
* location first. Empty when the repo declares no containers.
*/
virtualChunkContainers: Map<string, string>;
virtualChunkContainers: VirtualChunkContainer[];
}

/**
Expand Down Expand Up @@ -73,19 +97,19 @@ export function parseRepo(data: Uint8Array): ParsedRepo {
}

/**
* Extract named Virtual Chunk Containers from the flexbuffers-encoded
* RepositoryConfig on the repo table.
*
* Only containers with a non-null `name` are included — unnamed/legacy
* containers cannot be referenced by `vcc://` URLs.
* Extract virtual chunk containers from the flexbuffers-encoded
* RepositoryConfig, including each container's S3 store config (region/
* endpoint/addressing). Unnamed containers are included too — they still
* match absolute chunk locations by `urlPrefix` even though they can't be
* referenced by `vcc://name`. Returned sorted by descending `urlPrefix`
* length so the most specific container matches first.
*
* Returns an empty map when the config is absent, malformed, or has no
* named containers. Parsing failures are swallowed because virtual chunk
* resolution is a best-effort path; callers that actually hit a `vcc://`
* location with no matching name will surface a clear error at fetch time.
* Returns an empty array when the config is absent or malformed. Parsing
* failures are swallowed because virtual chunk resolution is best-effort; a
* `vcc://` location with no matching name surfaces a clear error at fetch time.
*/
function parseVirtualChunkContainers(repo: FbsRepo): Map<string, string> {
const result = new Map<string, string>();
function parseVirtualChunkContainers(repo: FbsRepo): VirtualChunkContainer[] {
const result: VirtualChunkContainer[] = [];
const configBytes = repo.configArray();
if (!configBytes || configBytes.length === 0) return result;

Expand All @@ -106,14 +130,37 @@ function parseVirtualChunkContainers(repo: FbsRepo): Map<string, string> {

for (const container of Object.values(containers)) {
if (!isRecord(container)) continue;
const { name, url_prefix: urlPrefix } = container;
if (typeof name !== "string" || typeof urlPrefix !== "string") continue;
result.set(name, urlPrefix);
const urlPrefix = container.url_prefix;
if (typeof urlPrefix !== "string") continue;
const name = typeof container.name === "string" ? container.name : null;
const s3 = parseS3Store(container.store);
result.push(s3 ? { name, urlPrefix, s3 } : { name, urlPrefix });
}

// Most specific (longest) url_prefix first, so absolute-location matching
// prefers the narrowest container.
result.sort((a, b) => b.urlPrefix.length - a.urlPrefix.length);
return result;
}

/**
* Parse the S3-family store config from a container's `store`. The `s3`,
* `s3_compatible`, and `tigris` ObjectStoreConfig variants all share Rust's
* `S3Options` shape; gcs/azure/http/local stores yield undefined.
*/
function parseS3Store(store: unknown): S3ContainerConfig | undefined {
if (!isRecord(store)) return undefined;
const s3 = store.s3 ?? store.s3_compatible ?? store.tigris;
if (!isRecord(s3)) return undefined;
const cfg: S3ContainerConfig = {};
if (typeof s3.region === "string") cfg.region = s3.region;
if (typeof s3.endpoint_url === "string") cfg.endpointUrl = s3.endpoint_url;
if (typeof s3.force_path_style === "boolean") {
cfg.forcePathStyle = s3.force_path_style;
}
return cfg;
}

function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value);
}
Expand Down
62 changes: 51 additions & 11 deletions src/reader/range-coalescer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,25 +56,45 @@ function expectedRangeLength(range: RangeQuery): number {
return "suffixLength" in range ? range.suffixLength : range.length;
}

/**
* Map an S3 path-style global-endpoint region redirect to the regional URL to
* retry. For buckets outside `us-east-1`, `s3.amazonaws.com` answers 301 with
* no `Location` header (so `fetch` can't follow it) and reports the region in
* `x-amz-bucket-region`; this rebuilds the URL against `s3.<region>.amazonaws.com`.
* Returns null when the response isn't that redirect.
*/
function regionalS3RedirectUrl(url: string, response: Response): string | null {
if (response.status !== 301 && response.status !== 307) return null;
const region = response.headers.get("x-amz-bucket-region");
if (!region) return null;
Comment on lines +66 to +69

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To make the helper more robust against custom or poorly-mocked fetchClient implementations, it is safer to use optional chaining when accessing response.headers.get.

Suggested change
function regionalS3RedirectUrl(url: string, response: Response): string | null {
if (response.status !== 301 && response.status !== 307) return null;
const region = response.headers.get("x-amz-bucket-region");
if (!region) return null;
function regionalS3RedirectUrl(url: string, response: Response): string | null {
if (response.status !== 301 && response.status !== 307) return null;
const region = response.headers?.get?.("x-amz-bucket-region");
if (!region) return null;

const globalPrefix = "https://s3.amazonaws.com/";
if (!url.startsWith(globalPrefix)) return null;
return `https://s3.${region}.amazonaws.com/${url.slice(globalPrefix.length)}`;
}

/**
* Build a minimal `AsyncReadable` that services every `getRange` by
* fetching the configured URL with the requested byte range. The zarr
* key is ignored — when wrapped by `withRangeCoalescing`, all requests
* converge on the same path and become eligible for range-merging.
*/
export function makeUrlStore(opts: MakeUrlStoreOptions): AsyncReadable {
const { url, fetchClient, conditionalHeaders } = opts;
const { fetchClient, conditionalHeaders } = opts;
// Hint shared across reads: once one read resolves an S3 regional redirect,
// later reads start from the regional host. Each read works off its own
// `requestUrl` copy, so this value is an optimization, not a source of truth.
let pinnedUrl = opts.url;

async function doFetch(init: RequestInit): Promise<Response> {
async function doFetch(target: string, init: RequestInit): Promise<Response> {
return fetchClient
? await fetchClient.fetch(url, init)
: await fetch(url, init);
? await fetchClient.fetch(target, init)
: await fetch(target, init);
}

return {
async get() {
throw new Error(
`Virtual chunk URL store for ${url} only supports ranged reads`,
`Virtual chunk URL store for ${pinnedUrl} only supports ranged reads`,
);
},
async getRange(_key, range, options) {
Expand All @@ -86,16 +106,36 @@ export function makeUrlStore(opts: MakeUrlStoreOptions): AsyncReadable {
? `bytes=-${range.suffixLength}`
: `bytes=${range.offset}-${range.offset + range.length - 1}`;

const response = await doFetch({ headers, signal: options?.signal });
// Copy the endpoint locally so a concurrent read updating the shared pin
// can't change which URL this request detects and retries against.
let requestUrl = pinnedUrl;
let response = await doFetch(requestUrl, {
headers,
signal: options?.signal,
});

// Resolve a global-endpoint region redirect and retry once on the
// regional host, re-pinning it for later reads.
const regionalUrl = regionalS3RedirectUrl(requestUrl, response);
if (regionalUrl) {
// Free the discarded redirect body so Node can reuse the connection.
response.body?.cancel().catch(() => {});
requestUrl = regionalUrl;
pinnedUrl = regionalUrl;
response = await doFetch(requestUrl, {
headers,
signal: options?.signal,
});
}
Comment on lines +119 to +129

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

When a 301/307 redirect is received, the original response is discarded and overwritten with the regional fetch response. In some environments (like Node.js), discarding a response without consuming or canceling its body can lead to socket/resource leaks and prevent connection reuse (keep-alive).

To prevent this, we should cancel the response body of the redirect before performing the retry.

      const regionalUrl = regionalS3RedirectUrl(requestUrl, response);
      if (regionalUrl) {
        if (response.body && typeof response.body.cancel === "function") {
          response.body.cancel().catch(() => {});
        }
        requestUrl = regionalUrl;
        pinnedUrl = regionalUrl;
        response = await doFetch(requestUrl, {
          headers,
          signal: options?.signal,
        });
      }


if (response.status === 412) {
throw new Error(
`Virtual chunk at ${url} failed integrity check — data has been modified since snapshot was created`,
`Virtual chunk at ${requestUrl} failed integrity check — data has been modified since snapshot was created`,
);
}
if (response.status !== 200 && response.status !== 206) {
throw new Error(
`Failed to fetch virtual chunk from ${url}: ${response.status} ${response.statusText}`,
`Failed to fetch virtual chunk from ${requestUrl}: ${response.status} ${response.statusText}`,
);
}

Expand All @@ -108,7 +148,7 @@ export function makeUrlStore(opts: MakeUrlStoreOptions): AsyncReadable {
const expected = expectedRangeLength(range);
if (data.length === expected) return data;
throw new Error(
`Virtual range response size mismatch for ${url}: expected ${expected} bytes, got ${data.length}`,
`Virtual range response size mismatch for ${requestUrl}: expected ${expected} bytes, got ${data.length}`,
);
}

Expand All @@ -119,15 +159,15 @@ export function makeUrlStore(opts: MakeUrlStoreOptions): AsyncReadable {
const end = range.offset + range.length;
if (data.length >= end) return data.slice(range.offset, end);
throw new Error(
`Virtual range request not honored for ${url}: need at least ${end} bytes for fallback slicing, got ${data.length}`,
`Virtual range request not honored for ${requestUrl}: need at least ${end} bytes for fallback slicing, got ${data.length}`,
);
}
// Suffix-length on a 200 fallback: take the trailing suffixLength bytes.
if (data.length >= range.suffixLength) {
return data.slice(data.length - range.suffixLength);
}
throw new Error(
`Virtual suffix range request not honored for ${url}: need at least ${range.suffixLength} bytes for fallback slicing, got ${data.length}`,
`Virtual suffix range request not honored for ${requestUrl}: need at least ${range.suffixLength} bytes for fallback slicing, got ${data.length}`,
);
},
};
Expand Down
Loading
Loading