Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ npm install @textfilters/core
```ts
import {
createCachedTextProcessor,
createPreparedText,
createTextRangePipeline,
createTextPipeline,
lowerNfkc,
type AllocationAwareRangeScanner,
type TextCensor,
type TextRangeScanner,
} from "@textfilters/core";
Expand All @@ -63,15 +65,31 @@ const scanner: TextRangeScanner = ({ text }) =>
// Build a range pipeline, register the scanner, and censor the input in one chain.
// censor() is declared to return a string (the optional mask argument is omitted here).
const rangeSafeText = createTextRangePipeline()
.use(scanner)
.censor("secret message");

// Example allocation-aware scanner that reports the first "." in the input.
const allocationAwareScanner: AllocationAwareRangeScanner = {
  allocationAware: true,
  // Pre-scan gate: eligible only when the prepared hints report a dot.
  check(input) {
    return input.hints.hasDot;
  },
  // Streams at most one match: the range [i, i + 1] for the first "." entry
  // found in the code point array.
  scan(input, sink) {
    const dotAt = input.codePoints.indexOf(".");
    if (dotAt < 0) return;
    sink({ range: [dotAt, dotAt + 1] });
  },
};

const prepared = createPreparedText("a.b");
// check() on a scanner is only a pre-scan eligibility gate: `true` means the
// scanner should be run on this input, not that a range was actually found.
const isEligible = allocationAwareScanner.check(prepared);
// To confirm that a range is really emitted, ask a pipeline instead.
const hasRange = createTextRangePipeline()
  .use(allocationAwareScanner)
  .check("a.b");
```

## API

- `createTextPipeline()`
- `createTextRangePipeline()`
- `checkTextRanges(value, scanners)`
- `createPreparedText(value)`
- `createTextHints(text, codePoints)`
- `createTextScanInput(value)`
- `createTextRangeScanResult(ranges, metadata)`
- `runTextRangeScanner(scanner, input)`
- `scanPreparedTextRanges(scanner, input, sink)`
- `scanTextRanges(value, scanners)`
- `createCachedTextProcessor(processor, options)`
- `normalizeTextInput(value)`
Expand Down Expand Up @@ -140,6 +158,22 @@ code point ranges before masking. A scanner can be a function or an object with
a `scan()` method. Scanners receive `TextScanInput`, which contains the
normalized source text and its code point array.

`PreparedText` extends that input with reusable `TextHints`: generic facts about
length, ASCII content, digits, whitespace, punctuation, and common delimiters.
These hints are computed once by `createPreparedText()` and reused across
registered scanners. They are intentionally generic; URL, email, phone,
profanity, spam, and future packages each keep their own package-specific
detection logic.

`AllocationAwareRangeScanner` separates a cheap pre-scan `check()` gate from
sink-based `scan()`. A true `check()` result means the scanner is eligible to
scan the prepared input; it is not itself proof that a range exists.
`scan()` streams `RangeMatch` values into a `RangeMatchSink`; returning `false`
from the sink requests early stop. Use `createTextRangePipeline().check()` or
`scanPreparedTextRanges()` when callers need to confirm an actual emitted
range. Legacy scanner functions and scanner objects remain supported and
continue to return range arrays or `TextRangeScanResult`.

`createTextRangePipeline()` collects ranges from registered scanners, merges
overlaps in code point order, and masks once with
`censorCodePointRanges()`. This keeps scanner packages independent while
Expand Down
49 changes: 45 additions & 4 deletions src/contracts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,43 @@ export interface TextPipeline {
/**
 * Read-only `[start, end]` pair of numeric offsets.
 * NOTE(review): presumably offsets into the string itself (UTF-16 code units),
 * as opposed to `TextCodePointRange` — confirm against the range helpers.
 */
export type TextRange = readonly [start: number, end: number];
/**
 * Read-only `[start, end]` pair of indices into a code point array.
 * NOTE(review): `end` appears to be exclusive — confirm against the masking code.
 */
export type TextCodePointRange = readonly [start: number, end: number];

/**
 * Generic, reusable facts about a piece of text: length, ASCII, digit,
 * whitespace, punctuation, and common delimiter information.
 *
 * Hints are meant to be computed once per input and shared by every registered
 * scanner. They are deliberately not package-specific; detection logic for a
 * particular domain belongs in the scanner that needs it.
 */
export interface TextHints {
// Length facts.
// NOTE(review): presumably `textLength` is the string length and
// `codePointLength` the length of the code point array — confirm in createTextHints().
readonly textLength: number;
readonly codePointLength: number;
readonly isEmpty: boolean;
// ASCII / non-ASCII content facts.
readonly hasAsciiOnly: boolean;
readonly hasNonAscii: boolean;
// Digit facts.
// NOTE(review): whether non-ASCII digits are counted is not visible here — confirm.
readonly hasDigit: boolean;
readonly digitCount: number;
readonly hasAsciiLetter: boolean;
readonly hasWhitespace: boolean;
// Punctuation facts.
readonly hasPunctuation: boolean;
readonly punctuationCount: number;
// Presence flags for common delimiters.
// NOTE(review): presumably "@", ".", "/", ":" and "+" respectively — confirm.
readonly hasAtSign: boolean;
readonly hasDot: boolean;
readonly hasSlash: boolean;
readonly hasColon: boolean;
readonly hasPlus: boolean;
}

/**
 * Input handed to range scanners: the normalized source text together with
 * its code point array.
 */
export interface TextScanInput {
// The normalized source text.
readonly text: string;
// The code points of the text as an array of strings.
// NOTE(review): presumably one code point per element — confirm in createTextScanInput().
readonly codePoints: readonly string[];
}

/**
 * A `TextScanInput` extended with precomputed `TextHints`, so scanners can
 * reuse the same generic facts instead of re-deriving them from the text.
 */
export interface PreparedText extends TextScanInput {
// Generic facts about `text` / `codePoints`, shared across scanners.
readonly hints: TextHints;
}

/**
 * Read-only, string-keyed bag of arbitrary values attached to scan output.
 * Values are typed `unknown`, so consumers must narrow them before use.
 */
export type TextRangeScanMetadata = Readonly<Record<string, unknown>>;

/**
 * A single match produced by a sink-based `scan()`: one code point range plus
 * optional metadata describing it.
 */
export interface RangeMatch {
// The matched span, expressed as code point indices.
readonly range: TextCodePointRange;
// Optional extra information about this match.
readonly metadata?: TextRangeScanMetadata;
}

/**
 * Callback that receives each `RangeMatch` streamed by a sink-based scanner.
 * Returning `false` requests that the scanner stop early.
 * NOTE(review): presumably any other return value (`true` or nothing) means
 * "keep scanning" — confirm against the pipeline implementation.
 */
export type RangeMatchSink = (match: RangeMatch) => boolean | void;

export interface TextRangeScanResult {
readonly ranges: readonly TextCodePointRange[];
readonly metadata?: TextRangeScanMetadata;
Expand All @@ -81,12 +111,22 @@ export type TextRangeScannerFunction = (
input: TextScanInput,
) => TextRangeScannerOutput;

/**
 * Scanner contract that separates a cheap pre-scan gate (`check`) from a
 * sink-based `scan` that streams matches instead of returning them.
 */
export interface AllocationAwareRangeScanner {
// Optional scanner name.
readonly name?: string;
// Literal `true` marker.
// NOTE(review): presumably the discriminant used to tell this contract apart
// from legacy scanners at runtime — confirm in the scanner module.
readonly allocationAware: true;
// Cheap eligibility gate. `true` means the scanner is eligible to scan the
// prepared input; it is not itself proof that a range exists.
check(input: PreparedText): boolean;
// Streams each `RangeMatch` into `sink`; the sink may return `false` to
// request an early stop.
// NOTE(review): the meaning of scan()'s own return value is not visible
// here — confirm before relying on it.
scan(input: PreparedText, sink: RangeMatchSink): boolean | void;
}

/**
 * Object-form scanner from before the sink-based contract: `scan()` receives
 * a plain `TextScanInput` and returns its result directly rather than
 * streaming matches into a sink.
 */
export interface LegacyTextRangeScanner {
// Optional scanner name.
readonly name?: string;
// Scans the input and returns the scanner output.
// NOTE(review): `TextRangeScannerOutput` is declared outside this view;
// presumably range arrays or a `TextRangeScanResult` — confirm.
scan(input: TextScanInput): TextRangeScannerOutput;
}

/**
 * Any scanner accepted by `TextRangePipeline.use()`:
 * - a plain scanner function,
 * - a legacy scanner object whose `scan()` returns its output directly, or
 * - an allocation-aware scanner with a `check()` gate and a sink-based `scan()`.
 *
 * The legacy object shape is referenced through the named
 * `LegacyTextRangeScanner` interface instead of being repeated inline.
 */
export type TextRangeScanner =
  | TextRangeScannerFunction
  | LegacyTextRangeScanner
  | AllocationAwareRangeScanner;

export interface TextRangePipelineScanResult {
readonly text: string;
Expand All @@ -103,6 +143,7 @@ export interface TextRangePipelineCensorResult {

export interface TextRangePipeline {
use(scanner: TextRangeScanner): TextRangePipeline;
check(value: unknown): boolean;
scan(value: unknown): TextRangePipelineScanResult;
censor(value: unknown, mask?: string): string;
process(value: unknown, mask?: string): TextRangePipelineCensorResult;
Expand Down
10 changes: 10 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
export type {
AllocationAwareRangeScanner,
CachedTextProcessor,
CachedTextProcessorOptions,
LegacyTextRangeScanner,
PreparedText,
RangeMatch,
RangeMatchSink,
TextCensor,
TextCodePointRange,
TextGuard,
Expand All @@ -13,6 +18,7 @@ export type {
TextPipelineProcessedResult,
TextPipelineProcessResult,
TextRange,
TextHints,
TextRangePipeline,
TextRangePipelineCensorResult,
TextRangePipelineScanResult,
Expand Down Expand Up @@ -43,9 +49,13 @@ export {
export { createTextPipeline } from "./pipeline.js";
export { mergeCodePointRanges, mergeRanges } from "./ranges.js";
export {
checkTextRanges,
createPreparedText,
createTextHints,
createTextRangePipeline,
createTextRangeScanResult,
createTextScanInput,
runTextRangeScanner,
scanPreparedTextRanges,
scanTextRanges,
} from "./scanner.js";
Loading