Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions hindsight-api-slim/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,11 @@ def parse_gemini_service_tier(value: str | None) -> str | None:
ENV_RERANKER_OPENROUTER_MODEL = "HINDSIGHT_API_RERANKER_OPENROUTER_MODEL"
ENV_RERANKER_OPENROUTER_BASE_URL = "HINDSIGHT_API_RERANKER_OPENROUTER_BASE_URL"

# Requesty configuration (OpenAI-compatible gateway).
# ENV_REQUESTY_API_KEY names the shared Requesty key; the embeddings-specific
# key below takes precedence over it when both are set.
ENV_REQUESTY_API_KEY = "HINDSIGHT_API_REQUESTY_API_KEY"
ENV_EMBEDDINGS_REQUESTY_API_KEY = "HINDSIGHT_API_EMBEDDINGS_REQUESTY_API_KEY"
ENV_EMBEDDINGS_REQUESTY_MODEL = "HINDSIGHT_API_EMBEDDINGS_REQUESTY_MODEL"

# ZeroEntropy configuration (embeddings)
ENV_EMBEDDINGS_ZEROENTROPY_API_KEY = "HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_API_KEY"
ENV_EMBEDDINGS_ZEROENTROPY_MODEL = "HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_MODEL"
Expand Down Expand Up @@ -642,6 +647,7 @@ def parse_gemini_service_tier(value: str | None) -> str | None:
"bedrock": "us.amazon.nova-2-lite-v1:0",
"volcano": "doubao-pro-32k",
"openrouter": "qwen/qwen3.5-9b",
"requesty": "openai/gpt-4o-mini",
"fireworks": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"nous": "deepseek/deepseek-v4-flash",
}
Expand Down Expand Up @@ -798,6 +804,9 @@ def _parse_strategy_boosts(raw: str | None) -> dict[str, str]:
DEFAULT_RERANKER_OPENROUTER_MODEL = "cohere/rerank-v3.5"
DEFAULT_RERANKER_OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1/rerank"

# Requesty defaults
DEFAULT_EMBEDDINGS_REQUESTY_MODEL = "openai/text-embedding-3-small"

# ZeroEntropy defaults
DEFAULT_EMBEDDINGS_ZEROENTROPY_MODEL = "zembed-1"
# Shared between embeddings (zembed-1) and reranker (zerank-*) — the host is the same.
Expand Down Expand Up @@ -1586,6 +1595,8 @@ class HindsightConfig:
embeddings_cohere_output_dimensions: int | None
embeddings_openrouter_api_key: str | None
embeddings_openrouter_model: str
embeddings_requesty_api_key: str | None
embeddings_requesty_model: str
embeddings_litellm_api_base: str
embeddings_litellm_api_key: str | None
embeddings_litellm_model: str
Expand Down Expand Up @@ -2398,6 +2409,11 @@ def from_env(cls) -> "HindsightConfig":
or os.getenv(ENV_OPENROUTER_API_KEY)
or os.getenv(ENV_LLM_API_KEY),
embeddings_openrouter_model=os.getenv(ENV_EMBEDDINGS_OPENROUTER_MODEL, DEFAULT_EMBEDDINGS_OPENROUTER_MODEL),
# Requesty embeddings (with fallback to shared Requesty key, then LLM key)
embeddings_requesty_api_key=os.getenv(ENV_EMBEDDINGS_REQUESTY_API_KEY)
or os.getenv(ENV_REQUESTY_API_KEY)
or os.getenv(ENV_LLM_API_KEY),
embeddings_requesty_model=os.getenv(ENV_EMBEDDINGS_REQUESTY_MODEL, DEFAULT_EMBEDDINGS_REQUESTY_MODEL),
# ZeroEntropy embeddings
embeddings_zeroentropy_api_key=os.getenv(ENV_EMBEDDINGS_ZEROENTROPY_API_KEY)
or os.getenv("ZEROENTROPY_API_KEY"),
Expand Down
16 changes: 15 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1638,6 +1638,20 @@ def create_embeddings_from_env() -> Embeddings:
batch_size=config.embeddings_openai_batch_size,
dimensions=config.embeddings_openai_dimensions,
)
elif provider == "requesty":
api_key = config.embeddings_requesty_api_key
if not api_key:
raise ValueError(
"HINDSIGHT_API_EMBEDDINGS_REQUESTY_API_KEY, HINDSIGHT_API_REQUESTY_API_KEY, "
f"or {ENV_LLM_API_KEY} is required when {ENV_EMBEDDINGS_PROVIDER} is 'requesty'"
)
return OpenAIEmbeddings(
api_key=api_key,
model=config.embeddings_requesty_model,
base_url="https://router.requesty.ai/v1",
batch_size=config.embeddings_openai_batch_size,
dimensions=config.embeddings_openai_dimensions,
)
elif provider == "zeroentropy":
api_key = config.embeddings_zeroentropy_api_key
if not api_key:
Expand Down Expand Up @@ -1701,6 +1715,6 @@ def create_embeddings_from_env() -> Embeddings:
else:
raise ValueError(
f"Unknown embeddings provider: {provider}. "
f"Supported: 'local', 'onnx', 'tei', 'openai', 'openai-codex', 'openrouter', 'cohere', 'google', "
f"Supported: 'local', 'onnx', 'tei', 'openai', 'openai-codex', 'openrouter', 'requesty', 'cohere', 'google', "
f"'zeroentropy', 'litellm', 'litellm-sdk'"
)
4 changes: 4 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/llm_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ def create_llm_provider(
"deepseek",
"volcano",
"openrouter",
"requesty",
"zai",
"opencode-go",
"atlas",
Expand Down Expand Up @@ -587,6 +588,7 @@ def __init__(
"bedrock",
"volcano",
"openrouter",
"requesty",
"zai",
"opencode-go",
"atlas",
Expand All @@ -612,6 +614,8 @@ def __init__(
self.base_url = "https://api.deepseek.com"
elif self.provider == "openrouter":
self.base_url = "https://openrouter.ai/api/v1"
elif self.provider == "requesty":
self.base_url = "https://router.requesty.ai/v1"
elif self.provider == "zai":
self.base_url = "https://api.z.ai/api/coding/paas/v4"
elif self.provider == "opencode-go":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ def __init__(
"deepseek",
"volcano",
"openrouter",
"requesty",
"zai",
"opencode-go",
"atlas",
Expand All @@ -473,6 +474,8 @@ def __init__(
self.base_url = "https://api.deepseek.com"
elif self.provider == "openrouter":
self.base_url = "https://openrouter.ai/api/v1"
elif self.provider == "requesty":
self.base_url = "https://router.requesty.ai/v1"
elif self.provider == "zai":
self.base_url = "https://api.z.ai/api/coding/paas/v4"
elif self.provider == "opencode-go":
Expand All @@ -497,6 +500,7 @@ def __init__(
"minimax",
"deepseek",
"openrouter",
"requesty",
"zai",
"opencode-go",
"atlas",
Expand Down
11 changes: 9 additions & 2 deletions hindsight-docs/docs/developer/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ For non-English banks (especially CJK) and the language/extraction-language trad

| Variable | Description | Default |
|----------|-------------|---------|
| `HINDSIGHT_API_LLM_PROVIDER` | Provider: `openai`, `openai-codex`, `claude-code`, `anthropic`, `gemini`, `groq`, `minimax`, `deepseek`, `zai`, `opencode-go`, `nous`, `fireworks`, `ollama`, `ollama-cloud`, `lmstudio`, `llamacpp`, `vertexai`, `bedrock`, `litellm`, `litellmrouter`, `volcano`, `openrouter`, `none` | `openai` |
| `HINDSIGHT_API_LLM_PROVIDER` | Provider: `openai`, `openai-codex`, `claude-code`, `anthropic`, `gemini`, `groq`, `minimax`, `deepseek`, `zai`, `opencode-go`, `nous`, `fireworks`, `ollama`, `ollama-cloud`, `lmstudio`, `llamacpp`, `vertexai`, `bedrock`, `litellm`, `litellmrouter`, `volcano`, `openrouter`, `requesty`, `none` | `openai` |
| `HINDSIGHT_API_LLM_API_KEY` | API key for LLM provider | - |
| `HINDSIGHT_API_LLM_MODEL` | Model name | `gpt-5-mini` |
| `HINDSIGHT_API_LLM_BASE_URL` | Custom LLM endpoint | Provider default |
Expand Down Expand Up @@ -270,6 +270,11 @@ export HINDSIGHT_API_LLM_PROVIDER=openrouter
export HINDSIGHT_API_LLM_API_KEY=your-openrouter-api-key
export HINDSIGHT_API_LLM_MODEL=qwen/qwen3.5-9b

# Requesty (OpenAI-compatible gateway)
export HINDSIGHT_API_LLM_PROVIDER=requesty
export HINDSIGHT_API_LLM_API_KEY=your-requesty-api-key
export HINDSIGHT_API_LLM_MODEL=openai/gpt-4o-mini

# DeepSeek (OpenAI-compatible, https://api.deepseek.com)
export HINDSIGHT_API_LLM_PROVIDER=deepseek
export HINDSIGHT_API_LLM_API_KEY=sk-xxxxxxxxxxxx
Expand Down Expand Up @@ -516,7 +521,7 @@ two slots that retain/consolidation cannot consume.

| Variable | Description | Default |
|----------|-------------|---------|
| `HINDSIGHT_API_EMBEDDINGS_PROVIDER` | Provider: `local`, `onnx`, `tei`, `openai`, `openai-codex`, `openrouter`, `cohere`, `google`, `zeroentropy`, `litellm`, or `litellm-sdk` | `local` |
| `HINDSIGHT_API_EMBEDDINGS_PROVIDER` | Provider: `local`, `onnx`, `tei`, `openai`, `openai-codex`, `openrouter`, `requesty`, `cohere`, `google`, `zeroentropy`, `litellm`, or `litellm-sdk` | `local` |
| `HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL` | Model for local provider | `BAAI/bge-small-en-v1.5` |
| `HINDSIGHT_API_EMBEDDINGS_LOCAL_TRUST_REMOTE_CODE` | Allow loading models with custom code (security risk, disabled by default) | `false` |
| `HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU` | Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS) | `false` |
Expand All @@ -538,6 +543,8 @@ two slots that retain/consolidation cannot consume.
| `HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE` | Max inputs per `embeddings.create` call for `openai`/`openrouter`/`requesty` providers — lower this when the upstream endpoint enforces stricter limits (e.g. DashScope caps at 10) | `100` |
| `HINDSIGHT_API_EMBEDDINGS_OPENAI_DIMENSIONS` | Optional requested output dimensions for OpenAI `text-embedding-3` models (e.g., `384` to match an existing pgvector schema); also passed through when the embeddings provider is `requesty` | - |
| `HINDSIGHT_API_EMBEDDINGS_OPENROUTER_API_KEY` | OpenRouter API key for embeddings (falls back to `HINDSIGHT_API_OPENROUTER_API_KEY`, then `HINDSIGHT_API_LLM_API_KEY`) | - |
| `HINDSIGHT_API_EMBEDDINGS_REQUESTY_API_KEY` | Requesty API key for embeddings (falls back to `HINDSIGHT_API_REQUESTY_API_KEY`, then `HINDSIGHT_API_LLM_API_KEY`) | - |
| `HINDSIGHT_API_EMBEDDINGS_REQUESTY_MODEL` | Requesty embedding model | `openai/text-embedding-3-small` |
| `HINDSIGHT_API_EMBEDDINGS_OPENROUTER_MODEL` | OpenRouter embedding model | `perplexity/pplx-embed-v1-0.6b` |
| `HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_API_KEY` | ZeroEntropy API key for embeddings | - |
| `HINDSIGHT_API_EMBEDDINGS_ZEROENTROPY_MODEL` | ZeroEntropy embedding model | `zembed-1` |
Expand Down
1 change: 1 addition & 0 deletions hindsight-docs/src/data/llmProviders.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
{"id": "atlas", "label": "Atlas Cloud", "iconKey": "openai-compatible", "defaultModel": "deepseek-ai/deepseek-v4-pro"},
{"id": "volcano", "label": "Volcano Engine", "iconKey": "zap", "defaultModel": "doubao-pro-32k"},
{"id": "openrouter", "label": "OpenRouter", "iconKey": "globe", "defaultModel": "qwen/qwen3.5-9b"},
{"id": "requesty", "label": "Requesty", "iconKey": "openai-compatible", "defaultModel": "openai/gpt-4o-mini"},
{"id": "openai-codex", "label": "OpenAI Codex", "iconKey": "openai", "defaultModel": "gpt-5.4-mini"},
{"id": "claude-code", "label": "Claude Code", "iconKey": "anthropic", "defaultModel": "claude-sonnet-4-5-20250929"},
{"id": "bedrock", "label": "AWS Bedrock", "iconKey": "cloud", "defaultModel": "us.amazon.nova-2-lite-v1:0"},
Expand Down