NVIDIA · mmilutinovic371 · Jun 5, 2026 · johntmyers · Jun 12, 2026
@@ -154,6 +154,17 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
     passthrough_headers: &["x-model-id"],
 };
 
+/// Built-in inference profile for the `deepinfra` provider type.
+///
+/// Uses the shared `OPENAI_PROTOCOLS` set, defaults to
+/// `https://api.deepinfra.com/v1/openai`, and declares `AuthHeader::Bearer`
+/// with `DEEPINFRA_API_KEY` as its credential key name.
+/// `DEEPINFRA_BASE_URL` is listed as the base-URL config key (presumably an
+/// override for the default URL — confirm against the profile consumer).
+///
+/// NOTE(review): the default base URL carries `/v1` in the middle of its path
+/// rather than at the end; verify that backend URL construction handles this.
+/// NOTE(review): `passthrough_headers` is identical to the NVIDIA profile
+/// (`x-model-id`) — confirm DeepInfra actually uses this header.
+static DEEPINFRA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
+    provider_type: "deepinfra",
+    default_base_url: "https://api.deepinfra.com/v1/openai",
+    protocols: OPENAI_PROTOCOLS,
+    credential_key_names: &["DEEPINFRA_API_KEY"],
+    base_url_config_keys: &["DEEPINFRA_BASE_URL"],
+    auth: AuthHeader::Bearer,
+    default_headers: &[],
+    passthrough_headers: &["x-model-id"],
+};
+
 /// Canonicalize an inference provider type string to a well-known identifier.
 ///
 /// Returns `Some(canonical_name)` for recognized inference providers,
@@ -166,6 +177,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> {
         "openai" => Some("openai"),
         "anthropic" => Some("anthropic"),
         "nvidia" => Some("nvidia"),
+        "deepinfra" => Some("deepinfra"),
         "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => {
             Some("google-vertex-ai")
         }
@@ -182,6 +194,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
         "openai" => Some(&OPENAI_PROFILE),
         "anthropic" => Some(&ANTHROPIC_PROFILE),
         "nvidia" => Some(&NVIDIA_PROFILE),
+        "deepinfra" => Some(&DEEPINFRA_PROFILE),
         "google-vertex-ai" => Some(&VERTEX_AI_PROFILE),
         _ => None,
     }
@@ -302,9 +315,21 @@ mod tests {
         assert!(profile_for("openai").is_some());
         assert!(profile_for("anthropic").is_some());
         assert!(profile_for("nvidia").is_some());
+        assert!(profile_for("deepinfra").is_some());
         assert!(profile_for("OpenAI").is_some()); // case insensitive
     }
 
+    #[test]
+    fn profile_for_deepinfra() {
+        // Pins the identity fields of the DeepInfra profile returned by
+        // `profile_for`: provider type, default base URL, and auth style.
+        let profile = profile_for("deepinfra").expect("deepinfra profile should exist");
+        assert_eq!(profile.provider_type, "deepinfra");
+        assert_eq!(
+            profile.default_base_url,
+            "https://api.deepinfra.com/v1/openai"
+        );
+        assert_eq!(profile.auth, AuthHeader::Bearer);
+    }
+
     #[test]
     fn profile_for_unknown_types() {
         assert!(profile_for("github").is_none());

@@ -116,6 +116,7 @@ impl ProviderRegistry {
         registry.register(providers::openai::SPEC);
         registry.register(providers::anthropic::SPEC);
         registry.register(providers::nvidia::SPEC);
+        registry.register(providers::deepinfra::SPEC);
         registry.register(providers::gitlab::SPEC);
         registry.register(providers::github::SPEC);
         registry.register(providers::outlook::OutlookProvider);

@@ -23,6 +23,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[
     include_str!("../../../providers/cursor.yaml"),
     include_str!("../../../providers/github.yaml"),
     include_str!("../../../providers/google-vertex-ai.yaml"),
+    include_str!("../../../providers/deepinfra.yaml"),
     include_str!("../../../providers/nvidia.yaml"),
     include_str!("../../../providers/pypi.yaml"),
 ];

@@ -0,0 +1,15 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::ProviderDiscoverySpec;
+
+/// Discovery spec for the `deepinfra` provider.
+///
+/// Lists `DEEPINFRA_API_KEY` as the only credential environment variable.
+pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
+    id: "deepinfra",
+    credential_env_vars: &["DEEPINFRA_API_KEY"],
+};
+
+// Invokes the shared `test_discovers_env_credential!` macro with the test
+// name, the environment variable, and a dummy key value. Presumably this
+// expands to a test checking that `SPEC` picks up `DEEPINFRA_API_KEY` from the
+// environment — the macro body is defined in the parent module; confirm there.
+test_discovers_env_credential!(
+    discovers_deepinfra_env_credentials,
+    "DEEPINFRA_API_KEY",
+    "di-test123"
+);
@@ -33,6 +33,7 @@ macro_rules! test_discovers_env_credential {
 pub mod anthropic;
 pub mod claude;
 pub mod codex;
+pub mod deepinfra;
 pub mod copilot;
 pub mod generic;
 pub mod github;

@@ -642,7 +642,11 @@ fn build_provider_url(
 
 fn build_backend_url(endpoint: &str, path: &str) -> String {
     let base = endpoint.trim_end_matches('/');
-    if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) {
+    // Strip the /v1 prefix from the request path when the base URL already
+    // contains a /v1 segment — either ending with it (e.g. openai, nvidia)
+    // or containing it internally (e.g. deepinfra: /v1/openai).
+    let base_has_v1 = base.ends_with("/v1") || base.contains("/v1/");
+    if base_has_v1 && (path == "/v1" || path.starts_with("/v1/")) {
         return format!("{base}{}", &path[3..]);
     }
 
@@ -704,6 +708,19 @@ mod tests {
         );
     }
 
+    #[test]
+    fn build_backend_url_dedupes_v1_for_base_with_v1_subpath() {
+        // DeepInfra's base URL carries `/v1` in the middle of its path
+        // (`/v1/openai`) rather than at the end. The leading `/v1` of the
+        // request path must still be dropped so the version segment is not
+        // emitted twice (i.e. not `.../v1/openai/v1/chat/completions`).
+        assert_eq!(
+            build_backend_url(
+                "https://api.deepinfra.com/v1/openai",
+                "/v1/chat/completions"
+            ),
+            "https://api.deepinfra.com/v1/openai/chat/completions"
+        );
+    }
+
     fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
         ResolvedRoute {
             name: "inference.local".to_string(),

@@ -253,6 +253,7 @@ The following provider types are supported.
 | `generic` | User-defined | Any service with custom credentials |
 | `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API and `gh` CLI. Refer to [GitHub Sandbox](/get-started/tutorials/github-sandbox). |
 | `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI |
+| `deepinfra` | `DEEPINFRA_API_KEY` | DeepInfra inference API |
 | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog |
 | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to [Inference Routing](/sandboxes/inference-routing). |
 | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | OpenCode |
@@ -278,7 +279,7 @@ The following providers have been tested with `inference.local`. Any provider th
 | Google Vertex AI | `vertex-prod` | `google-vertex-ai` | Regional, global, or multi-region Vertex endpoint | `GOOGLE_VERTEX_AI_TOKEN` or `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` |
 | Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` |
 | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` |
-| Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` |
+| DeepInfra | `deepinfra` | `deepinfra` | `https://api.deepinfra.com/v1/openai` | `DEEPINFRA_API_KEY` |
 | Groq | `groq` | `openai` | `https://api.groq.com/openai/v1` | `OPENAI_API_KEY` |
 | Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` |
 | LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` |

@@ -98,6 +98,7 @@ Built-in Providers v2 profiles currently include:
 | `cursor` | `agent` | None |
 | `github` | `source_control` | `GITHUB_TOKEN`, `GH_TOKEN` |
 | `google-vertex-ai` | `inference` | `GOOGLE_SERVICE_ACCOUNT_KEY`, `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `GOOGLE_VERTEX_AI_TOKEN`, `VERTEX_AI_TOKEN` |
+| `deepinfra` | `inference` | `DEEPINFRA_API_KEY` |
 | `nvidia` | `inference` | `NVIDIA_API_KEY` |
 | `pypi` | `data` | None |
 

@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Built-in provider profile for DeepInfra, an inference-category provider.
+id: deepinfra
+display_name: DeepInfra
+description: DeepInfra inference endpoints
+category: inference
+inference_capable: true
+credentials:
+  # Single required credential: the API key, sourced from DEEPINFRA_API_KEY and
+  # declared as bearer-style auth on the `authorization` header.
+  - name: api_key
+    description: DeepInfra API key
+    env_vars: [DEEPINFRA_API_KEY]
+    required: true
+    auth_style: bearer
+    header_name: authorization
+discovery:
+  # Refers to the credential declared above by its `name`.
+  credentials: [api_key]
+endpoints:
+  # Only the DeepInfra API host on port 443 is listed.
+  - host: api.deepinfra.com
+    port: 443
+    protocol: rest
+    access: read-write
+    enforcement: enforce
+# NOTE(review): only curl paths are listed here — confirm this matches the
+# binaries list used by the other inference profiles (e.g. nvidia.yaml).
+binaries: [/usr/bin/curl, /usr/local/bin/curl]