lm-sys · panpan0000 · May 26, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,22 @@
+# Patterns are matched against paths relative to the build-context root, so a
+# bare name such as `__pycache__` only matches at the top level. The `**/`
+# prefix makes the bytecode rules apply at any depth (e.g. inside routellm/).
+**/__pycache__
+**/*.py[cod]
+*.so
+.git
+.gitignore
+.github
+.eggs
+*.egg-info
+build/
+dist/
+*.npy
+assets/
+benchmarks/
+# Drop top-level Markdown, but keep README.md: the Dockerfile copies it next
+# to pyproject.toml before running pip install.
+*.md
+!README.md
+.pre-commit-config.yaml
+docker/
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
@@ -0,0 +1,71 @@
+name: Build and Publish Docker Image
+
+# Triggers: pushes to main/master and v* tags publish images; pull requests
+# against main/master only build; manual runs are allowed.
+on:
+  push:
+    branches:
+      - main
+      - master
+    tags:
+      - 'v*'
+  pull_request:
+    branches:
+      - main
+      - master
+  workflow_dispatch:
+
+# Shared by the registry login and image metadata steps below.
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    # The login step authenticates with GITHUB_TOKEN, hence packages: write.
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Skipped on pull requests, which never push an image.
+      - name: Log in to Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Its tags/labels outputs feed the build step through steps.meta.outputs.
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha,prefix=,format=short
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: docker/Dockerfile
+          # Pull requests are build-only; every other event pushes.
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/docker/DEPLOY.md b/docker/DEPLOY.md
@@ -0,0 +1,107 @@
+# RouteLLM — Docker / K8S Deployment
+
+## Docker Image
+
+### Build
+
+```bash
+docker build -f docker/Dockerfile -t routellm:latest .
+```
+
+Customize via build args:
+
+```bash
+docker build -f docker/Dockerfile \
+  --build-arg ROUTERS="sw_ranking causal_llm" \
+  --build-arg STRONG_MODEL="gpt-4o" \
+  --build-arg PORT=6060 \
+  -t routellm:latest .
+```
+
+### Run
+
+```bash
+docker run -d -p 6060:6060 \
+  -e OPENAI_API_KEY="sk-..." \
+  -e HF_TOKEN="hf_..." \
+  --name routellm routellm:latest
+```
+
+With a custom LLM endpoint (e.g. self-hosted proxy):
+
+```bash
+docker run -d -p 6060:6060 \
+  -e OPENAI_API_KEY="sk-..." \
+  -e BASE_URL="http://your-proxy:8080/v1" \
+  -e STRONG_MODEL="openai/your-strong-model" \
+  -e WEAK_MODEL="openai/your-weak-model" \
+  --name routellm routellm:latest
+```
+
+With a custom router config file:
+
+```bash
+docker run -d -p 6060:6060 \
+  -v $(pwd)/config.yaml:/app/config.yaml \
+  -e CONFIG_FILE=/app/config.yaml \
+  -e OPENAI_API_KEY="sk-..." \
+  --name routellm routellm:latest
+```
+
+Check: `curl http://localhost:6060/health`  →  `{"status":"online"}`
+
+### Environment Variables
+
+| Variable         | Description                                    | Required                   |
+|------------------|------------------------------------------------|----------------------------|
+| `OPENAI_API_KEY` | API key for OpenAI-compatible LLMs             | yes                        |
+| `HF_TOKEN`       | HuggingFace token (needed when pulling HF models) | no (see note below)     |
+| `BASE_URL`       | Base URL for LLM endpoint (default: OpenAI)    | no                         |
+| `ROUTERS`        | Space-separated router names                   | no                         |
+| `PORT`           | Server port (default 6060)                     | no                         |
+| `STRONG_MODEL`   | Strong model ID                                | no                         |
+| `WEAK_MODEL`     | Weak model ID                                  | no                         |
+| `CONFIG_FILE`    | Path to custom config YAML                     | no                         |
+
+`HF_TOKEN` is only required when routers download models from HuggingFace Hub
+at startup. If you use `ROUTERS=random` or connect to an external LLM endpoint
+via `BASE_URL`, it is not needed.
+
+---
+
+## K8S Deployment
+
+All manifests are in `docker/deploy/`.
+
+```bash
+kubectl apply -f docker/deploy/secret.yaml
+kubectl apply -f docker/deploy/configmap.yaml
+kubectl apply -f docker/deploy/deployment.yaml
+kubectl apply -f docker/deploy/service.yaml
+```
+
+Verify:
+
+```bash
+kubectl get pods -l app=routellm
+kubectl port-forward svc/routellm 6060:6060
+curl http://localhost:6060/health
+```
+
+### Customizing
+
+- Set `OPENAI_API_KEY` and `HF_TOKEN` in `docker/deploy/secret.yaml`.
+- Adjust `STRONG_MODEL` / `WEAK_MODEL` / `ROUTERS` / `BASE_URL` in `docker/deploy/deployment.yaml`.
+- The ConfigMap is optional: the mounted file is only read when `CONFIG_FILE`
+  (e.g. `/app/config.yaml`) is set on the pod; otherwise the defaults apply.
+
+---
+
+## CI (GitHub Actions)
+
+Workflow: `.github/workflows/docker-publish.yml`
+
+- Push to default branch → builds & pushes `:latest` + `:main` + `:<sha>`
+- Tag `v*` (e.g. `v1.0.0`) → pushes `:1.0.0` + `:1.0` + `:<sha>` (no `v` prefix, no `:1`)
+- Pull request → build-only (validates Dockerfile)
+- Images published to `ghcr.io/lm-sys/routellm`
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -0,0 +1,67 @@
+# ---- Build stage: install Python dependencies ----
+FROM python:3.10-slim AS builder
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+# git is only installed in the build stage.
+RUN apt-get update -y && \
+	apt-get install -y --no-install-recommends git && \
+	rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Install dependencies before copying the source so this layer stays cached
+# when only routellm/ changes.
+COPY pyproject.toml README.md ./
+
+RUN pip install --no-cache-dir ".[serve]"
+
+COPY routellm/ routellm/
+
+# Best-effort removal of bytecode caches; the trailing `exit 0` ignores errors.
+RUN find /usr/local -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; exit 0
+
+# ---- Runtime stage ----
+FROM python:3.10-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+RUN apt-get update -y && \
+	apt-get install -y --no-install-recommends --no-install-suggests ca-certificates && \
+	rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+WORKDIR /app
+
+# Bring over the installed packages, executables and application source.
+COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+COPY --from=builder /app/routellm /app/routellm
+COPY --from=builder /app/pyproject.toml /app/pyproject.toml
+
+# Build-time defaults; each is re-exported as ENV so `docker run -e` can override it.
+ARG ROUTERS=random
+ARG PORT=6060
+ARG STRONG_MODEL=gpt-4-1106-preview
+ARG WEAK_MODEL=anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1
+
+ENV ROUTERS=${ROUTERS}
+ENV PORT=${PORT}
+ENV STRONG_MODEL=${STRONG_MODEL}
+ENV WEAK_MODEL=${WEAK_MODEL}
+
+EXPOSE ${PORT}
+
+# Shell-form CMD: ${PORT} is expanded inside the container when the check runs.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
+	CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:${PORT}/health')" || exit 1
+
+# Launch the server through a shell so the environment is expanded at start-up.
+# - ROUTERS is left unquoted on purpose: it is a space-separated list and must
+#   split into one argument per router.
+# - Every other value is quoted so spaces or glob characters in it cannot be
+#   split or expanded by the shell.
+# - BASE_URL, OPENAI_API_KEY and CONFIG_FILE add their flag only when set and
+#   non-empty.
+ENTRYPOINT ["sh", "-c", "exec python -m routellm.openai_server --port \"${PORT}\" --routers ${ROUTERS} --strong-model \"${STRONG_MODEL}\" --weak-model \"${WEAK_MODEL}\" ${BASE_URL:+--base-url \"${BASE_URL}\"} ${OPENAI_API_KEY:+--api-key \"${OPENAI_API_KEY}\"} ${CONFIG_FILE:+--config \"${CONFIG_FILE}\"}"]
diff --git a/docker/deploy/configmap.yaml b/docker/deploy/configmap.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: routellm-config  # referenced by the `config` volume in deployment.yaml
+data:
+  config.yaml: |  # mounted into the pod at /app/config.yaml via subPath
+    sw_ranking:
+      arena_battle_datasets:
+        - lmsys/lmsys-arena-human-preference-55k
+        - routellm/gpt4_judge_battles
+      arena_embedding_datasets:
+        - routellm/arena_battles_embeddings
+        - routellm/gpt4_judge_battles_embeddings
+    causal_llm:
+      checkpoint_path: routellm/causal_llm_gpt4_augmented
diff --git a/docker/deploy/deployment.yaml b/docker/deploy/deployment.yaml
@@ -0,0 +1,78 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: routellm
+  labels:
+    app: routellm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: routellm
+  template:
+    metadata:
+      labels:
+        app: routellm
+    spec:
+      containers:
+        - name: routellm
+          image: ghcr.io/lm-sys/routellm:latest
+          ports:
+            - containerPort: 6060
+          env:
+            - name: OPENAI_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: routellm-secrets
+                  key: openai-api-key
+            # HF_TOKEN is documented as optional, so a Secret without the
+            # hf-token key must not block the pod from starting.
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: routellm-secrets
+                  key: hf-token
+                  optional: true
+            # An empty BASE_URL makes the image entrypoint omit --base-url.
+            - name: BASE_URL
+              value: ""
+            - name: ROUTERS
+              value: "sw_ranking causal_llm"
+            - name: STRONG_MODEL
+              value: "gpt-4-1106-preview"
+            - name: WEAK_MODEL
+              value: "anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1"
+            # NOTE: the image entrypoint passes --config only when CONFIG_FILE
+            # is set, so the file mounted below is ignored until this entry is
+            # enabled. Enable it only if the routellm-config ConfigMap exists.
+            # - name: CONFIG_FILE
+            #   value: "/app/config.yaml"
+          volumeMounts:
+            - name: config
+              mountPath: /app/config.yaml
+              subPath: config.yaml
+          resources:
+            requests:
+              memory: "8Gi"
+              cpu: "2"
+            limits:
+              memory: "16Gi"
+              cpu: "4"
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 6060
+            initialDelaySeconds: 120
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 6060
+            initialDelaySeconds: 30
+            periodSeconds: 10
+      volumes:
+        - name: config
+          configMap:
+            name: routellm-config
+            # The pod still starts when this ConfigMap has not been created.
+            optional: true
diff --git a/docker/deploy/secret.yaml b/docker/deploy/secret.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: routellm-secrets  # referenced by the secretKeyRef entries in deployment.yaml
+type: Opaque
+stringData:
+  openai-api-key: "sk-..."  # placeholder: replace with a real key before applying
+  hf-token: "hf_..."  # placeholder: replace with a real token before applying
diff --git a/docker/deploy/service.yaml b/docker/deploy/service.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: routellm
+spec:
+  selector:
+    app: routellm  # same label the Deployment puts on its pods
+  ports:
+    - port: 6060  # port exposed by the Service
+      targetPort: 6060  # matches containerPort in deployment.yaml
+  type: ClusterIP