diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..a414e77
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,17 @@
+__pycache__
+*.py[cod]
+*.so
+.git
+.gitignore
+.github
+.eggs
+*.egg-info
+build/
+dist/
+*.npy
+assets/
+benchmarks/
+*.md
+!README.md
+.pre-commit-config.yaml
+docker/
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000..eb4bd19
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,61 @@
+# Builds the image from docker/Dockerfile on pushes, version tags, and PRs.
+# Images are pushed to the registry for every event except pull requests.
+name: Build and Publish Docker Image
+
+on:
+  push:
+    branches: [main, master]
+    tags: ['v*']
+  pull_request:
+    branches: [main, master]
+  workflow_dispatch:
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Container Registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha,prefix=,format=short
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: docker/Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/docker/DEPLOY.md b/docker/DEPLOY.md
new file mode 100644
index 0000000..b3cd623
--- /dev/null
+++ b/docker/DEPLOY.md
@@ -0,0 +1,108 @@
+# RouteLLM — Docker / K8S Deployment
+
+## Docker Image
+
+### Build
+
+```bash
+docker build -f docker/Dockerfile -t routellm:latest .
+```
+
+Customize via build args:
+
+```bash
+docker build -f docker/Dockerfile \
+  --build-arg ROUTERS="sw_ranking causal_llm" \
+  --build-arg STRONG_MODEL="gpt-4o" \
+  --build-arg PORT=6060 \
+  -t routellm:latest .
+```
+
+### Run
+
+```bash
+docker run -d -p 6060:6060 \
+  -e OPENAI_API_KEY="sk-..." \
+  -e HF_TOKEN="hf_..." \
+  --name routellm routellm:latest
+```
+
+With a custom LLM endpoint (e.g. self-hosted proxy):
+
+```bash
+docker run -d -p 6060:6060 \
+  -e OPENAI_API_KEY="sk-..." \
+  -e BASE_URL="http://your-proxy:8080/v1" \
+  -e STRONG_MODEL="openai/your-strong-model" \
+  -e WEAK_MODEL="openai/your-weak-model" \
+  --name routellm routellm:latest
+```
+
+With a custom router config file:
+
+```bash
+docker run -d -p 6060:6060 \
+  -v $(pwd)/config.yaml:/app/config.yaml \
+  -e CONFIG_FILE=/app/config.yaml \
+  -e OPENAI_API_KEY="sk-..." \
+  --name routellm routellm:latest
+```
+
+Check: `curl http://localhost:6060/health` → `{"status":"online"}`
+
+### Environment Variables
+
+| Variable         | Description                                    | Required                   |
+|------------------|------------------------------------------------|----------------------------|
+| `OPENAI_API_KEY` | API key for OpenAI-compatible LLMs             | yes                        |
+| `HF_TOKEN`       | HuggingFace token (needed when pulling HF models) | no (see note below)     |
+| `BASE_URL`       | Base URL for LLM endpoint (default: OpenAI)    | no                         |
+| `ROUTERS`        | Space-separated router names                   | no                         |
+| `PORT`           | Server port (default 6060)                     | no                         |
+| `STRONG_MODEL`   | Strong model ID                                | no                         |
+| `WEAK_MODEL`     | Weak model ID                                  | no                         |
+| `CONFIG_FILE`    | Path to custom config YAML                     | no                         |
+
+`HF_TOKEN` is only required when routers download models from HuggingFace Hub
+at startup. If you use `ROUTERS=random` or connect to an external LLM endpoint
+via `BASE_URL`, it is not needed.
+
+---
+
+## K8S Deployment
+
+All manifests are in `docker/deploy/`.
+
+```bash
+kubectl apply -f docker/deploy/secret.yaml
+kubectl apply -f docker/deploy/configmap.yaml
+kubectl apply -f docker/deploy/deployment.yaml
+kubectl apply -f docker/deploy/service.yaml
+```
+
+Verify:
+
+```bash
+kubectl get pods -l app=routellm
+kubectl port-forward svc/routellm 6060:6060
+curl http://localhost:6060/health
+```
+
+### Customizing
+
+- Set `OPENAI_API_KEY` and `HF_TOKEN` in `docker/deploy/secret.yaml`.
+- Adjust `STRONG_MODEL` / `WEAK_MODEL` / `ROUTERS` / `BASE_URL` in `docker/deploy/deployment.yaml`.
+- Router settings come from the ConfigMap, which is mounted at `/app/config.yaml`
+  and selected via `CONFIG_FILE`. To run on the server's defaults instead, remove
+  `CONFIG_FILE` and the `config` volume / volumeMount from `deployment.yaml`.
+
+---
+
+## CI (GitHub Actions)
+
+Workflow: `.github/workflows/docker-publish.yml`
+
+- Push to default branch → builds & pushes `:latest` + `:main` + `:<short-sha>`
+- Tag `v*` (e.g. `v1.0.0`) → pushes `:1.0.0`, `:1.0`, and `:<short-sha>`
+- Pull request → build-only (validates Dockerfile)
+- Images published to `ghcr.io/lm-sys/routellm`
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..95bbaef
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,55 @@
+FROM python:3.10-slim AS builder
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Install dependencies before copying the source so that code-only changes
+# reuse the cached pip layer.
+COPY pyproject.toml README.md ./
+
+RUN pip install --no-cache-dir ".[serve]"
+
+COPY routellm/ routellm/
+
+RUN find /usr/local -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; exit 0
+
+FROM python:3.10-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends --no-install-suggests ca-certificates && \
+    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+WORKDIR /app
+
+COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+COPY --from=builder /app/routellm /app/routellm
+COPY --from=builder /app/pyproject.toml /app/pyproject.toml
+
+ARG ROUTERS=random
+ARG PORT=6060
+ARG STRONG_MODEL=gpt-4-1106-preview
+ARG WEAK_MODEL=anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1
+
+ENV ROUTERS=${ROUTERS}
+ENV PORT=${PORT}
+ENV STRONG_MODEL=${STRONG_MODEL}
+ENV WEAK_MODEL=${WEAK_MODEL}
+
+EXPOSE ${PORT}
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:${PORT}/health')" || exit 1
+
+# Optional flags use ${VAR:+...} so they are passed only when VAR is set and
+# non-empty; ROUTERS is unquoted so space-separated names become separate args.
+ENTRYPOINT ["sh", "-c", "exec python -m routellm.openai_server --port ${PORT} --routers ${ROUTERS} --strong-model ${STRONG_MODEL} --weak-model ${WEAK_MODEL} ${BASE_URL:+--base-url ${BASE_URL}} ${OPENAI_API_KEY:+--api-key ${OPENAI_API_KEY}} ${CONFIG_FILE:+--config ${CONFIG_FILE}}"]
diff --git a/docker/deploy/configmap.yaml b/docker/deploy/configmap.yaml
new file mode 100644
index 0000000..4fe4d9a
--- /dev/null
+++ b/docker/deploy/configmap.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: routellm-config
+data:
+  config.yaml: |
+    sw_ranking:
+      arena_battle_datasets:
+        - lmsys/lmsys-arena-human-preference-55k
+        - routellm/gpt4_judge_battles
+      arena_embedding_datasets:
+        - routellm/arena_battles_embeddings
+        - routellm/gpt4_judge_battles_embeddings
+    causal_llm:
+      checkpoint_path: routellm/causal_llm_gpt4_augmented
diff --git a/docker/deploy/deployment.yaml b/docker/deploy/deployment.yaml
new file mode 100644
index 0000000..9102fbb
--- /dev/null
+++ b/docker/deploy/deployment.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: routellm
+  labels:
+    app: routellm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: routellm
+  template:
+    metadata:
+      labels:
+        app: routellm
+    spec:
+      containers:
+        - name: routellm
+          image: ghcr.io/lm-sys/routellm:latest
+          ports:
+            - containerPort: 6060
+          env:
+            - name: OPENAI_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: routellm-secrets
+                  key: openai-api-key
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: routellm-secrets
+                  key: hf-token
+            - name: BASE_URL
+              value: ""
+            - name: ROUTERS
+              value: "sw_ranking causal_llm"
+            - name: STRONG_MODEL
+              value: "gpt-4-1106-preview"
+            - name: WEAK_MODEL
+              value: "anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1"
+            # Point the server at the mounted ConfigMap file; without this the
+            # image entrypoint never passes --config and the mount is ignored.
+            - name: CONFIG_FILE
+              value: "/app/config.yaml"
+          volumeMounts:
+            - name: config
+              mountPath: /app/config.yaml
+              subPath: config.yaml
+          resources:
+            requests:
+              memory: "8Gi"
+              cpu: "2"
+            limits:
+              memory: "16Gi"
+              cpu: "4"
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 6060
+            initialDelaySeconds: 120
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 6060
+            initialDelaySeconds: 30
+            periodSeconds: 10
+      # Required (not optional): CONFIG_FILE above reads the file from this ConfigMap.
+      volumes:
+        - name: config
+          configMap:
+            name: routellm-config
diff --git a/docker/deploy/secret.yaml b/docker/deploy/secret.yaml
new file mode 100644
index 0000000..58a8b49
--- /dev/null
+++ b/docker/deploy/secret.yaml
@@ -0,0 +1,9 @@
+# Placeholder values only: replace before applying; never commit real credentials.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: routellm-secrets
+type: Opaque
+stringData:
+  openai-api-key: "sk-..."
+  hf-token: "hf_..."
diff --git a/docker/deploy/service.yaml b/docker/deploy/service.yaml
new file mode 100644
index 0000000..ce86026
--- /dev/null
+++ b/docker/deploy/service.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: routellm
+spec:
+  selector:
+    app: routellm
+  ports:
+    - port: 6060
+      targetPort: 6060
+  type: ClusterIP