diff --git a/.github/actions/docker-build-ecr/action.yml b/.github/actions/docker-build-ecr/action.yml new file mode 100644 index 00000000000..34854c5e6c6 --- /dev/null +++ b/.github/actions/docker-build-ecr/action.yml @@ -0,0 +1,101 @@ +name: Docker build and push to ECR (with ECR registry cache) +description: | + Build a Docker image and push to AWS ECR, using an ECR-stored layer cache + rather than the GHA cache backend. This is a temporary inline replacement + for `cardstack/gh-actions/.github/workflows/docker-ecr.yml` while we + iterate on which cache backend works best for boxel's large pnpm-fetch + layers (the GHA backend transfers them too slowly to be a net win). + + Cache lives under the `:buildcache` tag in the same ECR repository as the + image tags. ECR pulls are typically much faster than the GHA cache service + when the runner is in the same AWS region as the registry (us-east-1). + +inputs: + repository: + required: true + description: ECR repository name (without registry prefix) + environment: + required: true + description: Deployment environment (staging or production) + dockerfile: + required: false + default: "Dockerfile" + description: Path to the Dockerfile + context: + required: false + default: "." 
+ description: Docker build context + build-args: + required: false + default: "" + description: Build args passed to docker/build-push-action + platforms: + required: false + default: "linux/amd64" + description: Target platform + +outputs: + image: + description: Final image tag (sha-tagged) suitable for ECS task-def update + value: ${{ steps.tags.outputs.tag_sha }} + +runs: + using: composite + steps: + - uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0 + + - name: Set up AWS role for environment + shell: bash + env: + INPUT_ENVIRONMENT: ${{ inputs.environment }} + run: | + if [ "$INPUT_ENVIRONMENT" = "production" ]; then + echo "AWS_ROLE_ARN=arn:aws:iam::120317779495:role/github" >> "$GITHUB_ENV" + elif [ "$INPUT_ENVIRONMENT" = "staging" ]; then + echo "AWS_ROLE_ARN=arn:aws:iam::680542703984:role/github" >> "$GITHUB_ENV" + else + echo "unrecognized environment: $INPUT_ENVIRONMENT" + exit 1 + fi + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@d979d5b3a71173a29b74b5b88418bfda9437d885 # v6.1.1 + with: + role-to-assume: ${{ env.AWS_ROLE_ARN }} + aws-region: us-east-1 + + - id: login-ecr + uses: aws-actions/amazon-ecr-login@fa648b43de3d4d023bcb3f89ed6940096949c419 # v2.1.5 + + - id: tags + shell: bash + env: + REGISTRY: ${{ steps.login-ecr.outputs.registry }} + REPOSITORY: ${{ inputs.repository }} + ENVIRONMENT: ${{ inputs.environment }} + run: | + TAG_PREFIX="$REGISTRY/$REPOSITORY" + { + echo "tag_sha=${TAG_PREFIX}:${GITHUB_SHA::7}" + echo "tag_env=${TAG_PREFIX}:${ENVIRONMENT}" + echo "tag_latest=${TAG_PREFIX}:latest" + echo "tag_buildcache=${TAG_PREFIX}:buildcache" + } >> "$GITHUB_OUTPUT" + + - name: Build and push + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 + with: + context: ${{ inputs.context }} + file: ${{ inputs.dockerfile }} + push: true + # `image-manifest=true,oci-mediatypes=true` is required for the + # registry cache to round-trip through ECR — 
ECR rejects the + # default cache manifest format buildx uses for other registries. + cache-from: type=registry,ref=${{ steps.tags.outputs.tag_buildcache }} + cache-to: type=registry,ref=${{ steps.tags.outputs.tag_buildcache }},mode=max,image-manifest=true,oci-mediatypes=true + platforms: ${{ inputs.platforms }} + build-args: ${{ inputs.build-args }} + tags: | + ${{ steps.tags.outputs.tag_latest }} + ${{ steps.tags.outputs.tag_sha }} + ${{ steps.tags.outputs.tag_env }} diff --git a/.github/workflows/manual-deploy.yml b/.github/workflows/manual-deploy.yml index 09daa9d672a..764ba6f2136 100644 --- a/.github/workflows/manual-deploy.yml +++ b/.github/workflows/manual-deploy.yml @@ -73,14 +73,29 @@ jobs: description: 'Deployment started', }); + # Temporarily inlined (CS-11143): the reusable docker-ecr workflow in + # cardstack/gh-actions caches via `type=gha`, which is a net loss for our + # large pnpm-fetch layers — observed cache transfer ~230s vs ~30s to just + # rerun the fetch. We call a local composite action instead that uses an + # ECR registry cache in us-east-1 (NOTE(review): `ubuntu-latest` runners are GitHub-hosted, not in AWS — confirm the speedup by measurement). + # Once we're confident in the approach we can fold it back into the shared + # workflow. 
build-ai-bot: name: Build ai-bot Docker image - uses: cardstack/gh-actions/.github/workflows/docker-ecr.yml@main - secrets: inherit - with: - repository: "boxel-ai-bot-${{ inputs.environment }}" - environment: ${{ inputs.environment }} - dockerfile: "packages/ai-bot/Dockerfile" + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + outputs: + image: ${{ steps.build.outputs.image }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - id: build + uses: ./.github/actions/docker-build-ecr + with: + repository: "boxel-ai-bot-${{ inputs.environment }}" + environment: ${{ inputs.environment }} + dockerfile: "packages/ai-bot/Dockerfile" deploy-ai-bot: needs: [build-ai-bot, post-migrate-db] @@ -97,12 +112,20 @@ jobs: build-bot-runner: name: Build bot-runner Docker image - uses: cardstack/gh-actions/.github/workflows/docker-ecr.yml@main - secrets: inherit - with: - repository: "boxel-bot-runner-${{ inputs.environment }}" - environment: ${{ inputs.environment }} - dockerfile: "packages/bot-runner/Dockerfile" + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + outputs: + image: ${{ steps.build.outputs.image }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - id: build + uses: ./.github/actions/docker-build-ecr + with: + repository: "boxel-bot-runner-${{ inputs.environment }}" + environment: ${{ inputs.environment }} + dockerfile: "packages/bot-runner/Dockerfile" deploy-bot-runner: needs: [build-bot-runner, post-migrate-db] @@ -142,56 +165,96 @@ jobs: build-realm-server: name: Build realm-server Docker image - uses: cardstack/gh-actions/.github/workflows/docker-ecr.yml@main - secrets: inherit - with: - repository: "boxel-realm-server-${{ inputs.environment }}" - environment: ${{ inputs.environment }} - dockerfile: "packages/realm-server/realm-server.Dockerfile" - build-args: | - "realm_server_script=start:${{ inputs.environment }}" + runs-on: 
ubuntu-latest + permissions: + contents: read + id-token: write + outputs: + image: ${{ steps.build.outputs.image }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - id: build + uses: ./.github/actions/docker-build-ecr + with: + repository: "boxel-realm-server-${{ inputs.environment }}" + environment: ${{ inputs.environment }} + dockerfile: "packages/realm-server/realm-server.Dockerfile" + build-args: | + "realm_server_script=start:${{ inputs.environment }}" build-prerender-manager: name: Build prerender manager Docker image - uses: cardstack/gh-actions/.github/workflows/docker-ecr.yml@main - secrets: inherit - with: - repository: "boxel-prerender-manager-${{ inputs.environment }}" - environment: ${{ inputs.environment }} - dockerfile: "packages/realm-server/prerender-manager.Dockerfile" - build-args: | - "prerender_manager_script=start:prerender-manager" + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + outputs: + image: ${{ steps.build.outputs.image }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - id: build + uses: ./.github/actions/docker-build-ecr + with: + repository: "boxel-prerender-manager-${{ inputs.environment }}" + environment: ${{ inputs.environment }} + dockerfile: "packages/realm-server/prerender-manager.Dockerfile" + build-args: | + "prerender_manager_script=start:prerender-manager" build-prerender: name: Build prerender Docker image - uses: cardstack/gh-actions/.github/workflows/docker-ecr.yml@main - secrets: inherit - with: - repository: "boxel-prerender-server-${{ inputs.environment }}" - environment: ${{ inputs.environment }} - dockerfile: "packages/realm-server/prerender.Dockerfile" - build-args: | - "prerender_script=start:prerender-${{ inputs.environment }}" + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + outputs: + image: ${{ steps.build.outputs.image }} + steps: + - uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - id: build + uses: ./.github/actions/docker-build-ecr + with: + repository: "boxel-prerender-server-${{ inputs.environment }}" + environment: ${{ inputs.environment }} + dockerfile: "packages/realm-server/prerender.Dockerfile" + build-args: | + "prerender_script=start:prerender-${{ inputs.environment }}" build-worker: name: Build worker Docker image - uses: cardstack/gh-actions/.github/workflows/docker-ecr.yml@main - secrets: inherit - with: - repository: "boxel-worker-${{ inputs.environment }}" - environment: ${{ inputs.environment }} - dockerfile: "packages/realm-server/worker.Dockerfile" - build-args: | - "worker_script=start:worker-${{ inputs.environment }}" + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + outputs: + image: ${{ steps.build.outputs.image }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - id: build + uses: ./.github/actions/docker-build-ecr + with: + repository: "boxel-worker-${{ inputs.environment }}" + environment: ${{ inputs.environment }} + dockerfile: "packages/realm-server/worker.Dockerfile" + build-args: | + "worker_script=start:worker-${{ inputs.environment }}" build-pg-migration: name: Build pg-migration Docker image - uses: cardstack/gh-actions/.github/workflows/docker-ecr.yml@main - secrets: inherit - with: - repository: "boxel-pg-migration-${{ inputs.environment }}" - environment: ${{ inputs.environment }} - dockerfile: "packages/postgres/Dockerfile" + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + outputs: + image: ${{ steps.build.outputs.image }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - id: build + uses: ./.github/actions/docker-build-ecr + with: + repository: "boxel-pg-migration-${{ inputs.environment }}" + environment: ${{ inputs.environment }} + dockerfile: "packages/postgres/Dockerfile" migrate-db: # use 
"deploy-host" and "build-realm-server" as deps so we can run diff --git a/packages/postgres/Dockerfile b/packages/postgres/Dockerfile index 4387e42ed42..887a4381d62 100644 --- a/packages/postgres/Dockerfile +++ b/packages/postgres/Dockerfile @@ -6,8 +6,17 @@ ARG PNPM_VERSION RUN apt-get update && apt-get install -y postgresql RUN npm install -g pnpm@${PNPM_VERSION} WORKDIR /boxel + +# Cache-friendly dependency fetch: this layer only re-runs when the lockfile +# (or patches it references) changes, not on every source edit. `pnpm fetch` +# populates the global pnpm store in $HOME from the lockfile alone, so the +# subsequent `pnpm install --offline` doesn't need the registry. +COPY pnpm-lock.yaml pnpm-workspace.yaml ./ +COPY patches/ ./patches +RUN CI=1 pnpm fetch + COPY . . -RUN pnpm install --frozen-lockfile +RUN CI=1 pnpm install -r --offline WORKDIR /boxel/packages/postgres diff --git a/packages/realm-server/prerender-manager.Dockerfile b/packages/realm-server/prerender-manager.Dockerfile index c168bbdd568..87c59b2bc95 100644 --- a/packages/realm-server/prerender-manager.Dockerfile +++ b/packages/realm-server/prerender-manager.Dockerfile @@ -9,14 +9,15 @@ WORKDIR /realm-server RUN apt-get update && apt-get install -y ca-certificates curl unzip jq RUN npm install -g pnpm@11.0.9 -COPY pnpm-lock.yaml ./ - +# Cache-friendly dependency fetch: this layer only re-runs when the lockfile +# (or patches it references) changes, not on every source edit. `pnpm fetch` +# populates the global pnpm store in $HOME from the lockfile alone, so the +# subsequent `pnpm install --offline` doesn't need the registry. +COPY pnpm-lock.yaml pnpm-workspace.yaml ./ COPY patches/ ./patches -COPY vendor/ ./vendor - -ADD . ./ - RUN CI=1 pnpm fetch + +COPY . 
./ RUN CI=1 pnpm install -r --offline EXPOSE 4222 diff --git a/packages/realm-server/prerender.Dockerfile b/packages/realm-server/prerender.Dockerfile index 9963e43b4e3..f1a24eac451 100644 --- a/packages/realm-server/prerender.Dockerfile +++ b/packages/realm-server/prerender.Dockerfile @@ -60,14 +60,15 @@ ENV PUPPETEER_CHROME_ARGS="--disable-dev-shm-usage" RUN mkdir -p /home/pptruser/Downloads "${PUPPETEER_CACHE_DIR}" -COPY pnpm-lock.yaml ./ - +# Cache-friendly dependency fetch: this layer only re-runs when the lockfile +# (or patches it references) changes, not on every source edit. `pnpm fetch` +# populates the global pnpm store in $HOME from the lockfile alone, so the +# subsequent `pnpm install --offline` doesn't need the registry. +COPY pnpm-lock.yaml pnpm-workspace.yaml ./ COPY patches/ ./patches -COPY vendor/ ./vendor - -ADD . ./ - RUN CI=1 pnpm fetch + +COPY . ./ RUN CI=1 pnpm install -r --offline RUN chown -R pptruser:pptruser /home/pptruser /realm-server diff --git a/packages/realm-server/realm-server.Dockerfile b/packages/realm-server/realm-server.Dockerfile index 1d4f3cf0715..4f236466384 100644 --- a/packages/realm-server/realm-server.Dockerfile +++ b/packages/realm-server/realm-server.Dockerfile @@ -9,14 +9,15 @@ WORKDIR /realm-server RUN apt-get update && apt-get install -y ca-certificates curl unzip postgresql jq rsync git RUN npm install -g pnpm@11.0.9 -COPY pnpm-lock.yaml ./ - +# Cache-friendly dependency fetch: this layer only re-runs when the lockfile +# (or patches it references) changes, not on every source edit. `pnpm fetch` +# populates the global pnpm store in $HOME from the lockfile alone, so the +# subsequent `pnpm install --offline` doesn't need the registry. +COPY pnpm-lock.yaml pnpm-workspace.yaml ./ COPY patches/ ./patches -COPY vendor/ ./vendor - -ADD . ./ - RUN CI=1 pnpm fetch + +COPY . 
./ RUN CI=1 pnpm install -r --offline EXPOSE 3000 diff --git a/packages/realm-server/worker.Dockerfile b/packages/realm-server/worker.Dockerfile index be8b5438423..fcca828d42c 100644 --- a/packages/realm-server/worker.Dockerfile +++ b/packages/realm-server/worker.Dockerfile @@ -9,14 +9,15 @@ WORKDIR /realm-server RUN apt-get update && apt-get install -y ca-certificates curl unzip postgresql jq RUN npm install -g pnpm@11.0.9 -COPY pnpm-lock.yaml ./ - +# Cache-friendly dependency fetch: this layer only re-runs when the lockfile +# (or patches it references) changes, not on every source edit. `pnpm fetch` +# populates the global pnpm store in $HOME from the lockfile alone, so the +# subsequent `pnpm install --offline` doesn't need the registry. +COPY pnpm-lock.yaml pnpm-workspace.yaml ./ COPY patches/ ./patches -COPY vendor/ ./vendor - -ADD . ./ - RUN CI=1 pnpm fetch + +COPY . ./ RUN CI=1 pnpm install -r --offline EXPOSE 3000