diff --git a/.gitignore b/.gitignore index a50919e22..34961aeff 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,4 @@ env* # For integration tests. .tmptest/ +.golangci.yml diff --git a/agent/cmd/cmd.go b/agent/cmd/cmd.go index 2aec0a22b..c91cc03da 100644 --- a/agent/cmd/cmd.go +++ b/agent/cmd/cmd.go @@ -390,8 +390,12 @@ func (a *App) Run(ctx context.Context) error { "registry_server": nginx.GetServer( a.config.Registry.Docker.HTTP.Net, a.config.Registry.Docker.HTTP.Addr), "agent_server": fmt.Sprintf("127.0.0.1:%d", a.flags.AgentServerPort), - "registry_backup": a.config.RegistryBackup}, - nginx.WithTLS(a.config.TLS)) + "registry_backup": a.config.RegistryBackup, + // Pass timeout parameters from agent server config + "download_timeout": nginx.FormatDurationForNginx(a.config.AgentServer.DownloadTimeout), + "container_runtime_timeout": nginx.FormatDurationForNginx(a.config.AgentServer.ContainerRuntimeTimeout), + "readiness_timeout": nginx.FormatDurationForNginx(a.config.AgentServer.ReadinessTimeout), + }, nginx.WithTLS(a.config.TLS)) nginxDone <- err }() diff --git a/config/agent/base.yaml b/config/agent/base.yaml index 69c183888..ffb8e5a25 100644 --- a/config/agent/base.yaml +++ b/config/agent/base.yaml @@ -58,6 +58,15 @@ registry: peer_id_factory: addr_hash +agentserver: + # Timeout configurations (also used by nginx) + download_timeout: 15m # nginx proxy_read_timeout for downloads + container_runtime_timeout: 10m # nginx timeout for container operations (pull/preload) + readiness_timeout: 30s # nginx timeout for health checks + + # Request configuration + enable_request_logging: false # Enable detailed request logging + # Allow agent to only serve localhost and Docker default bridge requests. 
allowed_cidrs: - 127.0.0.1 diff --git a/config/origin/base.yaml b/config/origin/base.yaml index 9e98701d9..978fbd81a 100644 --- a/config/origin/base.yaml +++ b/config/origin/base.yaml @@ -48,6 +48,17 @@ blobserver: listener: net: unix addr: /tmp/kraken-origin.sock + + # Timeout configurations (also used by nginx) + download_timeout: 15m # nginx proxy_read_timeout for downloads + upload_timeout: 10m # nginx proxy_read_timeout/send_timeout for uploads + replication_timeout: 3m # nginx timeout for replication operations + backend_timeout: 2m # nginx proxy_connect_timeout + readiness_timeout: 30s # internal readiness check timeout + + # Concurrency limits + max_concurrent_downloads: 10 + max_concurrent_uploads: 5 nginx: name: kraken-origin diff --git a/config/tracker/base.yaml b/config/tracker/base.yaml index ddeb07c45..aead50c1f 100644 --- a/config/tracker/base.yaml +++ b/config/tracker/base.yaml @@ -40,6 +40,16 @@ trackerserver: listener: net: unix addr: /tmp/kraken-tracker.sock + + # Timeout configurations (also used by nginx) + metainfo_timeout: 2m # nginx proxy_read_timeout for metainfo requests to origins + announce_timeout: 30s # nginx proxy_read_timeout for announce operations + readiness_timeout: 30s # nginx timeout for health checks + + # Rate limiting + get_metainfo_limit: 1s # Limits unique metainfo requests per namespace/digest + announce_limit: 50 # Maximum peers returned on each announce + announce_interval: 3s # How often peers should announce nginx: name: kraken-tracker diff --git a/nginx/config/agent.go b/nginx/config/agent.go index cc425539e..414117871 100644 --- a/nginx/config/agent.go +++ b/nginx/config/agent.go @@ -40,13 +40,44 @@ server { gzip on; gzip_types text/plain test/csv application/json; + # Timeout configurations from agent server config + proxy_connect_timeout {{.readiness_timeout}}; + proxy_send_timeout {{.download_timeout}}; + proxy_read_timeout {{.download_timeout}}; + location ~ ^/(health|readiness)$ { proxy_pass 
http://agent-server; + + # Use shorter timeout for health checks + proxy_read_timeout {{.readiness_timeout}}; + proxy_send_timeout {{.readiness_timeout}}; + } + + # Container runtime operations (preload/pull) need longer timeouts + location ~ ^/preload/ { + proxy_pass http://agent-server; + + # Use container runtime timeout for these operations + proxy_read_timeout {{.container_runtime_timeout}}; + proxy_send_timeout {{.container_runtime_timeout}}; + } + + # Download operations + location ~ ^/namespace/.*/blobs/ { + proxy_pass http://agent-server; + + # Use download timeout for blob operations + proxy_read_timeout {{.download_timeout}}; + proxy_send_timeout {{.download_timeout}}; } location / { proxy_pass http://registry-backend; proxy_next_upstream error timeout http_404 http_500; + + # Standard timeouts for registry operations + proxy_read_timeout {{.download_timeout}}; + proxy_send_timeout {{.download_timeout}}; } } ` diff --git a/nginx/config/origin.go b/nginx/config/origin.go index c1ed89ddb..f5e5f719b 100644 --- a/nginx/config/origin.go +++ b/nginx/config/origin.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -28,8 +28,65 @@ server { gzip on; gzip_types text/plain test/csv application/json; + # Timeout configurations from origin server config + proxy_connect_timeout {{.backend_timeout}}; + proxy_send_timeout {{.upload_timeout}}; + proxy_read_timeout {{.download_timeout}}; + + # Disable buffering for large blob transfers + # + # proxy_buffering off: Stream responses directly from upstream to client + # instead of buffering entire response in nginx memory/disk. 
Critical for + # large container image layers (multi-GB) to avoid memory exhaustion and + # provide immediate streaming to clients. + # + # proxy_request_buffering off: Stream request body directly to upstream + # instead of buffering entire request. Enables immediate upload streaming + # for large image pushes without requiring disk space for temporary files. + # + # Without these settings, nginx would buffer entire blobs before forwarding, + # causing high memory usage, storage requirements, and delayed transfers. + proxy_buffering off; + proxy_request_buffering off; + location / { proxy_pass http://{{.server}}; + + # Pass original client info + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Special handling for upload operations with longer timeout + location ~ ^/namespace/.*/blobs/.*/uploads { + proxy_pass http://{{.server}}; + + # Use upload timeout for these operations + proxy_read_timeout {{.upload_timeout}}; + proxy_send_timeout {{.upload_timeout}}; + + # Pass original client info + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Replication operations with their own timeout + location ~ ^/namespace/.*/blobs/.*/remote { + proxy_pass http://{{.server}}; + + # Use replication timeout for these operations + proxy_read_timeout {{.replication_timeout}}; + proxy_send_timeout {{.replication_timeout}}; + + # Pass original client info + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; } } ` diff --git a/nginx/config/tracker.go b/nginx/config/tracker.go index e2854e289..1ae639c12 100644 --- a/nginx/config/tracker.go +++ b/nginx/config/tracker.go @@ -4,7 
+4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -29,10 +29,40 @@ server { access_log {{.access_log_path}}; error_log {{.error_log_path}}; + # Timeout configurations from tracker server config + # + # proxy_connect_timeout: Maximum time to establish connection to tracker backend. + # Uses readiness_timeout since connection establishment should be fast for + # healthy tracker instances. Slow connections indicate network or server issues. + proxy_connect_timeout {{.readiness_timeout}}; + proxy_send_timeout {{.announce_timeout}}; + proxy_read_timeout {{.announce_timeout}}; + location / { proxy_pass http://tracker; + + # Pass original client info + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Health and readiness checks with shorter timeout + location ~ ^/(health|readiness)$ { + proxy_pass http://tracker; + + proxy_read_timeout {{.readiness_timeout}}; + proxy_send_timeout {{.readiness_timeout}}; + + # Pass original client info + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; } + # Metainfo requests need longer timeout (cached) location ~* ^/namespace/.*/blobs/.*/metainfo$ { proxy_pass http://tracker; @@ -41,6 +71,31 @@ server { proxy_cache_valid 200 5m; proxy_cache_valid any 1s; proxy_cache_lock on; + + # Use metainfo timeout for these operations + proxy_read_timeout {{.metainfo_timeout}}; + proxy_send_timeout {{.metainfo_timeout}}; + + # Pass original client info + proxy_set_header Host 
$host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Announce operations + location ~ ^/announce { + proxy_pass http://tracker; + + # Use announce timeout for these operations + proxy_read_timeout {{.announce_timeout}}; + proxy_send_timeout {{.announce_timeout}}; + + # Pass original client info + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; } } ` diff --git a/nginx/nginx.go b/nginx/nginx.go index 3133ecdac..afd1361b3 100644 --- a/nginx/nginx.go +++ b/nginx/nginx.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -23,6 +23,7 @@ import ( "path" "path/filepath" "text/template" + "time" "github.com/uber/kraken/nginx/config" "github.com/uber/kraken/utils/httputil" @@ -169,7 +170,7 @@ func Run(config Config, params map[string]interface{}, opts ...Option) error { } // Create root directory for generated files for nginx. 
// FormatDurationForNginx converts a Go time.Duration into an nginx time value
// expressed in whole seconds (for example "60s" or "330s").
//
// A fixed 30-second buffer is added to d before formatting. The intent is
// that a Go-side timeout of d fires before the corresponding nginx timeout
// of d+30s, so the Go server gets the chance to answer with its own error
// rather than nginx cutting the connection first.
//
// Edge cases:
//   - Sub-second remainders are truncated, not rounded (30.5s -> "30s").
//   - Negative durations are treated as zero, so the result is never smaller
//     than the buffer and never a negative value.
//   - Durations so large that adding the buffer would overflow saturate at
//     the maximum representable duration instead of wrapping around.
//
// Examples:
//
//	FormatDurationForNginx(5 * time.Minute)        // "330s"
//	FormatDurationForNginx(30 * time.Second)       // "60s"
//	FormatDurationForNginx(500 * time.Millisecond) // "30s"
//	FormatDurationForNginx(0)                      // "30s"
//	FormatDurationForNginx(-time.Minute)           // "30s"
func FormatDurationForNginx(d time.Duration) string {
	const (
		// buffer is the head start given to the Go server over nginx.
		buffer = 30 * time.Second
		// maxDuration is the largest value a time.Duration can hold.
		maxDuration = time.Duration(1<<63 - 1)
	)

	// A negative timeout has no meaning; fall back to "buffer only".
	if d < 0 {
		d = 0
	}
	// Saturate so that d+buffer cannot overflow int64 and turn negative.
	if d > maxDuration-buffer {
		d = maxDuration - buffer
	}

	// Integer division truncates toward zero and avoids a float64 round trip.
	return fmt.Sprintf("%ds", int64((d+buffer)/time.Second))
}
config.TrackerServer.Listener.Net, config.TrackerServer.Listener.Addr), + // Pass timeout parameters from tracker server config + "metainfo_timeout": nginx.FormatDurationForNginx(config.TrackerServer.MetaInfoTimeout), + "announce_timeout": nginx.FormatDurationForNginx(config.TrackerServer.AnnounceTimeout), + "readiness_timeout": nginx.FormatDurationForNginx(config.TrackerServer.ReadinessTimeout), + }, nginx.WithTLS(config.TLS))) } diff --git a/tracker/trackerserver/config.go b/tracker/trackerserver/config.go index b7376b109..2f07be5a0 100644 --- a/tracker/trackerserver/config.go +++ b/tracker/trackerserver/config.go @@ -30,6 +30,11 @@ type Config struct { AnnounceInterval time.Duration `yaml:"announce_interval"` Listener listener.Config `yaml:"listener"` + + // Timeout configurations + MetaInfoTimeout time.Duration `yaml:"metainfo_timeout"` // Timeout for metainfo requests to origins + AnnounceTimeout time.Duration `yaml:"announce_timeout"` // Timeout for announce operations + ReadinessTimeout time.Duration `yaml:"readiness_timeout"` // Timeout for readiness checks } func (c Config) applyDefaults() Config { @@ -42,5 +47,14 @@ func (c Config) applyDefaults() Config { if c.AnnounceInterval == 0 { c.AnnounceInterval = 3 * time.Second } + if c.MetaInfoTimeout == 0 { + c.MetaInfoTimeout = 2 * time.Minute + } + if c.AnnounceTimeout == 0 { + c.AnnounceTimeout = 30 * time.Second + } + if c.ReadinessTimeout == 0 { + c.ReadinessTimeout = 30 * time.Second + } return c }