diff --git a/config/config.go b/config/config.go index 8764dc02..6a01890b 100644 --- a/config/config.go +++ b/config/config.go @@ -32,9 +32,10 @@ type ( } Pool struct { - Min int `default:"2"` - Max int `default:"4"` - MinAge time.Duration `default:"55m" split_words:"true"` + Min int `default:"2"` + Max int `default:"4"` + MinAge time.Duration `default:"55m" split_words:"true"` + MinIdle time.Duration `default:"0" split_words:"true"` } Check struct { diff --git a/config/load_test.go b/config/load_test.go index 77a05f93..b0739c52 100644 --- a/config/load_test.go +++ b/config/load_test.go @@ -35,6 +35,9 @@ func TestDefaults(t *testing.T) { if got, want := conf.Pool.MinAge, time.Minute*55; got != want { t.Errorf("Want default DRONE_POOL_MIN_AGE of %d, got %d", want, got) } + if got, want := conf.Pool.MinIdle, time.Minute*0; got != want { + t.Errorf("Want default DRONE_POOL_MIN_IDLE of %d, got %d", want, got) + } if got, want := conf.Check.Interval, time.Minute; got != want { t.Errorf("Want default DRONE_INSTALL_CHECK_INTERVAL of %s, got %s", want, got) @@ -74,6 +77,7 @@ func TestLoad(t *testing.T) { "DRONE_LOGS_PRETTY": "true", "DRONE_CAPACITY_BUFFER": "3", "DRONE_POOL_MIN_AGE": "1h", + "DRONE_POOL_MIN_IDLE": "15m", "DRONE_POOL_MIN": "1", "DRONE_POOL_MAX": "5", "DRONE_SERVER_HOST": "drone.company.com", @@ -207,7 +211,8 @@ var jsonConfig = []byte(`{ "Pool": { "Min": 1, "Max": 5, - "MinAge": 3600000000000 + "MinAge": 3600000000000, + "MinIdle": 900000000000 }, "Server": { "Host": "drone.company.com", diff --git a/engine/engine.go b/engine/engine.go index f32e1347..716e500e 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -102,6 +102,7 @@ func New( kernel: config.Agent.Kernel, buffer: config.CapacityBuffer, ttu: config.Pool.MinAge, + tti: config.Pool.MinIdle, min: config.Pool.Min, max: config.Pool.Max, cap: config.Agent.Concurrency, diff --git a/engine/planner.go b/engine/planner.go index ef9470ba..f1c4fe4d 100644 --- a/engine/planner.go +++ b/engine/planner.go @@ -30,6 +30,7 @@ type planner struct { cap int // capacity per-server buffer int // buffer capacity to have warm and ready ttu time.Duration // minimum server age + tti time.Duration // minimum server idle time labels map[string]string client drone.Client @@ -76,6 +77,16 @@ func (p *planner) Plan(ctx context.Context) error { ctx = logger.WithContext(ctx, log) + // if MinIdle is being used, track busy servers + if p.tti > 0 { + _, err = p.updateBusy(ctx) + if err != nil { + log.WithError(err). + Errorln("cannot check for busy servers") + return err + } + } + free := max(capacity-running-p.buffer, 0) diff := serverDiff(pending, free, p.cap) @@ -104,6 +115,45 @@ func (p *planner) Plan(ctx context.Context) error { return nil } + +// helper function checks for busy running instances and updates idle timer +func (p *planner) updateBusy(ctx context.Context) (count int, err error) { + logger := logger.FromContext(ctx) + + servers, err := p.servers.ListState(ctx, autoscaler.StateRunning) + if err != nil { + logger.WithError(err). + Errorln("cannot fetch server list") + return count, err + } + + // check for busy servers to update idle timers + busy, err := p.listBusy(ctx) + if err != nil { + logger.WithError(err). + Errorln("cannot ascertain busy server list") + return count, err + } + + for _, server := range servers { + if _, ok := busy[server.Name]; ok { + err := p.servers.Busy(ctx, server) + if err != nil { + logger.WithError(err). + WithField("server", server.Name). + WithField("updated", server.Updated). + Errorln("cannot update server as busy") + } + logger.WithField("server", server.Name). + Debugln("updated busy server") + count++ + } + } + logger.Debugf("found %d busy servers", count) + return count, nil +} + + // helper function allocates n new server instances. func (p *planner) alloc(ctx context.Context, n int) error { logger := logger.FromContext(ctx) @@ -186,6 +236,16 @@ func (p *planner) mark(ctx context.Context, n int) error { continue } + // skip servers that have not reached a min idle time + if time.Now().Before(time.Unix(server.LastBusy, 0).Add(p.tti)) { + logger. + WithField("server", server.Name). + WithField("idle", timeDiff(time.Now(), time.Unix(server.LastBusy, 0))). + WithField("min-idle", p.tti). + Debugln("server min-idle not reached") + continue + } + idle = append(idle, server) logger.WithField("server", server.Name). Debugln("server is idle") diff --git a/engine/planner_test.go b/engine/planner_test.go index 1ee59a80..36456266 100644 --- a/engine/planner_test.go +++ b/engine/planner_test.go @@ -399,6 +399,51 @@ func TestScale_MinAge(t *testing.T) { } } +// This test verifies that idle servers are not +// garbage collected until the min-idle is reached. +func TestScale_MinIdle(t *testing.T) { + controller := gomock.NewController(t) + defer controller.Finish() + + // x2 capacity + servers := []*autoscaler.Server{ + {Name: "server1", Capacity: 1, State: autoscaler.StateRunning, Created: time.Now().Unix(), LastBusy: time.Now().Add(time.Minute * 10 * -1).Unix()}, + {Name: "server2", Capacity: 1, State: autoscaler.StateRunning, Created: time.Now().Unix(), LastBusy: time.Now().Add(time.Minute * 60 * -1).Unix()}, + } + + // x0 running builds + // x0 pending builds + builds := []*drone.Stage{} + + store := mocks.NewMockServerStore(controller) + store.EXPECT().List(gomock.Any()).Return(servers, nil) + store.EXPECT().ListState(gomock.Any(), autoscaler.StateRunning).Return(servers, nil) + store.EXPECT().ListState(gomock.Any(), autoscaler.StateRunning).Return(servers, nil) + // we should expect a call to shut down only server2 since the last busy + // time of 1 hour ago satisfies the MinIdle (tti) of 30 minutes. + store.EXPECT().Update(gomock.Any(), servers[1]).Return(nil) + + client := mocks.NewMockClient(controller) + client.EXPECT().Queue().Return(builds, nil) + client.EXPECT().Queue().Return(builds, nil) + client.EXPECT().Queue().Return(builds, nil) + + p := planner{ + cap: 2, + min: 1, + max: 4, + ttu: 0, + tti: time.Minute * 30, + client: client, + servers: store, + } + + err := p.Plan(context.TODO()) + if err != nil { + t.Error(err) + } +} + func TestPlan_ShutdownIdle(t *testing.T) { controller := gomock.NewController(t) defer controller.Finish() diff --git a/mocks/mock_server.go b/mocks/mock_server.go index 7a6064a3..780c93d3 100644 --- a/mocks/mock_server.go +++ b/mocks/mock_server.go @@ -134,3 +134,17 @@ func (mr *MockServerStoreMockRecorder) Update(arg0, arg1 interface{}) *gomock.Ca mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Update", reflect.TypeOf((*MockServerStore)(nil).Update), arg0, arg1) } + +// Busy mocks base method +func (m *MockServerStore) Busy(arg0 context.Context, arg1 *autoscaler.Server) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Busy", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// Busy indicates an expected call of Busy +func (mr *MockServerStoreMockRecorder) Busy(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Busy", reflect.TypeOf((*MockServerStore)(nil).Busy), arg0, arg1) +} diff --git a/server.go b/server.go index 5f5e1c90..4a0063fa 100644 --- a/server.go +++ b/server.go @@ -52,6 +52,9 @@ type ServerStore interface { // Update the server record in the store. Update(context.Context, *Server) error + // Update the server record that it is busy. + Busy(context.Context, *Server) error + // Delete the server record from the store. Delete(context.Context, *Server) error @@ -81,4 +84,5 @@ type Server struct { Updated int64 `db:"server_updated" json:"updated"` Started int64 `db:"server_started" json:"started"` Stopped int64 `db:"server_stopped" json:"stopped"` + LastBusy int64 `db:"server_lastbusy" json:"lastbusy"` } diff --git a/store/migrate/mysql/ddl_gen.go b/store/migrate/mysql/ddl_gen.go index f9545ded..b75fba65 100644 --- a/store/migrate/mysql/ddl_gen.go +++ b/store/migrate/mysql/ddl_gen.go @@ -20,6 +20,10 @@ var migrations = []struct { name: "create-index-server-state", stmt: createIndexServerState, }, + { + name: "alter-table-servers-add-column-server-lastbusy", + stmt: alterTableServersAddColumnServerLastbusy, + }, } // Migrate performs the database migration. If the migration fails @@ -131,3 +135,11 @@ CREATE INDEX ix_servers_id ON servers (server_id); var createIndexServerState = ` CREATE INDEX ix_servers_state ON servers (server_state); ` + +// +// 002_add_column_lastbusy.sql +// + +var alterTableServersAddColumnServerLastbusy = ` +ALTER TABLE servers ADD COLUMN server_lastbusy INTEGER NOT NULL DEFAULT 0; +` diff --git a/store/migrate/mysql/files/002_add_column_lastbusy.sql b/store/migrate/mysql/files/002_add_column_lastbusy.sql new file mode 100644 index 00000000..0a0a69dc --- /dev/null +++ b/store/migrate/mysql/files/002_add_column_lastbusy.sql @@ -0,0 +1,3 @@ +-- name: alter-table-servers-add-column-server-lastbusy + +ALTER TABLE servers ADD COLUMN server_lastbusy INTEGER NOT NULL DEFAULT 0; diff --git a/store/migrate/postgres/ddl_gen.go b/store/migrate/postgres/ddl_gen.go index 63c5d2ff..d749401e 100644 --- a/store/migrate/postgres/ddl_gen.go +++ b/store/migrate/postgres/ddl_gen.go @@ -20,6 +20,10 @@ var migrations = []struct { name: "create-index-server-state", stmt: createIndexServerState, }, + { + name: "alter-table-servers-add-column-server-lastbusy", + stmt: alterTableServersAddColumnServerLastbusy, + }, } // Migrate performs the database migration. If the migration fails @@ -131,3 +135,11 @@ CREATE INDEX ix_servers_id ON servers (server_id); var createIndexServerState = ` CREATE INDEX ix_servers_state ON servers (server_state); ` + +// +// 002_add_column_lastbusy.sql +// + +var alterTableServersAddColumnServerLastbusy = ` +ALTER TABLE servers ADD COLUMN server_lastbusy INTEGER NOT NULL DEFAULT 0; +` diff --git a/store/migrate/postgres/files/002_add_column_lastbusy.sql b/store/migrate/postgres/files/002_add_column_lastbusy.sql new file mode 100644 index 00000000..0a0a69dc --- /dev/null +++ b/store/migrate/postgres/files/002_add_column_lastbusy.sql @@ -0,0 +1,3 @@ +-- name: alter-table-servers-add-column-server-lastbusy + +ALTER TABLE servers ADD COLUMN server_lastbusy INTEGER NOT NULL DEFAULT 0; diff --git a/store/migrate/sqlite/ddl_gen.go b/store/migrate/sqlite/ddl_gen.go index 049aef14..c46b73f7 100644 --- a/store/migrate/sqlite/ddl_gen.go +++ b/store/migrate/sqlite/ddl_gen.go @@ -20,6 +20,10 @@ var migrations = []struct { name: "create-index-server-state", stmt: createIndexServerState, }, + { + name: "alter-table-servers-add-column-server-lastbusy", + stmt: alterTableServersAddColumnServerLastbusy, + }, } // Migrate performs the database migration. If the migration fails @@ -131,3 +135,11 @@ CREATE INDEX IF NOT EXISTS ix_servers_id ON servers (server_id); var createIndexServerState = ` CREATE INDEX IF NOT EXISTS ix_servers_state ON servers (server_state); ` + +// +// 002_add_column_lastbusy.sql +// + +var alterTableServersAddColumnServerLastbusy = ` +ALTER TABLE servers ADD COLUMN server_lastbusy INTEGER NOT NULL DEFAULT 0; +` diff --git a/store/migrate/sqlite/files/002_add_column_lastbusy.sql b/store/migrate/sqlite/files/002_add_column_lastbusy.sql new file mode 100644 index 00000000..0a0a69dc --- /dev/null +++ b/store/migrate/sqlite/files/002_add_column_lastbusy.sql @@ -0,0 +1,3 @@ +-- name: alter-table-servers-add-column-server-lastbusy + +ALTER TABLE servers ADD COLUMN server_lastbusy INTEGER NOT NULL DEFAULT 0; diff --git a/store/servers.go b/store/servers.go index ed044334..1af10aa0 100644 --- a/store/servers.go +++ b/store/servers.go @@ -121,6 +121,34 @@ func (s *serverStore) update(server *autoscaler.Server) error { return err } +func (s *serverStore) Busy(_ context.Context, server *autoscaler.Server) error { + return retry.Do( + func() error { + if err := s.busy(server); isConnReset(err) { + return err + } else { + return retry.Unrecoverable(err) + } + }, + retry.Attempts(5), + retry.MaxDelay(time.Second*5), + retry.LastErrorOnly(true), + ) +} + +func (s *serverStore) busy(server *autoscaler.Server) error { + s.mu.Lock() + defer s.mu.Unlock() + + server.LastBusy = time.Now().Unix() + stmt, args, err := s.db.BindNamed(serverUpdateStmt, server) + if err != nil { + return err + } + _, err = s.db.ExecContext(noContext, stmt, args...) + return err +} + func (s *serverStore) Delete(_ context.Context, server *autoscaler.Server) error { s.mu.Lock() defer s.mu.Unlock() @@ -167,6 +195,7 @@ SELECT ,server_updated ,server_started ,server_stopped +,server_lastbusy FROM servers WHERE server_name=:server_name ` @@ -219,6 +248,7 @@ SELECT ,server_updated ,server_started ,server_stopped +,server_lastbusy FROM servers WHERE server_state=:server_state ORDER BY server_created ASC @@ -246,6 +276,7 @@ INSERT INTO servers ( ,server_updated ,server_started ,server_stopped +,server_lastbusy ) VALUES ( :server_name ,:server_id @@ -267,6 +298,7 @@ INSERT INTO servers ( ,:server_updated ,:server_started ,:server_stopped +,:server_lastbusy ) ` @@ -290,6 +322,7 @@ UPDATE servers SET ,server_updated=:server_updated ,server_started=:server_started ,server_stopped=:server_stopped +,server_lastbusy=:server_lastbusy WHERE server_name=:server_name `