Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cmd/drone-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@ func main() {
Fatalln("Invalid or missing hosting provider")
}

collector := metrics.New()

// instruments the provider with prometheus metrics.
provider = metrics.ServerCreate(provider)
provider = metrics.ServerDelete(provider)
provider = metrics.ServerDelete(provider, collector)

db, err := store.Connect(
conf.Database.Driver,
Expand Down Expand Up @@ -95,7 +97,7 @@ func main() {
conf,
servers,
provider,
metrics.New(),
collector,
)

//
Expand Down
2 changes: 2 additions & 0 deletions engine/alloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,5 +123,7 @@ func (a *allocator) allocate(ctx context.Context, server *autoscaler.Server) err
return err
}

a.metrics.RegisterKnownInstance(instance)

return nil
}
62 changes: 62 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,27 @@ package metrics

import (
"context"
"os"
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"

"github.com/drone/autoscaler"
Comment thread
iainlane marked this conversation as resolved.
)

var (
	// noContext is a background context, which is never cancelled, for
	// calls that require a context argument.
	noContext = context.Background()

	// registerKnownServers is a feature flag that enables the metrics
	// tracking registration and unregistration of servers. It starts out
	// false (the zero value) and is overwritten in init from the
	// DRONE_AUTOSCALER_REGISTER_KNOWN_SERVERS environment variable.
	registerKnownServers bool
)

func init() {
registerKnownServers, _ = strconv.ParseBool(
os.Getenv("DRONE_AUTOSCALER_REGISTER_KNOWN_SERVERS"),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add this to https://github.com/drone/autoscaler/blob/master/config/config.go and inject the config struct, similar to how we do it elsewhere, please?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, pushed. Can you re-review, please?

)
}

// Collector defines a metrics collector.
type Collector interface {
// TrackServerCreateTime registers the elapsed time it takes
Expand Down Expand Up @@ -40,6 +54,10 @@ type Collector interface {
// IncrServerSetupError keeps a count of errors encountered
// when installing software on servers.
IncrServerSetupError()

RegisterKnownInstance(instance *autoscaler.Instance)

UnregisterKnownInstance(instance *autoscaler.Instance)
}

// Prometheus is a Prometheus metrics collector.
Expand All @@ -50,6 +68,7 @@ type Prometheus struct {
countServerCreateErr prometheus.Counter
countServerInitErr prometheus.Counter
countServerSetupErr prometheus.Counter
knownInstance *prometheus.GaugeVec
}

// New returns a new Prometheus metrics provider.
Expand Down Expand Up @@ -82,12 +101,25 @@ func New() *Prometheus {
Name: "drone_server_install_errors_total",
Help: "Total number of errors installing software on a server.",
})
p.knownInstance = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "drone_server_known_instance",
Help: "Known server instances.",
},
[]string{
"name",
"provider",
"region",
"size",
})
prometheus.MustRegister(p.trackServerCreateTime)
prometheus.MustRegister(p.trackServerInitTime)
prometheus.MustRegister(p.trackServerSetupTime)
prometheus.MustRegister(p.countServerCreateErr)
prometheus.MustRegister(p.countServerInitErr)
prometheus.MustRegister(p.countServerSetupErr)
if registerKnownServers {
prometheus.MustRegister(p.knownInstance)
}
return p
}

Expand Down Expand Up @@ -135,6 +167,30 @@ func (m *Prometheus) IncrServerSetupError() {
m.countServerSetupErr.Inc()
}

// RegisterKnownInstance registers that we know about a server.
func (m *Prometheus) RegisterKnownInstance(instance *autoscaler.Instance) {
if registerKnownServers {
m.knownInstance.With(prometheus.Labels{
"name": instance.Name,
"provider": string(instance.Provider),
"region": instance.Region,
"size": instance.Size,
}).Set(1)
}
}

// UnregisterKnownInstance forgets a server we once knew.
func (m *Prometheus) UnregisterKnownInstance(instance *autoscaler.Instance) {
if registerKnownServers {
m.knownInstance.Delete(prometheus.Labels{
"name": instance.Name,
"provider": string(instance.Provider),
"region": instance.Region,
"size": instance.Size,
})
}
}

// NopCollector provides a no-op metrics collector.
type NopCollector struct{}

Expand Down Expand Up @@ -163,3 +219,9 @@ func (*NopCollector) IncrServerInitError() {}
// IncrServerSetupError keeps a count of errors encountered
// when installing software on servers.
func (*NopCollector) IncrServerSetupError() {}

// RegisterKnownInstance is a no-op: the NopCollector does not record
// known server instances.
func (*NopCollector) RegisterKnownInstance(instance *autoscaler.Instance) {
	// Intentionally empty.
}

// UnregisterKnownInstance is a no-op: the NopCollector holds no known
// server instances to forget.
func (*NopCollector) UnregisterKnownInstance(instance *autoscaler.Instance) {
	// Intentionally empty.
}
15 changes: 9 additions & 6 deletions metrics/server_delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

// ServerDelete provides metrics for servers deleted.
func ServerDelete(provider autoscaler.Provider) autoscaler.Provider {
func ServerDelete(provider autoscaler.Provider, collector Collector) autoscaler.Provider {
created := prometheus.NewCounter(prometheus.CounterOpts{
Name: "drone_servers_deleted",
Help: "Total number of servers deleted.",
Expand All @@ -24,17 +24,19 @@ func ServerDelete(provider autoscaler.Provider) autoscaler.Provider {
prometheus.MustRegister(created)
prometheus.MustRegister(errors)
return &providerWrapDestroy{
Provider: provider,
created: created,
errors: errors,
Provider: provider,
collector: collector,
created: created,
errors: errors,
}
}

// instruments the Provider to count server destroy events.
type providerWrapDestroy struct {
autoscaler.Provider
created prometheus.Counter
errors prometheus.Counter
collector Collector
created prometheus.Counter
errors prometheus.Counter
}

func (p *providerWrapDestroy) Destroy(ctx context.Context, instance *autoscaler.Instance) error {
Expand All @@ -44,5 +46,6 @@ func (p *providerWrapDestroy) Destroy(ctx context.Context, instance *autoscaler.
} else {
p.errors.Add(1)
}
p.collector.UnregisterKnownInstance(instance)
return err
}
15 changes: 8 additions & 7 deletions metrics/server_delete_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ func TestServerDelete(t *testing.T) {
provider.EXPECT().Destroy(noContext, instance).Times(3).Return(nil)
provider.EXPECT().Destroy(noContext, instance).Return(errors.New("error"))

providerInst := ServerDelete(provider)
collector := New()
providerInst := ServerDelete(provider, collector)
for i := 0; i < 3; i++ {
err := providerInst.Destroy(noContext, instance)
if err != nil {
Expand All @@ -52,20 +53,20 @@ func TestServerDelete(t *testing.T) {
t.Error(err)
return
}
if want, got := len(metrics), 2; want != got {
t.Errorf("Expect registered metric")
if want, got := len(metrics), 8; want != got {
t.Errorf("Expect registered metric %d, got %d", want, got)
return
}
if got, want := metrics[0].GetName(), "drone_servers_deleted"; want != got {
if got, want := metrics[6].GetName(), "drone_servers_deleted"; want != got {
t.Errorf("Expect metric name %s, got %s", want, got)
}
if got, want := metrics[0].Metric[0].Counter.GetValue(), float64(3); want != got {
if got, want := metrics[6].Metric[0].Counter.GetValue(), float64(3); want != got {
t.Errorf("Expect metric value %f, got %f", want, got)
}
if got, want := metrics[1].GetName(), "drone_servers_deleted_err"; want != got {
if got, want := metrics[7].GetName(), "drone_servers_deleted_err"; want != got {
t.Errorf("Expect metric name %s, got %s", want, got)
}
if got, want := metrics[1].Metric[0].Counter.GetValue(), float64(1); want != got {
if got, want := metrics[7].Metric[0].Counter.GetValue(), float64(1); want != got {
t.Errorf("Expect metric value %f, got %f", want, got)
}
}