-
Notifications
You must be signed in to change notification settings - Fork 94
Add metrics for tracking live servers #113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,13 +6,27 @@ package metrics | |
|
|
||
| import ( | ||
| "context" | ||
| "os" | ||
| "strconv" | ||
| "time" | ||
|
|
||
| "github.com/prometheus/client_golang/prometheus" | ||
|
|
||
| "github.com/drone/autoscaler" | ||
| ) | ||
|
|
||
| var noContext = context.Background() | ||
|
|
||
| // registerKnownServers is a feature flag (false by default, set in init from | ||
| // DRONE_AUTOSCALER_REGISTER_KNOWN_SERVERS) that gates the known-instance gauge. | ||
| var registerKnownServers = false | ||
|
|
||
| func init() { | ||
| registerKnownServers, _ = strconv.ParseBool( | ||
| os.Getenv("DRONE_AUTOSCALER_REGISTER_KNOWN_SERVERS"), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add this to https://github.com/drone/autoscaler/blob/master/config/config.go and inject the config struct, similar to how we do it elsewhere, please?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay, pushed. Can you re-review, please? |
||
| ) | ||
| } | ||
|
|
||
| // Collector defines a metrics collector. | ||
| type Collector interface { | ||
| // TrackServerCreateTime registers the elapsed time it takes | ||
|
|
@@ -40,6 +54,10 @@ type Collector interface { | |
| // IncrServerSetupError keeps a count of errors encountered | ||
| // when installing software on servers. | ||
| IncrServerSetupError() | ||
|
|
||
| RegisterKnownInstance(instance *autoscaler.Instance) | ||
|
|
||
| UnregisterKnownInstance(instance *autoscaler.Instance) | ||
| } | ||
|
|
||
| // Prometheus is a Prometheus metrics collector. | ||
|
|
@@ -50,6 +68,7 @@ type Prometheus struct { | |
| countServerCreateErr prometheus.Counter | ||
| countServerInitErr prometheus.Counter | ||
| countServerSetupErr prometheus.Counter | ||
| knownInstance *prometheus.GaugeVec | ||
| } | ||
|
|
||
| // New returns a new Prometheus metrics provider. | ||
|
|
@@ -82,12 +101,25 @@ func New() *Prometheus { | |
| Name: "drone_server_install_errors_total", | ||
| Help: "Total number of errors installing software on a server.", | ||
| }) | ||
| p.knownInstance = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
| Name: "drone_server_known_instance", | ||
| Help: "Known server instances.", | ||
| }, | ||
| []string{ | ||
| "name", | ||
| "provider", | ||
| "region", | ||
| "size", | ||
| }) | ||
| prometheus.MustRegister(p.trackServerCreateTime) | ||
| prometheus.MustRegister(p.trackServerInitTime) | ||
| prometheus.MustRegister(p.trackServerSetupTime) | ||
| prometheus.MustRegister(p.countServerCreateErr) | ||
| prometheus.MustRegister(p.countServerInitErr) | ||
| prometheus.MustRegister(p.countServerSetupErr) | ||
| if registerKnownServers { | ||
| prometheus.MustRegister(p.knownInstance) | ||
| } | ||
| return p | ||
| } | ||
|
|
||
|
|
@@ -135,6 +167,30 @@ func (m *Prometheus) IncrServerSetupError() { | |
| m.countServerSetupErr.Inc() | ||
| } | ||
|
|
||
| // RegisterKnownInstance sets the drone_server_known_instance gauge to 1 for the instance's name, provider, region and size labels; it does nothing unless registerKnownServers is enabled. | ||
| func (m *Prometheus) RegisterKnownInstance(instance *autoscaler.Instance) { | ||
| if registerKnownServers { | ||
| m.knownInstance.With(prometheus.Labels{ | ||
| "name": instance.Name, | ||
| "provider": string(instance.Provider), | ||
| "region": instance.Region, | ||
| "size": instance.Size, | ||
| }).Set(1) | ||
| } | ||
| } | ||
|
|
||
| // UnregisterKnownInstance deletes the known-instance gauge series labelled with the instance's name, provider, region and size; it does nothing unless registerKnownServers is enabled. | ||
| func (m *Prometheus) UnregisterKnownInstance(instance *autoscaler.Instance) { | ||
| if registerKnownServers { | ||
| m.knownInstance.Delete(prometheus.Labels{ | ||
| "name": instance.Name, | ||
| "provider": string(instance.Provider), | ||
| "region": instance.Region, | ||
| "size": instance.Size, | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| // NopCollector is a metrics collector with no state; the methods shown in this file have empty bodies. | ||
| type NopCollector struct{} | ||
|
|
||
|
|
@@ -163,3 +219,9 @@ func (*NopCollector) IncrServerInitError() {} | |
| // IncrServerSetupError is the no-op form of the collector method that counts | ||
| // errors encountered when installing software on servers; it does nothing. | ||
| func (*NopCollector) IncrServerSetupError() {} | ||
|
|
||
| // RegisterKnownInstance is a no-op; the given instance is ignored. | ||
| func (*NopCollector) RegisterKnownInstance(instance *autoscaler.Instance) {} | ||
|
|
||
| // UnregisterKnownInstance is a no-op; the given instance is ignored. | ||
| func (*NopCollector) UnregisterKnownInstance(instance *autoscaler.Instance) {} | ||
Uh oh!
There was an error while loading. Please reload this page.