Skip to content

Commit 6b266b0

Browse files
authored
Fix Prometheus response time units (#1104)
1 parent caf90da commit 6b266b0

7 files changed

Lines changed: 121 additions & 17 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,7 @@ To enable Prometheus integration set:
11051105
2. `PROMETHEUS_ADDR`: The port to listen on for Prometheus metrics. Defaults to `:9090`
11061106
3. `PROMETHEUS_PATH`: The path to listen on for Prometheus metrics. Defaults to `/metrics`
11071107
4. `PROMETHEUS_MAPPER_YAML`: The path to the YAML file that defines the mapping from statsd to prometheus metrics.
1108+
5. `PROMETHEUS_RESPONSE_TIME_AS_MILLISECONDS`: `true` to keep the legacy millisecond behavior for `ratelimit_server.*.response_time` in the built-in mapper. Ignored when `PROMETHEUS_MAPPER_YAML` is set.
11081109
11091110
Define the mapping from statsd to prometheus metrics in a YAML file.
11101111
Find more information about the mapping in the [Metric Mapping and Configuration](https://github.com/prometheus/statsd_exporter?tab=readme-ov-file#metric-mapping-and-configuration).
@@ -1181,6 +1182,7 @@ mappings: # Requires statsd exporter >= v0.6.0 since it uses the "drop" action.
11811182
- match: "ratelimit_server.*.response_time"
11821183
name: "ratelimit_service_response_time_seconds"
11831184
timer_type: histogram
1185+
scale: 0.001
11841186
labels:
11851187
grpc_method: "$1"
11861188

src/service_cmd/runner/runner.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ func NewRunner(s settings.Settings) Runner {
6868
}
6969
logger.Info("Stats initialized for Prometheus")
7070
store = gostats.NewStore(prom.NewPrometheusSink(prom.WithAddr(s.PrometheusAddr),
71-
prom.WithPath(s.PrometheusPath), prom.WithMapperYamlPath(s.PrometheusMapperYaml)), false)
71+
prom.WithPath(s.PrometheusPath), prom.WithMapperYamlPath(s.PrometheusMapperYaml),
72+
prom.WithResponseTimeAsMilliseconds(s.PrometheusResponseTimeAsMilliseconds)), false)
7273
default:
7374
logger.Info("Stats initialized for stdout")
7475
store = gostats.NewStore(gostats.NewLoggingSink(), false)

src/settings/settings.go

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,18 +82,19 @@ type Settings struct {
8282
XdsClientGrpcOptionsMaxMsgSizeInBytes int `envconfig:"XDS_CLIENT_MAX_MSG_SIZE_IN_BYTES" default:""`
8383

8484
// Stats-related settings
85-
UseDogStatsd bool `envconfig:"USE_DOG_STATSD" default:"false"`
86-
UseDogStatsdMogrifiers []string `envconfig:"USE_DOG_STATSD_MOGRIFIERS" default:""`
87-
UseStatsd bool `envconfig:"USE_STATSD" default:"true"`
88-
StatsdHost string `envconfig:"STATSD_HOST" default:"localhost"`
89-
StatsdPort int `envconfig:"STATSD_PORT" default:"8125"`
90-
ExtraTags map[string]string `envconfig:"EXTRA_TAGS" default:""`
91-
StatsFlushInterval time.Duration `envconfig:"STATS_FLUSH_INTERVAL" default:"10s"`
92-
DisableStats bool `envconfig:"DISABLE_STATS" default:"false"`
93-
UsePrometheus bool `envconfig:"USE_PROMETHEUS" default:"false"`
94-
PrometheusAddr string `envconfig:"PROMETHEUS_ADDR" default:":9090"`
95-
PrometheusPath string `envconfig:"PROMETHEUS_PATH" default:"/metrics"`
96-
PrometheusMapperYaml string `envconfig:"PROMETHEUS_MAPPER_YAML" default:""`
85+
UseDogStatsd bool `envconfig:"USE_DOG_STATSD" default:"false"`
86+
UseDogStatsdMogrifiers []string `envconfig:"USE_DOG_STATSD_MOGRIFIERS" default:""`
87+
UseStatsd bool `envconfig:"USE_STATSD" default:"true"`
88+
StatsdHost string `envconfig:"STATSD_HOST" default:"localhost"`
89+
StatsdPort int `envconfig:"STATSD_PORT" default:"8125"`
90+
ExtraTags map[string]string `envconfig:"EXTRA_TAGS" default:""`
91+
StatsFlushInterval time.Duration `envconfig:"STATS_FLUSH_INTERVAL" default:"10s"`
92+
DisableStats bool `envconfig:"DISABLE_STATS" default:"false"`
93+
UsePrometheus bool `envconfig:"USE_PROMETHEUS" default:"false"`
94+
PrometheusAddr string `envconfig:"PROMETHEUS_ADDR" default:":9090"`
95+
PrometheusPath string `envconfig:"PROMETHEUS_PATH" default:"/metrics"`
96+
PrometheusMapperYaml string `envconfig:"PROMETHEUS_MAPPER_YAML" default:""`
97+
PrometheusResponseTimeAsMilliseconds bool `envconfig:"PROMETHEUS_RESPONSE_TIME_AS_MILLISECONDS" default:"false"`
9798

9899
// Settings for rate limit configuration
99100
RuntimePath string `envconfig:"RUNTIME_ROOT" default:"/srv/runtime_data/current"`

src/settings/settings_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,31 @@ import (
77
"github.com/stretchr/testify/assert"
88
)
99

10+
const prometheusResponseTimeAsMillisecondsEnv = "PROMETHEUS_RESPONSE_TIME_AS_MILLISECONDS"
11+
1012
func TestSettingsTlsConfigUnmodified(t *testing.T) {
1113
settings := NewSettings()
1214
assert.NotNil(t, settings.RedisTlsConfig)
1315
assert.Nil(t, settings.RedisTlsConfig.RootCAs)
1416
}
1517

18+
func TestPrometheusResponseTimeAsMillisecondsDefault(t *testing.T) {
19+
os.Unsetenv(prometheusResponseTimeAsMillisecondsEnv)
20+
21+
settings := NewSettings()
22+
23+
assert.False(t, settings.PrometheusResponseTimeAsMilliseconds)
24+
}
25+
26+
func TestPrometheusResponseTimeAsMillisecondsEnabled(t *testing.T) {
27+
os.Setenv(prometheusResponseTimeAsMillisecondsEnv, "true")
28+
defer os.Unsetenv(prometheusResponseTimeAsMillisecondsEnv)
29+
30+
settings := NewSettings()
31+
32+
assert.True(t, settings.PrometheusResponseTimeAsMilliseconds)
33+
}
34+
1635
// Tests for RedisPoolOnEmptyBehavior
1736
func TestRedisPoolOnEmptyBehavior_Default(t *testing.T) {
1837
os.Unsetenv("REDIS_POOL_ON_EMPTY_BEHAVIOR")

src/stats/prom/default_mapper.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ mappings:
8686
- match: "ratelimit_server.*.response_time"
8787
name: "ratelimit_service_response_time_seconds"
8888
timer_type: histogram
89+
scale: 0.001
8990
labels:
9091
grpc_method: "$1"
9192

src/stats/prom/prometheus_sink.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package prom
33
import (
44
_ "embed"
55
"net/http"
6+
"strings"
67

78
"github.com/go-kit/log"
89
gostats "github.com/lyft/gostats"
@@ -64,9 +65,10 @@ var (
6465

6566
type prometheusSink struct {
6667
config struct {
67-
addr string
68-
path string
69-
mapperYamlPath string
68+
addr string
69+
path string
70+
mapperYamlPath string
71+
responseTimeAsMilliseconds bool
7072
}
7173
mapper *mapper.MetricMapper
7274
events chan event.Events
@@ -93,6 +95,20 @@ func WithMapperYamlPath(mapperYamlPath string) prometheusSinkOption {
9395
}
9496
}
9597

98+
func WithResponseTimeAsMilliseconds(responseTimeAsMilliseconds bool) prometheusSinkOption {
99+
return func(sink *prometheusSink) {
100+
sink.config.responseTimeAsMilliseconds = responseTimeAsMilliseconds
101+
}
102+
}
103+
104+
func (s *prometheusSink) mapperConfig() string {
105+
if s.config.responseTimeAsMilliseconds {
106+
return strings.Replace(defaultMapper, " scale: 0.001\n", "", 1)
107+
}
108+
109+
return defaultMapper
110+
}
111+
96112
// NewPrometheusSink returns a Sink that flushes stats to os.StdErr.
97113
func NewPrometheusSink(opts ...prometheusSinkOption) gostats.Sink {
98114
promRegistry := prometheus.DefaultRegisterer
@@ -119,7 +135,7 @@ func NewPrometheusSink(opts ...prometheusSinkOption) gostats.Sink {
119135
if sink.config.mapperYamlPath != "" {
120136
_ = sink.mapper.InitFromFile(sink.config.mapperYamlPath)
121137
} else {
122-
_ = sink.mapper.InitFromYAMLString(defaultMapper)
138+
_ = sink.mapper.InitFromYAMLString(sink.mapperConfig())
123139
}
124140

125141
sink.exp = exporter.NewExporter(promRegistry,

src/stats/prom/prometheus_sink_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,67 @@ func TestFlushTimer(t *testing.T) {
121121
*m.Metric[0].Histogram.SampleSum == 1.0
122122
}, time.Second, time.Millisecond)
123123
}
124+
125+
func TestFlushResponseTimeConvertsMillisecondsToSeconds(t *testing.T) {
126+
s.FlushTimer("ratelimit_server.ShouldRateLimit.response_time", 1000)
127+
assert.Eventually(t, func() bool {
128+
metricFamilies, err := prometheus.DefaultGatherer.Gather()
129+
if err != nil {
130+
return false
131+
}
132+
133+
metrics := make(map[string]*dto.MetricFamily)
134+
for _, metricFamily := range metricFamilies {
135+
metrics[*metricFamily.Name] = metricFamily
136+
}
137+
138+
m, ok := metrics["ratelimit_service_response_time_seconds"]
139+
if !ok || len(m.Metric) != 1 {
140+
return false
141+
}
142+
143+
return *m.Metric[0].Histogram.SampleCount == uint64(1) &&
144+
reflect.DeepEqual(toMap(m.Metric[0].Label), map[string]string{
145+
"grpc_method": "ShouldRateLimit",
146+
}) &&
147+
*m.Metric[0].Histogram.SampleSum == 1.0
148+
}, time.Second, time.Millisecond)
149+
}
150+
151+
func TestFlushResponseTimeCanUseLegacyMilliseconds(t *testing.T) {
152+
oldRegisterer := prometheus.DefaultRegisterer
153+
oldGatherer := prometheus.DefaultGatherer
154+
reg := prometheus.NewRegistry()
155+
prometheus.DefaultRegisterer = reg
156+
prometheus.DefaultGatherer = reg
157+
defer func() {
158+
prometheus.DefaultRegisterer = oldRegisterer
159+
prometheus.DefaultGatherer = oldGatherer
160+
}()
161+
162+
legacySink := NewPrometheusSink(WithAddr(":0"), WithPath("/metrics-legacy"), WithResponseTimeAsMilliseconds(true))
163+
legacySink.FlushTimer("ratelimit_server.ShouldRateLimit.response_time", 1000)
164+
165+
assert.Eventually(t, func() bool {
166+
metricFamilies, err := reg.Gather()
167+
if err != nil {
168+
return false
169+
}
170+
171+
metrics := make(map[string]*dto.MetricFamily)
172+
for _, metricFamily := range metricFamilies {
173+
metrics[*metricFamily.Name] = metricFamily
174+
}
175+
176+
m, ok := metrics["ratelimit_service_response_time_seconds"]
177+
if !ok || len(m.Metric) != 1 {
178+
return false
179+
}
180+
181+
return *m.Metric[0].Histogram.SampleCount == uint64(1) &&
182+
reflect.DeepEqual(toMap(m.Metric[0].Label), map[string]string{
183+
"grpc_method": "ShouldRateLimit",
184+
}) &&
185+
*m.Metric[0].Histogram.SampleSum == 1000.0
186+
}, time.Second, time.Millisecond)
187+
}

0 commit comments

Comments
 (0)