Skip to content

Commit b181aa5

Browse files
yinjiping authored and kylewanginchina committed
fix: rename kick thread config to nice
Rename the kick thread scheduling configuration to `kick_kern_nice` and clarify Linux nice semantics as `[-20, 19]`, where lower values indicate higher priority. Remove references to SCHED_FIFO, as this setting now controls a nice-based priority. Realtime scheduling requires extra privileges and host tuning (e.g., `kernel.sched_rt_runtime_us`) and can interfere with production workloads. Using nice ensures safe, container-compatible thread prioritization.
1 parent 45fda85 commit b181aa5

File tree

10 files changed

+77
-72
lines changed

10 files changed

+77
-72
lines changed

agent/src/config/config.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,7 +1238,7 @@ pub struct EbpfProfile {
12381238
pub struct EbpfTunning {
12391239
pub collector_queue_size: usize,
12401240
pub userspace_worker_threads: i32,
1241-
pub kick_kern_sched_priority: u32,
1241+
pub kick_kern_nice: i32,
12421242
pub perf_pages_count: u32,
12431243
pub kernel_ring_size: u32,
12441244
pub max_socket_entries: u32,
@@ -1251,7 +1251,7 @@ impl Default for EbpfTunning {
12511251
Self {
12521252
collector_queue_size: 65535,
12531253
userspace_worker_threads: 1,
1254-
kick_kern_sched_priority: 1,
1254+
kick_kern_nice: 0,
12551255
perf_pages_count: 128,
12561256
kernel_ring_size: 65536,
12571257
max_socket_entries: 131072,
@@ -1263,10 +1263,10 @@ impl Default for EbpfTunning {
12631263

12641264
impl EbpfTunning {
12651265
pub(crate) fn validate(&self) -> Result<(), String> {
1266-
if !(1..=99).contains(&self.kick_kern_sched_priority) {
1266+
if !(-20..=19).contains(&self.kick_kern_nice) {
12671267
return Err(format!(
1268-
"kick_kern_sched_priority {} not in [1, 99]",
1269-
self.kick_kern_sched_priority
1268+
"kick_kern_nice {} not in [-20, 19]",
1269+
self.kick_kern_nice
12701270
));
12711271
}
12721272

agent/src/config/handler.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3813,12 +3813,12 @@ impl ConfigHandler {
38133813
tunning.kernel_ring_size = new_tunning.kernel_ring_size;
38143814
restart_agent = !first_run;
38153815
}
3816-
if tunning.kick_kern_sched_priority != new_tunning.kick_kern_sched_priority {
3816+
if tunning.kick_kern_nice != new_tunning.kick_kern_nice {
38173817
info!(
3818-
"Update inputs.ebpf.tunning.kick_kern_sched_priority from {:?} to {:?}.",
3819-
tunning.kick_kern_sched_priority, new_tunning.kick_kern_sched_priority
3818+
"Update inputs.ebpf.tunning.kick_kern_nice from {:?} to {:?}.",
3819+
tunning.kick_kern_nice, new_tunning.kick_kern_nice
38203820
);
3821-
tunning.kick_kern_sched_priority = new_tunning.kick_kern_sched_priority;
3821+
tunning.kick_kern_nice = new_tunning.kick_kern_nice;
38223822
restart_agent = !first_run;
38233823
}
38243824
if tunning.max_socket_entries != new_tunning.max_socket_entries {

agent/src/ebpf/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ extern "C" {
648648
// false Define a map without preallocated memory
649649
pub fn set_bpf_map_prealloc(enabled: bool) -> c_void;
650650

651-
pub fn set_kick_kern_sched_priority(priority: c_uint) -> c_int;
651+
pub fn set_kick_kern_nice(nice: c_int) -> c_int;
652652

653653
// Parameter descriptions:
654654
// callback: Callback interface from Rust to C; return values refer to definitions of TRACER_CALLBACK_FLAG_*.

agent/src/ebpf/user/config.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,11 +328,11 @@ enum cfg_feature_idx {
328328
*/
329329
#define KICK_KERN_PERIOD 40000000 // Set default interval to 40 milliseconds
330330
/*
331-
* Default realtime scheduling priority for per-CPU kick threads.
331+
* Default CFS nice value for per-CPU kick threads.
332332
*
333-
* The actual priority can be overridden at startup by configuration.
333+
* The actual nice value can be overridden at startup by configuration.
334334
*/
335-
#define KICK_KERN_SCHED_PRIORITY 1
335+
#define KICK_KERN_NICE 0
336336

337337
/*
338338
* A special value should be assigned to indicate the case where no data has

agent/src/ebpf/user/tracer.c

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ char linux_release[128]; // Record the contents of 'uname -r'
6666
* Used to manage or inspect per-CPU kick threads.
6767
*/
6868
kick_thread_info_t kick_threads[MAX_CPU_NR];
69-
static uint32_t kick_kern_sched_priority = KICK_KERN_SCHED_PRIORITY;
69+
static int32_t kick_kern_nice = KICK_KERN_NICE;
7070
volatile uint32_t *tracers_lock;
7171
extern volatile uint64_t sys_boot_time_ns; // System boot time in nanoseconds
7272
volatile uint64_t prev_sys_boot_time_ns; // The last updated system boot time, in nanoseconds
@@ -1648,13 +1648,13 @@ static int boot_time_update(void)
16481648
* The following method triggers a timeout check on all CPUs
16491649
* to push data residing in the eBPF buffer.
16501650
*/
1651-
int set_kick_kern_sched_priority(uint32_t priority)
1651+
int set_kick_kern_nice(int32_t nice)
16521652
{
1653-
if (priority == 0)
1653+
if (nice < -20 || nice > 19)
16541654
return ETR_INVAL;
16551655

1656-
kick_kern_sched_priority = priority;
1657-
ebpf_info("Set kick thread SCHED_FIFO priority to %u.\n", priority);
1656+
kick_kern_nice = nice;
1657+
ebpf_info("Set kick thread nice value to %d.\n", nice);
16581658
return 0;
16591659
}
16601660

@@ -1722,25 +1722,24 @@ static void *kick_kern_push_data(void *arg)
17221722
}
17231723

17241724
/*
1725-
* Use a low SCHED_FIFO priority from startup configuration for the per-CPU
1726-
* kick thread.
1725+
* Keep the kick thread on the default SCHED_OTHER/CFS policy and adjust
1726+
* only its Linux nice value.
17271727
*
1728-
* This thread wakes up periodically and issues a lightweight syscall to
1729-
* trigger kernel-side timeout checks that flush batched eBPF data. Under
1730-
* CPU contention, the default CFS scheduler may delay the dispatch of this
1731-
* thread after the timer expires, which increases the actual kick interval.
1732-
* Switching to SCHED_FIFO with a low priority helps reduce this wakeup
1733-
* latency while limiting interference with other workloads.
1728+
* A smaller nice value gives the thread more scheduling preference under
1729+
* CPU contention, while a larger value gives it less. Negative values may
1730+
* require CAP_SYS_NICE or a sufficient RLIMIT_NICE.
17341731
*/
1735-
struct sched_param sched_param = {
1736-
.sched_priority = kick_kern_sched_priority,
1737-
};
1738-
if (pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) != 0) {
1739-
ebpf_warning("Kick thread %d failed to set SCHED_FIFO priority %d: %s(%d)\n",
1740-
tid, sched_param.sched_priority, strerror(errno), errno);
1732+
if (setpriority(PRIO_PROCESS, tid, kick_kern_nice) != 0) {
1733+
if (kick_kern_nice < 0 && (errno == EACCES || errno == EPERM)) {
1734+
ebpf_warning("Kick thread %d failed to set nice value %d and will continue with default CFS priority; CAP_SYS_NICE or RLIMIT_NICE may be required: %s(%d)\n",
1735+
tid, kick_kern_nice, strerror(errno), errno);
1736+
} else {
1737+
ebpf_warning("Kick thread %d failed to set nice value %d and will continue with default CFS priority: %s(%d)\n",
1738+
tid, kick_kern_nice, strerror(errno), errno);
1739+
}
17411740
} else {
1742-
ebpf_info("Kick thread %d set SCHED_FIFO priority %d.\n",
1743-
tid, sched_param.sched_priority);
1741+
ebpf_info("Kick thread %d set nice value %d.\n", tid,
1742+
kick_kern_nice);
17441743
}
17451744

17461745
struct epoll_event events[1];

agent/src/ebpf/user/tracer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ bool php_profiler_enabled(void);
630630
bool v8_profiler_enabled(void);
631631
bool python_profiler_enabled(void);
632632
int bpf_tracer_init(const char *log_file, bool is_stdout);
633-
int set_kick_kern_sched_priority(uint32_t priority);
633+
int set_kick_kern_nice(int32_t nice);
634634
int tracer_bpf_load(struct bpf_tracer *tracer);
635635
int tracer_probes_init(struct bpf_tracer *tracer);
636636
int tracer_hooks_attach(struct bpf_tracer *tracer);

agent/src/ebpf_dispatcher.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,15 +1021,13 @@ impl EbpfCollector {
10211021

10221022
if let Err(e) = config.ebpf.tunning.validate() {
10231023
warn!(
1024-
"skip setting kick thread SCHED_FIFO priority to {}: {}",
1025-
config.ebpf.tunning.kick_kern_sched_priority, e
1024+
"skip setting kick thread nice value to {}: {}",
1025+
config.ebpf.tunning.kick_kern_nice, e
10261026
);
1027-
} else if ebpf::set_kick_kern_sched_priority(config.ebpf.tunning.kick_kern_sched_priority)
1028-
!= 0
1029-
{
1027+
} else if ebpf::set_kick_kern_nice(config.ebpf.tunning.kick_kern_nice) != 0 {
10301028
warn!(
1031-
"failed to set kick thread SCHED_FIFO priority to {}",
1032-
config.ebpf.tunning.kick_kern_sched_priority
1029+
"failed to set kick thread nice value to {}",
1030+
config.ebpf.tunning.kick_kern_nice
10331031
);
10341032
}
10351033

server/agent_config/README-CH.md

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5593,44 +5593,45 @@ inputs:
55935593

55945594
参与用户态数据处理的工作线程数量。实际最大值为主机 CPU 逻辑核心数。
55955595

5596-
#### Kick 线程 FIFO 优先级 {#inputs.ebpf.tunning.kick_kern_sched_priority}
5596+
#### Kick 线程 Nice 值 {#inputs.ebpf.tunning.kick_kern_nice}
55975597

55985598
**标签**:
55995599

56005600
<mark>agent_restart</mark>
56015601

56025602
**FQCN**:
56035603

5604-
`inputs.ebpf.tunning.kick_kern_sched_priority`
5604+
`inputs.ebpf.tunning.kick_kern_nice`
56055605

56065606
**默认值**:
56075607
```yaml
56085608
inputs:
56095609
ebpf:
56105610
tunning:
5611-
kick_kern_sched_priority: 1
5611+
kick_kern_nice: 0
56125612
```
56135613

56145614
**模式**:
56155615
| Key | Value |
56165616
| ---- | ---------------------------- |
56175617
| Type | int |
5618-
| Range | [1, 99] |
5618+
| Range | [-20, 19] |
56195619

56205620
**详细描述**:
56215621

5622-
控制每个 CPU 上 kick 线程的 SCHED_FIFO 优先级
5622+
控制每个 CPU 上 kick 线程使用的 Linux nice 值。
56235623

56245624
这些线程会在周期性定时器到期后唤醒,并通过轻量级 syscall
56255625
触发内核侧超时检查,将批量 eBPF 数据从缓冲区中推送出来。
56265626

56275627
当“指标中心”中 `deepflow_tenant -> deepflow_agent_ebpf_collector`
56285628
下的 `metrics.period_push_max_delay` 达到 199ms 时,需要关注这个
5629-
配置项。这说明周期性 push 延迟已经触发超限标记,此时可以适当提高
5630-
该配置项的取值。
5629+
配置项。这说明周期性 push 延迟已经触发超限标记,此时可以适当降低
5630+
该配置项的取值,以提高 kick 线程的调度倾向。
56315631

5632-
更高的取值可以在 CPU 竞争时降低调度延迟,但也会增加对其他负载
5633-
造成干扰的风险。
5632+
更小的 nice 值意味着更高的调度倾向,更大的 nice 值意味着更低的
5633+
调度倾向。取值范围为 -20 到 19。负值可能需要 CAP_SYS_NICE 或
5634+
足够的 RLIMIT_NICE。该配置仍然可能对其他负载产生影响。
56345635

56355636
#### Perf Page 数量 {#inputs.ebpf.tunning.perf_pages_count}
56365637

server/agent_config/README.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5744,33 +5744,33 @@ The number of worker threads refers to how many threads participate
57445744
in data processing in user-space. The actual maximal value is the number
57455745
of CPU logical cores on the host.
57465746

5747-
#### Kick Thread FIFO Priority {#inputs.ebpf.tunning.kick_kern_sched_priority}
5747+
#### Kick Thread Nice Value {#inputs.ebpf.tunning.kick_kern_nice}
57485748

57495749
**Tags**:
57505750

57515751
<mark>agent_restart</mark>
57525752

57535753
**FQCN**:
57545754

5755-
`inputs.ebpf.tunning.kick_kern_sched_priority`
5755+
`inputs.ebpf.tunning.kick_kern_nice`
57565756

57575757
**Default value**:
57585758
```yaml
57595759
inputs:
57605760
ebpf:
57615761
tunning:
5762-
kick_kern_sched_priority: 1
5762+
kick_kern_nice: 0
57635763
```
57645764

57655765
**Schema**:
57665766
| Key | Value |
57675767
| ---- | ---------------------------- |
57685768
| Type | int |
5769-
| Range | [1, 99] |
5769+
| Range | [-20, 19] |
57705770

57715771
**Description**:
57725772

5773-
Controls the SCHED_FIFO priority of per-CPU kick threads.
5773+
Controls the Linux nice value of per-CPU kick threads.
57745774

57755775
These threads wake up after the periodic timer expires and issue a
57765776
lightweight syscall to trigger kernel-side timeout checks that flush
@@ -5779,10 +5779,13 @@ batched eBPF data.
57795779
Pay attention to this option when `metrics.period_push_max_delay`
57805780
under `deepflow_tenant -> deepflow_agent_ebpf_collector` in Metrics
57815781
Center reaches 199 ms. This means the periodic push delay has hit
5782-
the exceeded marker, and the value can be increased appropriately.
5782+
the exceeded marker, and the value can be decreased appropriately to
5783+
give the kick threads more scheduling preference.
57835784

5784-
Higher values can reduce scheduling delay under CPU contention, but
5785-
also increase the risk of interfering with other workloads.
5785+
Smaller nice values mean higher scheduling preference. Larger nice
5786+
values mean lower scheduling preference. Valid values range from
5787+
-20 to 19. A negative value may require CAP_SYS_NICE or a sufficient
5788+
RLIMIT_NICE. This can still affect other workloads.
57865789

57875790
#### Perf Pages Count {#inputs.ebpf.tunning.perf_pages_count}
57885791

server/agent_config/template.yaml

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4257,16 +4257,16 @@ inputs:
42574257
userspace_worker_threads: 1
42584258
# type: int
42594259
# name:
4260-
# en: Kick Thread FIFO Priority
4261-
# ch: Kick 线程 FIFO 优先级
4260+
# en: Kick Thread Nice Value
4261+
# ch: Kick 线程 Nice 值
42624262
# unit:
4263-
# range: [1, 99]
4263+
# range: [-20, 19]
42644264
# enum_options: []
42654265
# modification: agent_restart
42664266
# ee_feature: false
42674267
# description:
42684268
# en: |-
4269-
# Controls the SCHED_FIFO priority of per-CPU kick threads.
4269+
# Controls the Linux nice value of per-CPU kick threads.
42704270
#
42714271
# These threads wake up after the periodic timer expires and issue a
42724272
# lightweight syscall to trigger kernel-side timeout checks that flush
@@ -4275,24 +4275,28 @@ inputs:
42754275
# Pay attention to this option when `metrics.period_push_max_delay`
42764276
# under `deepflow_tenant -> deepflow_agent_ebpf_collector` in Metrics
42774277
# Center reaches 199 ms. This means the periodic push delay has hit
4278-
# the exceeded marker, and the value can be increased appropriately.
4278+
# the exceeded marker, and the value can be decreased appropriately to
4279+
# give the kick threads more scheduling preference.
42794280
#
4280-
# Higher values can reduce scheduling delay under CPU contention, but
4281-
# also increase the risk of interfering with other workloads.
4281+
# Smaller nice values mean higher scheduling preference. Larger nice
4282+
# values mean lower scheduling preference. Valid values range from
4283+
# -20 to 19. A negative value may require CAP_SYS_NICE or a sufficient
4284+
# RLIMIT_NICE. This can still affect other workloads.
42824285
# ch: |-
4283-
# 控制每个 CPU 上 kick 线程的 SCHED_FIFO 优先级
4286+
# 控制每个 CPU 上 kick 线程使用的 Linux nice 值。
42844287
#
42854288
# 这些线程会在周期性定时器到期后唤醒,并通过轻量级 syscall
42864289
# 触发内核侧超时检查,将批量 eBPF 数据从缓冲区中推送出来。
42874290
#
42884291
# 当“指标中心”中 `deepflow_tenant -> deepflow_agent_ebpf_collector`
42894292
# 下的 `metrics.period_push_max_delay` 达到 199ms 时,需要关注这个
4290-
# 配置项。这说明周期性 push 延迟已经触发超限标记,此时可以适当提高
4291-
# 该配置项的取值。
4293+
# 配置项。这说明周期性 push 延迟已经触发超限标记,此时可以适当降低
4294+
# 该配置项的取值,以提高 kick 线程的调度倾向。
42924295
#
4293-
# 更高的取值可以在 CPU 竞争时降低调度延迟,但也会增加对其他负载
4294-
# 造成干扰的风险。
4295-
kick_kern_sched_priority: 1
4296+
# 更小的 nice 值意味着更高的调度倾向,更大的 nice 值意味着更低的
4297+
# 调度倾向。取值范围为 -20 到 19。负值可能需要 CAP_SYS_NICE 或
4298+
# 足够的 RLIMIT_NICE。该配置仍然可能对其他负载产生影响。
4299+
kick_kern_nice: 0
42964300
# type: int
42974301
# name:
42984302
# en: Perf Pages Count

0 commit comments

Comments
 (0)