Skip to content

Commit baac4c6

Browse files
committed
512 default queue size, user can set it
1 parent 6d24a7e commit baac4c6

11 files changed

Lines changed: 62 additions & 34 deletions

File tree

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Lightweight MicroVM engine with dual hypervisor backends: [Cloud Hypervisor](htt
1010
- **UEFI boot** — CLOUDHV.fd firmware by default; direct kernel boot for OCI images (auto-detected)
1111
- **COW overlays** — copy-on-write disks backed by shared base images (raw for OCI, qcow2 for cloud images)
1212
- **CNI networking** — automatic NIC creation via CNI plugins, multi-NIC support, per-VM IP allocation
13-
- **Multi-queue virtio-net** — TAP devices created with per-vCPU queue pairs; TSO/UFO/csum offload enabled by default
13+
- **Multi-queue virtio-net** — TAP devices created with per-vCPU queue pairs; configurable ring depth (`--queue-size`, default 512); TSO/UFO/csum offload enabled by default
1414
- **TC redirect I/O path** — veth ↔ TAP wired via ingress qdisc + mirred redirect (no bridge in the data path)
1515
- **DNS configuration** — custom DNS servers injected into VMs via kernel cmdline (OCI) or cloud-init network-config (cloudimg)
1616
- **Cloud-init metadata** — automatic NoCloud cidata FAT12 disk for cloudimg VMs (hostname, root password, multi-NIC Netplan v2 network-config); cidata is automatically skipped on subsequent boots
@@ -178,6 +178,7 @@ Applies to `cocoon vm create`, `cocoon vm run`, and `cocoon vm debug`:
178178
| `--memory` | `1G` | Memory size (e.g., 512M, 2G) |
179179
| `--storage` | `10G` | COW disk size (e.g., 10G, 20G) |
180180
| `--nics` | `1` | Number of network interfaces (0 = no network) |
181+
| `--queue-size` | `0` (uses the built-in default of 512) | Virtio-net ring depth per queue; `0` selects the default (larger = better bulk throughput, smaller = better RPC latency; Cloud Hypervisor only, ignored by Firecracker) |
181182
| `--network` | empty (default) | CNI conflist name (empty = first conflist) |
182183
| `--windows` | `false` | Windows guest (UEFI boot, kvm_hyperv=on, no cidata) |
183184

@@ -192,6 +193,7 @@ Applies to `cocoon vm clone`:
192193
| `--memory` | empty (inherit) | Memory size (must be >= snapshot value) |
193194
| `--storage` | empty (inherit) | COW disk size (must be >= snapshot value) |
194195
| `--nics` | `0` (inherit) | Number of NICs (must be >= snapshot value) |
196+
| `--queue-size` | `0` (inherit) | Virtio-net ring depth per queue (0 = inherit from snapshot) |
195197
| `--network` | empty (inherit) | CNI conflist name (empty = inherit from source VM) |
196198

197199
### Snapshot Flags
@@ -273,7 +275,7 @@ Cocoon uses [CNI](https://www.cni.dev/) for VM networking. Each NIC is backed by
273275
Guest virtio-net ←→ TAP (multi-queue) ←TC redirect→ veth ←→ CNI bridge/overlay
274276
```
275277

276-
- **Multi-queue**: each TAP device is created with one queue pair per boot vCPU (`num_queues = 2 × vCPU` in Cloud Hypervisor), enabling per-CPU TX/RX rings for better throughput
278+
- **Multi-queue**: each TAP device is created with one queue pair per boot vCPU (`num_queues = 2 × vCPU` in Cloud Hypervisor), enabling per-CPU TX/RX rings for better throughput. Ring depth per queue is configurable via `--queue-size` (default 512; larger values improve bulk download throughput, smaller values improve RPC latency)
277279
- **Offload**: TSO, UFO, and checksum offload are enabled on the virtio-net device; TAP uses `VNET_HDR` for zero-copy GSO passthrough
278280
- **MAC passthrough**: the guest NIC inherits the CNI veth's MAC address, satisfying anti-spoofing requirements of Cilium, Calico eBPF, and VPC ENI plugins
279281
- **MTU sync**: TAP MTU is automatically synced to the veth to prevent silent large-packet drops in overlay or jumbo-frame setups
@@ -284,6 +286,7 @@ Guest virtio-net ←→ TAP (multi-queue) ←TC redirect→ veth ←→ CN
284286
- **No network**: `--nics 0` creates a VM with no network interfaces
285287
- **Multi-NIC**: `--nics N` creates N interfaces; for cloudimg VMs all NICs are auto-configured via Netplan, for OCI images all NICs are auto-configured via kernel `ip=` parameters
286288
- **Multi-network**: `--network <name>` selects a specific CNI conflist by name (e.g., `--network macvlan`); omitting uses the first conflist alphabetically. The network name is stored in the VM record for recovery after host reboot. Clone allows `--network` override; restore reuses the existing network.
289+
- **Bridge mode**: `--bridge <device>` creates TAP devices directly on an existing Linux bridge (e.g., `--bridge cni0`), bypassing CNI and TC redirect. VMs obtain their IP addresses via DHCP from the bridge. Mutually exclusive with `--network`.
287290
- **DNS**: Use `--dns` to set custom DNS servers (comma separated)
288291

289292
### CNI Configuration

cmd/core/helpers.go

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ func VMConfigFromFlags(cmd *cobra.Command, image string) (*types.VMConfig, error
250250
cpu, _ := cmd.Flags().GetInt("cpu")
251251
memStr, _ := cmd.Flags().GetString("memory")
252252
storStr, _ := cmd.Flags().GetString("storage")
253+
queueSize, _ := cmd.Flags().GetInt("queue-size")
253254
network, _ := cmd.Flags().GetString("network")
254255
windows, _ := cmd.Flags().GetBool("windows")
255256

@@ -267,13 +268,14 @@ func VMConfigFromFlags(cmd *cobra.Command, image string) (*types.VMConfig, error
267268
}
268269

269270
cfg := &types.VMConfig{
270-
Name: vmName,
271-
CPU: cpu,
272-
Memory: memBytes,
273-
Storage: storBytes,
274-
Image: image,
275-
Network: network,
276-
Windows: windows,
271+
Name: vmName,
272+
CPU: cpu,
273+
Memory: memBytes,
274+
Storage: storBytes,
275+
QueueSize: queueSize,
276+
Image: image,
277+
Network: network,
278+
Windows: windows,
277279
}
278280
if err := cfg.Validate(); err != nil {
279281
return nil, err
@@ -290,20 +292,25 @@ func CloneVMConfigFromFlags(cmd *cobra.Command, snapCfg *types.SnapshotConfig) (
290292
if network == "" {
291293
network = snapCfg.Network
292294
}
295+
queueSize, _ := cmd.Flags().GetInt("queue-size")
296+
if queueSize == 0 {
297+
queueSize = snapCfg.QueueSize
298+
}
293299

294300
cpu, memBytes, storBytes, err := mergeResourceFlags(cmd, snapCfg.CPU, snapCfg.Memory, snapCfg.Storage, snapCfg)
295301
if err != nil {
296302
return nil, err
297303
}
298304

299305
cfg := &types.VMConfig{
300-
Name: vmName,
301-
CPU: cpu,
302-
Memory: memBytes,
303-
Storage: storBytes,
304-
Image: snapCfg.Image,
305-
Network: network,
306-
Windows: snapCfg.Windows,
306+
Name: vmName,
307+
CPU: cpu,
308+
Memory: memBytes,
309+
Storage: storBytes,
310+
QueueSize: queueSize,
311+
Image: snapCfg.Image,
312+
Network: network,
313+
Windows: snapCfg.Windows,
307314
}
308315
if err := cfg.Validate(); err != nil {
309316
return nil, err

cmd/vm/commands.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ func addVMFlags(cmd *cobra.Command) {
155155
cmd.Flags().String("memory", "1G", "memory size") //nolint:mnd
156156
cmd.Flags().String("storage", "10G", "COW disk size") //nolint:mnd
157157
cmd.Flags().Int("nics", 1, "number of network interfaces (0 = no network); multiple NICs with auto IP config only works for cloudimg; OCI images auto-configure only the last NIC, others require manual setup inside the guest")
158+
cmd.Flags().Int("queue-size", 0, "virtio-net ring depth per queue (0 = default 512; tradeoff: larger improves download throughput, smaller improves RPC latency)") //nolint:mnd
158159
cmd.Flags().String("network", "", "CNI conflist name (empty = default); mutually exclusive with --bridge")
159160
cmd.Flags().String("bridge", "", "use TAP-on-bridge instead of CNI (value is bridge device, e.g. cni0); VM gets IP via DHCP from the bridge")
160161
cmd.Flags().Bool("windows", false, "Windows guest (UEFI boot, kvm_hyperv=on, no cidata)")
@@ -166,6 +167,7 @@ func addCloneFlags(cmd *cobra.Command) {
166167
cmd.Flags().String("memory", "", "memory size (empty = inherit from snapshot)")
167168
cmd.Flags().String("storage", "", "COW disk size (empty = inherit from snapshot)")
168169
cmd.Flags().Int("nics", 0, "number of NICs (0 = inherit from snapshot)")
170+
cmd.Flags().Int("queue-size", 0, "virtio-net ring depth per queue (0 = inherit from snapshot)") //nolint:mnd
169171
cmd.Flags().String("network", "", "CNI conflist name (empty = inherit from source VM)")
170172
cmd.Flags().String("bridge", "", "use TAP-on-bridge instead of CNI (value is bridge device, e.g. cni0)")
171173
}

hypervisor/cloudhypervisor/snapshot.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ func (ch *CloudHypervisor) Snapshot(ctx context.Context, ref string) (*types.Sna
131131
Memory: rec.Config.Memory,
132132
Storage: rec.Config.Storage,
133133
NICs: len(rec.NetworkConfigs),
134+
QueueSize: rec.Config.QueueSize,
134135
Network: rec.Config.Network,
135136
Windows: rec.Config.Windows,
136137
}

hypervisor/firecracker/snapshot.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ func (fc *Firecracker) Snapshot(ctx context.Context, ref string) (*types.Snapsho
123123
Memory: rec.Config.Memory,
124124
Storage: rec.Config.Storage,
125125
NICs: len(rec.NetworkConfigs),
126+
QueueSize: rec.Config.QueueSize,
126127
Network: rec.Config.Network,
127128
}
128129
if rec.ImageBlobIDs != nil {

network/bridge/bridge_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ func (b *Bridge) Config(ctx context.Context, vmID string, numNICs int, vmCfg *ty
122122
Tap: name,
123123
Mac: mac,
124124
NumQueues: queues,
125-
QueueSize: network.NetQueueSize,
125+
QueueSize: network.ResolveQueueSize(vmCfg.QueueSize),
126126
Backend: typ,
127127
BridgeDev: b.bridgeDev,
128128
// NetnsPath: empty — TAP is in host netns.

network/cni/create.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ func (c *CNI) Config(ctx context.Context, vmID string, numNICs int, vmCfg *types
9898
Tap: tapName,
9999
Mac: mac,
100100
NumQueues: network.NetNumQueues(vmCfg.CPU),
101-
QueueSize: network.NetQueueSize,
101+
QueueSize: network.ResolveQueueSize(vmCfg.QueueSize),
102102
Backend: typ,
103103
NetnsPath: nsPath,
104104
Network: netInfo,

network/utils.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ const (
44
vmIDPrefixLen = 8
55

66
// NetQueueSize is the default virtio-net ring depth per queue.
7-
// 1024 doubles the CH default (256) to allow more in-flight descriptors
8-
// per epoll wakeup, reducing eventfd round-trips under high throughput.
9-
NetQueueSize = 1024
7+
// 512 balances download throughput (favors larger rings) against
8+
// request-response latency (favors smaller rings).
9+
NetQueueSize = 512
1010
)
1111

1212
// NetNumQueues returns the virtio-net queue count for the given CPU count.
@@ -18,6 +18,14 @@ func NetNumQueues(cpu int) int {
1818
return cpu * 2 //nolint:mnd
1919
}
2020

21+
// ResolveQueueSize returns qs when it is positive; zero or negative values fall back to the default NetQueueSize.
22+
func ResolveQueueSize(qs int) int {
23+
if qs > 0 {
24+
return qs
25+
}
26+
return NetQueueSize
27+
}
28+
2129
// VMIDPrefix returns the first 8 characters of a VM ID, matching the
2230
// truncation used by both bridge and CNI TAP device naming.
2331
func VMIDPrefix(vmID string) string {

snapshot/localfile/localfile.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ func snapshotRecordToConfig(rec *snapshot.SnapshotRecord) *types.SnapshotConfig
271271
Memory: rec.Memory,
272272
Storage: rec.Storage,
273273
NICs: rec.NICs,
274+
QueueSize: rec.QueueSize,
274275
Network: rec.Network,
275276
Windows: rec.Windows,
276277
}

types/snapshot.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@ type SnapshotConfig struct {
1414

1515
// Original VM resource config, populated during snapshot creation.
1616
// Used by clone for parameter inheritance and validation.
17-
CPU int `json:"cpu,omitempty"`
18-
Memory int64 `json:"memory,omitempty"` // bytes
19-
Storage int64 `json:"storage,omitempty"` // bytes
20-
NICs int `json:"nics,omitempty"`
21-
Network string `json:"network,omitempty"`
22-
Windows bool `json:"windows,omitempty"`
17+
CPU int `json:"cpu,omitempty"`
18+
Memory int64 `json:"memory,omitempty"` // bytes
19+
Storage int64 `json:"storage,omitempty"` // bytes
20+
NICs int `json:"nics,omitempty"`
21+
QueueSize int `json:"queue_size,omitempty"`
22+
Network string `json:"network,omitempty"`
23+
Windows bool `json:"windows,omitempty"`
2324
}
2425

2526
// Snapshot is the public record for a snapshot.

0 commit comments

Comments
 (0)