Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions internal/discovery/memleak_test.go
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to reproduce the current faulty behavior. It'd be fine to just see if the patched behavior works as expected. Tests implore maintenance too, so we'd benefit from testing traits in a way that balances manageable maintenance (PTAL at my other comment regarding reuse) with reasonable coverage, and dropping any extraneous ones that are arguably redundant in the long-term.

Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
Copyright 2026 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package discovery

import (
"context"
"fmt"
"runtime"
"sync"
"testing"
"time"

"k8s.io/apimachinery/pkg/runtime/schema"
)

// appendToMapBuggy reproduces the pre-fix AppendToMap behaviour for comparison.
func appendToMapBuggy(r *CRDiscoverer, gvkps ...groupVersionKindPlural) {
if r.Map == nil {
r.Map = map[string]map[string][]kindPlural{}
}
if r.GVKToReflectorStopChanMap == nil {
r.GVKToReflectorStopChanMap = map[string]chan struct{}{}
}
for _, gvkp := range gvkps {
if _, ok := r.Map[gvkp.Group]; !ok {
r.Map[gvkp.Group] = map[string][]kindPlural{}
}
if _, ok := r.Map[gvkp.Group][gvkp.Version]; !ok {
r.Map[gvkp.Group][gvkp.Version] = []kindPlural{}
}
r.Map[gvkp.Group][gvkp.Version] = append(r.Map[gvkp.Group][gvkp.Version], kindPlural{Kind: gvkp.Kind, Plural: gvkp.Plural})
r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()] = make(chan struct{})
}
}

func makeGVKPs(n int) []groupVersionKindPlural {
gvkps := make([]groupVersionKindPlural, n)
for i := range n {
gvkps[i] = groupVersionKindPlural{
GroupVersionKind: schema.GroupVersionKind{
Group: fmt.Sprintf("group%d.example.com", i),
Version: "v1",
Kind: fmt.Sprintf("Kind%d", i),
},
Plural: fmt.Sprintf("kind%ds", i),
}
}
return gvkps
}

func heapKB() uint64 {
runtime.GC()
runtime.GC()
var m runtime.MemStats
runtime.ReadMemStats(&m)
return m.HeapInuse / 1024
}

// TestMemoryLeakSimulation runs the buggy and fixed AppendToMap side-by-side
// across many poll cycles and reports heap growth and map entry counts.
func TestMemoryLeakSimulation(t *testing.T) {
const (
numGVKs = 5
pollCycles = 500
)

gvkps := makeGVKPs(numGVKs)

buggyDiscoverer := &CRDiscoverer{}
heapBefore := heapKB()
goroutinesBefore := runtime.NumGoroutine()

for range pollCycles {
appendToMapBuggy(buggyDiscoverer, gvkps...)
}

heapAfterBuggy := heapKB()
goroutinesAfterBuggy := runtime.NumGoroutine()

buggyKindCount := 0
for _, versions := range buggyDiscoverer.Map {
for _, kinds := range versions {
buggyKindCount += len(kinds)
}
}
buggyChannelCount := len(buggyDiscoverer.GVKToReflectorStopChanMap)

fixedDiscoverer := &CRDiscoverer{}
heapBeforeFixed := heapKB()

for range pollCycles {
fixedDiscoverer.AppendToMap(gvkps...)
}

heapAfterFixed := heapKB()

fixedKindCount := 0
for _, versions := range fixedDiscoverer.Map {
for _, kinds := range versions {
fixedKindCount += len(kinds)
}
}
fixedChannelCount := len(fixedDiscoverer.GVKToReflectorStopChanMap)

t.Logf("Simulation: %d GVKs × %d poll cycles", numGVKs, pollCycles)
t.Logf("")
t.Logf(" │ BUGGY (pre-fix) │ FIXED (post-fix)")
t.Logf(" ─────────────────────┼───────────────────┼──────────────────")
t.Logf(" Kind entries in map │ %17d │ %d", buggyKindCount, fixedKindCount)
t.Logf(" Stop channels live │ %17d │ %d", buggyChannelCount, fixedChannelCount)
t.Logf(" Heap before (KB) │ %17d │ %d", heapBefore, heapBeforeFixed)
t.Logf(" Heap after (KB) │ %17d │ %d", heapAfterBuggy, heapAfterFixed)
t.Logf(" Heap growth (KB) │ %17d │ %d", int64(heapAfterBuggy)-int64(heapBefore), int64(heapAfterFixed)-int64(heapBeforeFixed)) //nolint:gosec
t.Logf(" Goroutines before │ %17d │ (same baseline)", goroutinesBefore)
t.Logf(" Goroutines after │ %17d │ (no goroutines started)", goroutinesAfterBuggy)

if buggyKindCount != numGVKs*pollCycles {
t.Errorf("[buggy] expected %d kind entries (linear growth), got %d", numGVKs*pollCycles, buggyKindCount)
}
if fixedKindCount != numGVKs {
t.Errorf("[fixed] expected exactly %d kind entries (stable), got %d", numGVKs, fixedKindCount)
}
if fixedChannelCount != numGVKs {
t.Errorf("[fixed] expected exactly %d stop channels (stable), got %d", numGVKs, fixedChannelCount)
}

buggyGrowth := int64(heapAfterBuggy) - int64(heapBefore) //nolint:gosec
fixedGrowth := int64(heapAfterFixed) - int64(heapBeforeFixed) //nolint:gosec
t.Logf("Heap growth comparison is diagnostic only: buggy=%d KB fixed=%d KB", buggyGrowth, fixedGrowth)
}

// TestGoroutineLeakSimulation verifies that goroutines started with the fixed
// pattern (bridge goroutine selecting on both GVK stop channel and context
// cancellation) exit when the context is cancelled.
func TestGoroutineLeakSimulation(t *testing.T) {
const (
numGVKs = 5
rebuilds = 20
)

var wg sync.WaitGroup

for range rebuilds {
ctx, cancel := context.WithCancel(context.Background())
for range numGVKs {
gvkStopCh := make(chan struct{})
stopCh := make(chan struct{})
wg.Add(1)
go func() {
defer wg.Done()
defer close(stopCh)
select {
case <-gvkStopCh:
case <-ctx.Done():
}
}()
_ = stopCh
}
cancel()
}

done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()

select {
case <-done:
t.Logf("All %d bridge goroutines exited after context cancellation", numGVKs*rebuilds)
case <-time.After(2 * time.Second):
t.Errorf("goroutines did not exit within 2s after context cancellation")
}
}
24 changes: 22 additions & 2 deletions internal/discovery/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,31 @@ func (r *CRDiscoverer) AppendToMap(gvkps ...groupVersionKindPlural) {
if _, ok := r.Map[gvkp.Group][gvkp.Version]; !ok {
r.Map[gvkp.Group][gvkp.Version] = []kindPlural{}
}
r.Map[gvkp.Group][gvkp.Version] = append(r.Map[gvkp.Group][gvkp.Version], kindPlural{Kind: gvkp.Kind, Plural: gvkp.Plural})
r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()] = make(chan struct{})
alreadyExists := false
for _, existing := range r.Map[gvkp.Group][gvkp.Version] {
if existing.Kind == gvkp.Kind {
alreadyExists = true
break
}
}
if !alreadyExists {
r.Map[gvkp.Group][gvkp.Version] = append(r.Map[gvkp.Group][gvkp.Version], kindPlural{Kind: gvkp.Kind, Plural: gvkp.Plural})
}
if _, exists := r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()]; !exists {
r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()] = make(chan struct{})
}
}
}

// GetStopChanForGVK returns the stop channel for the given GVK under the read lock.
func (r *CRDiscoverer) GetStopChanForGVK(gvk string) chan struct{} {
var ch chan struct{}
r.SafeRead(func() {
ch = r.GVKToReflectorStopChanMap[gvk]
})
return ch
}

// RemoveFromMap removes the given GVKs from the cache.
func (r *CRDiscoverer) RemoveFromMap(gvkps ...groupVersionKindPlural) {
for _, gvkp := range gvkps {
Expand Down
96 changes: 96 additions & 0 deletions internal/discovery/types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
Copyright 2026 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package discovery

import (
"testing"

"k8s.io/apimachinery/pkg/runtime/schema"
)

// TestAppendToMapIdempotency verifies that calling AppendToMap repeatedly with
// the same GVK does not accumulate duplicate kind entries or replace existing
// stop channels.
func TestAppendToMapIdempotency(t *testing.T) {
const iterations = 10

gvkp := groupVersionKindPlural{
GroupVersionKind: schema.GroupVersionKind{
Group: "example.com",
Version: "v1",
Kind: "Foo",
},
Plural: "foos",
}

r := &CRDiscoverer{}

r.AppendToMap(gvkp)
firstCh := r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()]
if firstCh == nil {
t.Fatal("expected stop channel to be created on first AppendToMap call")
}

for i := 1; i < iterations; i++ {
r.AppendToMap(gvkp)
}

kinds := r.Map[gvkp.Group][gvkp.Version]
if len(kinds) != 1 {
t.Errorf("expected exactly 1 kind entry, got %d", len(kinds))
}

gotCh := r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()]
if gotCh != firstCh {
t.Error("stop channel was replaced on repeated AppendToMap calls")
}
}

// TestRemoveFromMapClosesChannel verifies that RemoveFromMap closes and removes
// the stop channel for the deleted GVK.
func TestRemoveFromMapClosesChannel(t *testing.T) {
gvkp := groupVersionKindPlural{
GroupVersionKind: schema.GroupVersionKind{
Group: "example.com",
Version: "v1",
Kind: "Bar",
},
Plural: "bars",
}

r := &CRDiscoverer{}
r.AppendToMap(gvkp)

ch := r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()]
if ch == nil {
t.Fatal("expected stop channel after AppendToMap")
}

r.RemoveFromMap(gvkp)

// Channel must be closed (readable immediately with zero value).
select {
case _, open := <-ch:
if open {
t.Error("channel should be closed, but received a value")
}
default:
t.Error("channel should be closed but is still blocking")
}

// Entry must be removed from the stop channel map.
if _, exists := r.GVKToReflectorStopChanMap[gvkp.GroupVersionKind.String()]; exists {
t.Error("stop channel map entry should be deleted after RemoveFromMap")
}
}
14 changes: 12 additions & 2 deletions internal/store/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ type Builder struct {
useAPIServerCache bool
objectLimit int64

GVKToReflectorStopChanMap *map[string]chan struct{}
GetGVKStopChan func(gvk string) chan struct{}
}

// NewBuilder returns a new builder.
Expand Down Expand Up @@ -622,7 +622,17 @@ func (b *Builder) startReflector(
instrumentedListWatch := watch.NewInstrumentedListerWatcher(listWatcher, b.listWatchMetrics, reflect.TypeOf(expectedType).String(), useAPIServerCache, objectLimit, client)
reflector := cache.NewReflectorWithOptions(sharding.NewShardedListWatch(b.shard, b.totalShards, instrumentedListWatch), expectedType, store, cache.ReflectorOptions{ResyncPeriod: 0})
if cr, ok := expectedType.(*unstructured.Unstructured); ok {
go reflector.Run((*b.GVKToReflectorStopChanMap)[cr.GroupVersionKind().String()])
stopCh := make(chan struct{})
gvkStopCh := b.GetGVKStopChan(cr.GroupVersionKind().String())
ctxDone := b.ctx.Done()
Comment thread
bhope marked this conversation as resolved.
go func() {
defer close(stopCh)
select {
case <-gvkStopCh:
case <-ctxDone:
}
}()
go reflector.Run(stopCh)
} else {
go reflector.Run(b.ctx.Done())
}
Expand Down
Loading