fix: cache did not refresh sometime by looplj · Pull Request #1783 · looplj/axonhub

looplj · 2026-06-04T13:47:49Z

devin-ai-integration

Devin Review found 2 potential issues.

View 3 additional findings in Devin Review.

devin-ai-integration · 2026-06-04T13:50:33Z

+func (p *DeviceFlowProvider) runAutoRefresh(
 	autoCtx context.Context,
-	exec executors.ScheduledExecutor,
 	refreshBefore time.Duration,
 	fallbackInterval time.Duration,
-	runImmediately bool,
 ) {
-	if autoCtx.Err() != nil {
-		return
-	}
-
 	delay := time.Duration(0)
-	if !runImmediately {
-		delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)
-	}
-
-	p.autoMu.Lock()
-
-	if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {
-		p.autoMu.Unlock()
-		return
-	}
-
-	prevCancel := p.autoTaskCancel
-	p.autoTaskCancel = nil
-	p.autoMu.Unlock()
-
-	if prevCancel != nil {
-		prevCancel()
-	}
-
-	cancelFunc, err := exec.ScheduleFunc(func(_ context.Context) {
-		defer func() {
-			if r := recover(); r != nil {
-				slog.ErrorContext(autoCtx, "auto refresh device flow provider panicked", slog.Any("cause", r))
-			}
-		}()
-
-		if autoCtx.Err() != nil {
+	for {
+		if !sleepForAutoRefresh(autoCtx, delay) {
 			return
 		}

-		// Ensure credentials are fresh
-		refreshFailed := false
-
-		if _, err := p.GetToken(autoCtx); err != nil {
-			slog.WarnContext(autoCtx, "failed to auto refresh device flow token", slog.Any("error", err))
-
-			refreshFailed = true
-		}
-
-		if autoCtx.Err() != nil {
+		refreshFailed, ok := p.runAutoRefreshOnce(autoCtx)
+		if !ok {
 			return
 		}

 		if refreshFailed {
-			p.scheduleNextAutoRefreshWithDelay(autoCtx, exec, refreshBefore, fallbackInterval, fallbackInterval)
+			delay = fallbackInterval
 		} else {
-			p.scheduleNextAutoRefresh(autoCtx, exec, refreshBefore, fallbackInterval, false)
+			delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)
 		}
-	}, delay)
-	if err != nil {
-		p.StopAutoRefresh()
-		return
 	}
-
-	p.autoMu.Lock()
-
-	if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {
-		p.autoMu.Unlock()
-		cancelFunc()
-
-		return
-	}
-
-	p.autoTaskCancel = cancelFunc
-	p.autoMu.Unlock()
 }


🟡 Missing top-level recover guard in goroutine DeviceFlowProvider.runAutoRefresh

runAutoRefresh is launched as a goroutine at llm/oauth/device_flow_provider.go:438 but does not install a top-level defer recover() guard. The recover guard exists only inside runAutoRefreshOnce (llm/oauth/device_flow_provider.go:480), so any panic in the surrounding loop code (sleepForAutoRefresh, nextAutoRefreshDelay) would crash the goroutine without logging. This violates .agent/rules/go-general.md rule 8: "Any manually started goroutine must install a top-level defer recover() guard that logs the panic before the goroutine exits."

Suggested change

func (p *DeviceFlowProvider) runAutoRefresh(

autoCtx context.Context,

exec executors.ScheduledExecutor,

refreshBefore time.Duration,

fallbackInterval time.Duration,

runImmediately bool,

) {

if autoCtx.Err() != nil {

return

}

delay := time.Duration(0)

if !runImmediately {

delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)

}

p.autoMu.Lock()

if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {

p.autoMu.Unlock()

return

}

prevCancel := p.autoTaskCancel

p.autoTaskCancel = nil

p.autoMu.Unlock()

if prevCancel != nil {

prevCancel()

}

cancelFunc, err := exec.ScheduleFunc(func(_ context.Context) {

defer func() {

if r := recover(); r != nil {

slog.ErrorContext(autoCtx, "auto refresh device flow provider panicked", slog.Any("cause", r))

}

}()

if autoCtx.Err() != nil {

for {

if !sleepForAutoRefresh(autoCtx, delay) {

return

}

// Ensure credentials are fresh

refreshFailed := false

if _, err := p.GetToken(autoCtx); err != nil {

slog.WarnContext(autoCtx, "failed to auto refresh device flow token", slog.Any("error", err))

refreshFailed = true

}

if autoCtx.Err() != nil {

refreshFailed, ok := p.runAutoRefreshOnce(autoCtx)

if !ok {

return

}

if refreshFailed {

p.scheduleNextAutoRefreshWithDelay(autoCtx, exec, refreshBefore, fallbackInterval, fallbackInterval)

delay = fallbackInterval

} else {

p.scheduleNextAutoRefresh(autoCtx, exec, refreshBefore, fallbackInterval, false)

delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)

}

}, delay)

if err != nil {

p.StopAutoRefresh()

return

}

p.autoMu.Lock()

if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {

p.autoMu.Unlock()

cancelFunc()

return

}

p.autoTaskCancel = cancelFunc

p.autoMu.Unlock()

}

func (p *DeviceFlowProvider) runAutoRefresh(

autoCtx context.Context,

refreshBefore time.Duration,

fallbackInterval time.Duration,

) {

defer func() {

if r := recover(); r != nil {

slog.ErrorContext(autoCtx, "auto refresh device flow provider panicked", slog.Any("cause", r))

}

}()

delay := time.Duration(0)

for {

if !sleepForAutoRefresh(autoCtx, delay) {

return

}

refreshFailed, ok := p.runAutoRefreshOnce(autoCtx)

if !ok {

return

}

if refreshFailed {

delay = fallbackInterval

} else {

delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)

}

}

}

Was this helpful? React with 👍 or 👎 to provide feedback.

devin-ai-integration · 2026-06-04T13:50:35Z

+func (p *TokenProvider) runAutoRefresh(
 	autoCtx context.Context,
-	exec executors.ScheduledExecutor,
 	refreshBefore time.Duration,
 	fallbackInterval time.Duration,
-	runImmediately bool,
 ) {
-	if autoCtx.Err() != nil {
-		return
-	}
-
 	delay := time.Duration(0)
-	if !runImmediately {
-		delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)
-	}
-
-	p.autoMu.Lock()
-
-	if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {
-		p.autoMu.Unlock()
-		return
-	}
-
-	prevCancel := p.autoTaskCancel
-	p.autoTaskCancel = nil
-	p.autoMu.Unlock()
-
-	if prevCancel != nil {
-		prevCancel()
-	}
-
-	cancelFunc, err := exec.ScheduleFunc(func(_ context.Context) {
-		defer func() {
-			if r := recover(); r != nil {
-				slog.ErrorContext(autoCtx, "auto refresh token provider panicked", slog.Any("cause", r))
-			}
-		}()
-
-		if autoCtx.Err() != nil {
+	for {
+		if !sleepForAutoRefresh(autoCtx, delay) {
 			return
 		}

-		refreshFailed := false
-
-		if _, err := p.EnsureFresh(autoCtx, refreshBefore); err != nil {
-			slog.WarnContext(autoCtx, "failed to auto refresh token", slog.Any("error", err))
-
-			refreshFailed = true
-		}
-
-		if autoCtx.Err() != nil {
+		refreshFailed, ok := p.runAutoRefreshOnce(autoCtx, refreshBefore)
+		if !ok {
 			return
 		}

 		if refreshFailed {
-			// Use fallback interval to avoid tight retry loops on persistent errors
-			// (e.g. refresh_token_reused, invalid_grant).
-			p.scheduleNextAutoRefreshWithDelay(autoCtx, exec, refreshBefore, fallbackInterval, fallbackInterval)
+			delay = fallbackInterval
 		} else {
-			p.scheduleNextAutoRefresh(autoCtx, exec, refreshBefore, fallbackInterval, false)
+			delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)
 		}
-	}, delay)
-	if err != nil {
-		p.StopAutoRefresh()
-		return
 	}
-
-	p.autoMu.Lock()
-
-	if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {
-		p.autoMu.Unlock()
-		cancelFunc()
-
-		return
-	}
-
-	p.autoTaskCancel = cancelFunc
-	p.autoMu.Unlock()
 }


🟡 Missing top-level recover guard in goroutine TokenProvider.runAutoRefresh

runAutoRefresh is launched as a goroutine at llm/oauth/token_provider.go:306 but does not install a top-level defer recover() guard. The recover guard exists only inside runAutoRefreshOnce (llm/oauth/token_provider.go:347), so any panic in the surrounding loop code (sleepForAutoRefresh, nextAutoRefreshDelay) would crash the goroutine without logging. This violates .agent/rules/go-general.md rule 8: "Any manually started goroutine must install a top-level defer recover() guard that logs the panic before the goroutine exits."

Suggested change

func (p *TokenProvider) runAutoRefresh(

autoCtx context.Context,

exec executors.ScheduledExecutor,

refreshBefore time.Duration,

fallbackInterval time.Duration,

runImmediately bool,

) {

if autoCtx.Err() != nil {

return

}

delay := time.Duration(0)

if !runImmediately {

delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)

}

p.autoMu.Lock()

if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {

p.autoMu.Unlock()

return

}

prevCancel := p.autoTaskCancel

p.autoTaskCancel = nil

p.autoMu.Unlock()

if prevCancel != nil {

prevCancel()

}

cancelFunc, err := exec.ScheduleFunc(func(_ context.Context) {

defer func() {

if r := recover(); r != nil {

slog.ErrorContext(autoCtx, "auto refresh token provider panicked", slog.Any("cause", r))

}

}()

if autoCtx.Err() != nil {

for {

if !sleepForAutoRefresh(autoCtx, delay) {

return

}

refreshFailed := false

if _, err := p.EnsureFresh(autoCtx, refreshBefore); err != nil {

slog.WarnContext(autoCtx, "failed to auto refresh token", slog.Any("error", err))

refreshFailed = true

}

if autoCtx.Err() != nil {

refreshFailed, ok := p.runAutoRefreshOnce(autoCtx, refreshBefore)

if !ok {

return

}

if refreshFailed {

// Use fallback interval to avoid tight retry loops on persistent errors

// (e.g. refresh_token_reused, invalid_grant).

p.scheduleNextAutoRefreshWithDelay(autoCtx, exec, refreshBefore, fallbackInterval, fallbackInterval)

delay = fallbackInterval

} else {

p.scheduleNextAutoRefresh(autoCtx, exec, refreshBefore, fallbackInterval, false)

delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)

}

}, delay)

if err != nil {

p.StopAutoRefresh()

return

}

p.autoMu.Lock()

if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {

p.autoMu.Unlock()

cancelFunc()

return

}

p.autoTaskCancel = cancelFunc

p.autoMu.Unlock()

}

func (p *TokenProvider) runAutoRefresh(

autoCtx context.Context,

refreshBefore time.Duration,

fallbackInterval time.Duration,

) {

defer func() {

if r := recover(); r != nil {

slog.ErrorContext(autoCtx, "auto refresh token provider panicked", slog.Any("cause", r))

}

}()

delay := time.Duration(0)

for {

if !sleepForAutoRefresh(autoCtx, delay) {

return

}

refreshFailed, ok := p.runAutoRefreshOnce(autoCtx, refreshBefore)

if !ok {

return

}

if refreshFailed {

delay = fallbackInterval

} else {

delay = p.nextAutoRefreshDelay(refreshBefore, fallbackInterval)

}

}

}

Was this helpful? React with 👍 or 👎 to provide feedback.

greptile-apps · 2026-06-04T13:52:34Z

Greptile Summary

This PR fixes a stall in cache refresh caused by onEnabledChannelsSwap blocking synchronously on stopping old channels' token providers. It also replaces both providers' ScheduledExecutor-based auto-refresh with a simpler goroutine loop, removing the github.com/zhenzou/executors dependency entirely.

channel.go: Old-channel cleanup (stop token providers + outbounds) is moved to a background goroutine so the swap — including the cache-version bump and new-provider start — returns immediately. A defensive panic recovery is added to prevent the goroutine from crashing silently.
token_provider.go / device_flow_provider.go: The complex recursive scheduleNextAutoRefresh chain using executors.ScheduledExecutor is replaced by a straightforward runAutoRefresh goroutine loop with sleepForAutoRefresh. StopAutoRefresh now only cancels the context instead of also awaiting executor shutdown.
go.mod / go.sum: Removes github.com/zhenzou/executors and eight transitive dependencies.

Confidence Score: 5/5

Safe to merge. The core fix — moving old-channel cleanup to a background goroutine — is straightforward and well-tested. The executor-to-goroutine refactor in both OAuth providers simplifies the scheduling logic without changing observable behavior.

All changed code paths are logically sound. The goroutine cleanup in channel.go correctly copies the slice before launching so it cannot race with the next swap. The new runAutoRefresh loop replicates the old executor's sequencing (one refresh at a time, delay computed from token expiry). The one issue found — panic recovery scoped to the whole cleanup function rather than each channel iteration — only matters if stopTokenProvider or stopChannelOutbounds ever actually panic, which is unlikely in practice.

internal/server/biz/channel.go — the per-function panic recovery in cleanupSwappedChannels could leave later channels uncleaned on an unexpected panic.

Important Files Changed

Filename	Overview
internal/server/biz/channel.go	Moves old-channel cleanup (stopTokenProvider + stopChannelOutbounds) to a background goroutine so the swap operation returns immediately; adds a panic-recovery wrapper, but that wrapper only catches the first panic in the loop.
internal/server/biz/channel_llm_openai_compatible_test.go	Adds TestOnEnabledChannelsSwapDoesNotWaitForOldCleanup, correctly verifying the swap returns quickly while background cleanup completes asynchronously.
llm/oauth/token_provider.go	Replaces the executors.ScheduledExecutor-based scheduling with a plain goroutine loop (runAutoRefresh + runAutoRefreshOnce); removes executor fields and the associated Shutdown() call from StopAutoRefresh.
llm/oauth/device_flow_provider.go	Same executor-to-goroutine refactor as token_provider.go; removes scheduleNextAutoRefresh/scheduleNextAutoRefreshWithDelay and replaces with runAutoRefresh loop identical in structure to TokenProvider's.
llm/go.mod	Removes the github.com/zhenzou/executors direct dependency and several transitive deps that came with it (supercronic, dubbogo, panjf2000/ants, etc.), shrinking the dependency tree significantly.

Sequence Diagram

sequenceDiagram
    participant Loader as Channel Loader
    participant Swap as onEnabledChannelsSwap
    participant NewCh as New Channel
    participant Cleanup as cleanupSwappedChannels (goroutine)
    participant OldCh as Old Channel

    Loader->>Swap: "call with old=[...], new=[...]"
    Swap->>Swap: cacheVersion.Add(1)
    loop for each new channel
        Swap->>NewCh: startTokenProvider()
        NewCh-->>Swap: returns (auto-refresh goroutine started)
    end
    Swap->>Cleanup: go cleanupSwappedChannels(oldChannels copy)
    Swap-->>Loader: returns immediately

    Note over Cleanup,OldCh: runs asynchronously
    loop for each old channel
        Cleanup->>OldCh: stopTokenProvider()
        OldCh-->>Cleanup: cancel context (auto-refresh goroutine exits)
        Cleanup->>OldCh: stopChannelOutbounds()
    end

_{Reviews (2): Last reviewed commit: "fix: cache did not refresh sometime" | Re-trigger Greptile}

greptile-apps · 2026-06-04T13:52:37Z

+func (p *TokenProvider) runAutoRefreshOnce(autoCtx context.Context, refreshBefore time.Duration) (refreshFailed bool, ok bool) {
+	defer func() {
+		if r := recover(); r != nil {
+			slog.ErrorContext(autoCtx, "auto refresh token provider panicked", slog.Any("cause", r))
+			refreshFailed = false
+			ok = false
+		}
+	}()
+
 	if autoCtx.Err() != nil {
-		return
+		return false, false
 	}

-	p.autoMu.Lock()
-
-	if p.autoCancel == nil || p.autoExecutor == nil || exec != p.autoExecutor {
-		p.autoMu.Unlock()
-		return
+	if _, err := p.EnsureFresh(autoCtx, refreshBefore); err != nil {
+		slog.WarnContext(autoCtx, "failed to auto refresh token", slog.Any("error", err))
+		refreshFailed = true
 	}

-	prevCancel := p.autoTaskCancel
-	p.autoTaskCancel = nil
-	p.autoMu.Unlock()
-
-	if prevCancel != nil {
-		prevCancel()
+	if autoCtx.Err() != nil {
+		return refreshFailed, false
 	}

-	cancelFunc, err := exec.ScheduleFunc(func(_ context.Context) {
-		if autoCtx.Err() != nil {
-			return
-		}
+	return refreshFailed, true
+}


Panic leaves autoCancel set — auto-refresh silently dead

When runAutoRefreshOnce recovers from a panic it returns ok = false, which causes runAutoRefresh to exit. The goroutine is gone, but p.autoCancel was never cleared. Any subsequent call to StartAutoRefresh hits the if p.autoCancel != nil guard and returns early without launching a new goroutine, so the provider is permanently stuck with no background refresh until an explicit StopAutoRefresh() call clears the field.

A minimal fix is to clear p.autoCancel inside the goroutine when exiting abnormally — e.g., call p.StopAutoRefresh() at the end of runAutoRefresh so the state is always consistent when the loop exits for any reason. The same issue exists in DeviceFlowProvider.runAutoRefreshOnce.

devin-ai-integration Bot reviewed Jun 4, 2026

View reviewed changes

greptile-apps Bot reviewed Jun 4, 2026

View reviewed changes

fix: cache did not refresh sometime

2230fbd

looplj force-pushed the dev-tmp branch from 77936a3 to 2230fbd Compare June 4, 2026 14:35

looplj merged commit ac64075 into unstable Jun 4, 2026
4 checks passed

junjiangao pushed a commit to junjiangao/axonhub that referenced this pull request Jun 5, 2026

fix: cache did not refresh sometime (looplj#1783)

6231297

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix: cache did not refresh sometime#1783

fix: cache did not refresh sometime#1783
looplj merged 1 commit into
unstablefrom
dev-tmp

looplj commented Jun 4, 2026 •

edited by devin-ai-integration Bot

Loading

Uh oh!

devin-ai-integration Bot left a comment

Uh oh!

devin-ai-integration Bot Jun 4, 2026

Uh oh!

devin-ai-integration Bot Jun 4, 2026

Uh oh!

greptile-apps Bot commented Jun 4, 2026 •

edited

Loading

Uh oh!

greptile-apps Bot Jun 4, 2026

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

1 participant

Conversation

looplj commented Jun 4, 2026 • edited by devin-ai-integration Bot Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

devin-ai-integration Bot left a comment

Choose a reason for hiding this comment

Uh oh!

devin-ai-integration Bot Jun 4, 2026

Choose a reason for hiding this comment

Uh oh!

devin-ai-integration Bot Jun 4, 2026

Choose a reason for hiding this comment

Uh oh!

greptile-apps Bot commented Jun 4, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Greptile Summary

Confidence Score: 5/5

Important Files Changed

Sequence Diagram

Uh oh!

greptile-apps Bot Jun 4, 2026

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

1 participant

looplj commented Jun 4, 2026 •

edited by devin-ai-integration Bot

Loading

greptile-apps Bot commented Jun 4, 2026 •

edited

Loading