fix(cron): stop unresolved next-run refire loops (#66083)

Merged via squash.

Prepared head SHA: b86ba58d3b
This commit is contained in:
Mariano
2026-04-13 20:10:03 +02:00
committed by GitHub
parent 114ff23f2a
commit c602824215
5 changed files with 285 additions and 6 deletions

View File

@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
- Browser/CDP: let managed local Chrome readiness, status probes, and managed loopback CDP control bypass browser SSRF policy for their own loopback control plane, so OpenClaw no longer misclassifies a healthy child browser as "not reachable after start". (#65695, #66043) Thanks @mbelinky.
- Gateway/sessions: stop heartbeat, cron-event, and exec-event turns from overwriting shared-session routing and origin metadata, preventing synthetic `heartbeat` targets from poisoning later cron or user delivery. (#63733, #35300)
- Browser/CDP: let local attach-only `manual-cdp` profiles reuse the local loopback CDP control plane under strict default policy and remote-class probe timeouts, so tabs/snapshot stop falsely reporting a live local browser session as not running. (#65611, #66080) Thanks @mbelinky.
- Cron/scheduler: stop inventing short retries when cron next-run calculation returns no valid future slot, and keep a maintenance wake armed so enabled unscheduled jobs recover without entering a refire loop. (#66019, #66083) Thanks @mbelinky.
## 2026.4.12

View File

@@ -141,6 +141,43 @@ describe("CronService - armTimer tight loop prevention", () => {
timeoutSpy.mockRestore();
});
it("keeps a maintenance wake armed when enabled jobs have no nextRunAtMs", () => {
const timeoutSpy = vi.spyOn(globalThis, "setTimeout");
const now = Date.parse("2026-02-28T12:32:00.000Z");
const state = createTimerState({
storePath: "/tmp/test-cron/jobs.json",
now,
});
state.store = {
version: 1,
jobs: [
{
id: "missing-next-run",
name: "missing-next-run",
enabled: true,
deleteAfterRun: false,
createdAtMs: now - 60_000,
updatedAtMs: now - 60_000,
schedule: { kind: "cron", expr: "*/15 * * * *" },
sessionTarget: "isolated" as const,
wakeMode: "next-heartbeat" as const,
payload: { kind: "agentTurn" as const, message: "test" },
delivery: { mode: "none" as const },
state: {},
},
],
};
armTimer(state);
expect(state.timer).not.toBeNull();
const delays = extractTimeoutDelays(timeoutSpy);
expect(delays).toContain(60_000);
timeoutSpy.mockRestore();
});
it("breaks the onTimer→armTimer hot-loop with stuck runningAtMs", async () => {
const timeoutSpy = vi.spyOn(globalThis, "setTimeout");
const store = await makeStorePath();

View File

@@ -0,0 +1,114 @@
import { describe, expect, it, vi } from "vitest";
import {
createDefaultIsolatedRunner,
createIsolatedRegressionJob,
noopLogger,
setupCronRegressionFixtures,
writeCronJobs,
} from "../../test/helpers/cron/service-regression-fixtures.js";
import * as schedule from "./schedule.js";
import { createCronServiceState } from "./service/state.js";
import { onTimer } from "./service/timer.js";
const issue66019Fixtures = setupCronRegressionFixtures({ prefix: "cron-66019-" });
describe("#66019 unresolved next-run repro", () => {
it("does not refire a recurring cron job 2s later when next-run resolution returns undefined", async () => {
const store = issue66019Fixtures.makeStorePath();
const scheduledAt = Date.parse("2026-04-13T15:40:00.000Z");
let now = scheduledAt;
const cronJob = createIsolatedRegressionJob({
id: "cron-66019-minimal-success",
name: "cron-66019-minimal-success",
scheduledAt,
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
payload: { kind: "agentTurn", message: "ping" },
state: { nextRunAtMs: scheduledAt - 1_000 },
});
await writeCronJobs(store.storePath, [cronJob]);
const runIsolatedAgentJob = createDefaultIsolatedRunner();
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
const state = createCronServiceState({
cronEnabled: true,
storePath: store.storePath,
log: noopLogger,
nowMs: () => now,
enqueueSystemEvent: vi.fn(),
requestHeartbeatNow: vi.fn(),
runIsolatedAgentJob,
});
try {
await onTimer(state);
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
// Before the fix, applyJobResult would synthesize endedAt + 2_000 here,
// so a second tick a couple seconds later would refire the same job.
now = scheduledAt + 2_001;
await onTimer(state);
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
} finally {
nextRunSpy.mockRestore();
if (state.timer) {
clearTimeout(state.timer);
state.timer = null;
}
}
});
it("does not refire a recurring errored cron job after the first backoff window when next-run resolution returns undefined", async () => {
const store = issue66019Fixtures.makeStorePath();
const scheduledAt = Date.parse("2026-04-13T15:45:00.000Z");
let now = scheduledAt;
const cronJob = createIsolatedRegressionJob({
id: "cron-66019-minimal-error",
name: "cron-66019-minimal-error",
scheduledAt,
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
payload: { kind: "agentTurn", message: "ping" },
state: { nextRunAtMs: scheduledAt - 1_000 },
});
await writeCronJobs(store.storePath, [cronJob]);
const runIsolatedAgentJob = vi.fn().mockResolvedValue({
status: "error",
error: "synthetic failure",
});
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
const state = createCronServiceState({
cronEnabled: true,
storePath: store.storePath,
log: noopLogger,
nowMs: () => now,
enqueueSystemEvent: vi.fn(),
requestHeartbeatNow: vi.fn(),
runIsolatedAgentJob,
});
try {
await onTimer(state);
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
// Before the fix, the error branch would synthesize the first backoff
// retry (30s), so the next tick after that window would rerun the job.
now = scheduledAt + 30_001;
await onTimer(state);
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
} finally {
nextRunSpy.mockRestore();
if (state.timer) {
clearTimeout(state.timer);
state.timer = null;
}
}
});
});

View File

@@ -1061,11 +1061,11 @@ describe("cron service timer regressions", () => {
expect(job.state.lastStatus).toBe("ok");
expect(job.state.scheduleErrorCount).toBe(1);
expect(job.state.lastError).toMatch(/^schedule error:/);
expect(job.state.nextRunAtMs).toBe(endedAt + 2_000);
expect(job.state.nextRunAtMs).toBeUndefined();
expect(job.enabled).toBe(true);
});
it("falls back to backoff schedule when cron next-run computation throws on error path (#30905)", () => {
it("keeps state updates when cron next-run computation throws on error path (#30905)", () => {
const startedAt = Date.parse("2026-03-02T12:05:00.000Z");
const endedAt = startedAt + 25;
const state = createCronServiceState({
@@ -1100,10 +1100,93 @@ describe("cron service timer regressions", () => {
expect(job.state.consecutiveErrors).toBe(1);
expect(job.state.scheduleErrorCount).toBe(1);
expect(job.state.lastError).toMatch(/^schedule error:/);
expect(job.state.nextRunAtMs).toBe(endedAt + 30_000);
expect(job.state.nextRunAtMs).toBeUndefined();
expect(job.enabled).toBe(true);
});
it("does not synthesize a 2s retry when cron schedule computation returns undefined (#66019)", () => {
const startedAt = Date.parse("2026-04-13T15:40:00.000Z");
const endedAt = startedAt + 50;
const state = createCronServiceState({
cronEnabled: true,
storePath: "/tmp/cron-66019-success.json",
log: noopLogger,
nowMs: () => endedAt,
enqueueSystemEvent: vi.fn(),
requestHeartbeatNow: vi.fn(),
runIsolatedAgentJob: createDefaultIsolatedRunner(),
});
const job = createIsolatedRegressionJob({
id: "cron-66019-success",
name: "cron-66019-success",
scheduledAt: startedAt,
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
payload: { kind: "agentTurn", message: "ping" },
state: { nextRunAtMs: startedAt - 1_000, runningAtMs: startedAt - 500 },
});
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
try {
const shouldDelete = applyJobResult(state, job, {
status: "ok",
delivered: true,
startedAt,
endedAt,
});
expect(shouldDelete).toBe(false);
expect(job.state.runningAtMs).toBeUndefined();
expect(job.state.lastRunAtMs).toBe(startedAt);
expect(job.state.lastStatus).toBe("ok");
expect(job.state.nextRunAtMs).toBeUndefined();
expect(job.enabled).toBe(true);
} finally {
nextRunSpy.mockRestore();
}
});
it("does not synthesize backoff retries when cron schedule computation returns undefined (#66019)", () => {
const startedAt = Date.parse("2026-04-13T15:45:00.000Z");
const endedAt = startedAt + 25;
const state = createCronServiceState({
cronEnabled: true,
storePath: "/tmp/cron-66019-error.json",
log: noopLogger,
nowMs: () => endedAt,
enqueueSystemEvent: vi.fn(),
requestHeartbeatNow: vi.fn(),
runIsolatedAgentJob: createDefaultIsolatedRunner(),
});
const job = createIsolatedRegressionJob({
id: "cron-66019-error",
name: "cron-66019-error",
scheduledAt: startedAt,
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
payload: { kind: "agentTurn", message: "ping" },
state: { nextRunAtMs: startedAt - 1_000, runningAtMs: startedAt - 500 },
});
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
try {
const shouldDelete = applyJobResult(state, job, {
status: "error",
error: "synthetic failure",
startedAt,
endedAt,
});
expect(shouldDelete).toBe(false);
expect(job.state.runningAtMs).toBeUndefined();
expect(job.state.lastRunAtMs).toBe(startedAt);
expect(job.state.lastStatus).toBe("error");
expect(job.state.consecutiveErrors).toBe(1);
expect(job.state.nextRunAtMs).toBeUndefined();
expect(job.enabled).toBe(true);
} finally {
nextRunSpy.mockRestore();
}
});
it("force run preserves 'every' anchor while recording manual lastRunAtMs", () => {
const nowMs = Date.now();
const everyMs = 24 * 60 * 60 * 1_000;

View File

@@ -240,6 +240,27 @@ function isTransientCronError(error: string | undefined, retryOn?: CronRetryOn[]
return keys.some((k) => TRANSIENT_PATTERNS[k]?.test(error));
}
function resolveCronNextRunWithLowerBound(params: {
state: CronServiceState;
job: CronJob;
naturalNext: number | undefined;
lowerBoundMs: number;
context: "completion" | "error_backoff";
}): number | undefined {
if (params.naturalNext === undefined) {
params.state.deps.log.warn(
{
jobId: params.job.id,
jobName: params.job.name,
context: params.context,
},
"cron: next run unresolved; clearing schedule to avoid a refire loop",
);
return undefined;
}
return Math.max(params.naturalNext, params.lowerBoundMs);
}
function resolveRetryConfig(cronConfig?: CronConfig) {
const retry = cronConfig?.retry;
return {
@@ -518,7 +539,17 @@ export function applyJobResult(
const backoffNext = result.endedAt + backoff;
// Use whichever is later: the natural next run or the backoff delay.
job.state.nextRunAtMs =
normalNext !== undefined ? Math.max(normalNext, backoffNext) : backoffNext;
job.schedule.kind === "cron"
? resolveCronNextRunWithLowerBound({
state,
job,
naturalNext: normalNext,
lowerBoundMs: backoffNext,
context: "error_backoff",
})
: normalNext !== undefined
? Math.max(normalNext, backoffNext)
: backoffNext;
state.deps.log.info(
{
jobId: job.id,
@@ -547,8 +578,13 @@ export function applyJobResult(
// schedule computation lands in the same second due to
// timezone/croner edge cases (see #17821).
const minNext = result.endedAt + MIN_REFIRE_GAP_MS;
job.state.nextRunAtMs =
naturalNext !== undefined ? Math.max(naturalNext, minNext) : minNext;
job.state.nextRunAtMs = resolveCronNextRunWithLowerBound({
state,
job,
naturalNext,
lowerBoundMs: minNext,
context: "completion",
});
} else {
job.state.nextRunAtMs = naturalNext;
}
@@ -609,6 +645,14 @@ export function armTimer(state: CronServiceState) {
const withNextRun =
state.store?.jobs.filter((j) => j.enabled && hasScheduledNextRunAtMs(j.state.nextRunAtMs))
.length ?? 0;
if (enabledCount > 0) {
armRunningRecheckTimer(state);
state.deps.log.debug(
{ jobCount, enabledCount, withNextRun, delayMs: MAX_TIMER_DELAY_MS },
"cron: timer armed for maintenance recheck",
);
return;
}
state.deps.log.debug(
{ jobCount, enabledCount, withNextRun },
"cron: armTimer skipped - no jobs with nextRunAtMs",