mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-21 08:43:37 +02:00
fix(cron): stop unresolved next-run refire loops (#66083)
Merged via squash.
Prepared head SHA: b86ba58d3b
This commit is contained in:
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Browser/CDP: let managed local Chrome readiness, status probes, and managed loopback CDP control bypass browser SSRF policy for their own loopback control plane, so OpenClaw no longer misclassifies a healthy child browser as "not reachable after start". (#65695, #66043) Thanks @mbelinky.
|
||||
- Gateway/sessions: stop heartbeat, cron-event, and exec-event turns from overwriting shared-session routing and origin metadata, preventing synthetic `heartbeat` targets from poisoning later cron or user delivery. (#63733, #35300)
|
||||
- Browser/CDP: let local attach-only `manual-cdp` profiles reuse the local loopback CDP control plane under strict default policy and remote-class probe timeouts, so tabs/snapshot stop falsely reporting a live local browser session as not running. (#65611, #66080) Thanks @mbelinky.
|
||||
- Cron/scheduler: stop inventing short retries when cron next-run calculation returns no valid future slot, and keep a maintenance wake armed so enabled unscheduled jobs recover without entering a refire loop. (#66019, #66083) Thanks @mbelinky.
|
||||
|
||||
## 2026.4.12
|
||||
|
||||
|
||||
@@ -141,6 +141,43 @@ describe("CronService - armTimer tight loop prevention", () => {
|
||||
timeoutSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("keeps a maintenance wake armed when enabled jobs have no nextRunAtMs", () => {
|
||||
const timeoutSpy = vi.spyOn(globalThis, "setTimeout");
|
||||
const now = Date.parse("2026-02-28T12:32:00.000Z");
|
||||
|
||||
const state = createTimerState({
|
||||
storePath: "/tmp/test-cron/jobs.json",
|
||||
now,
|
||||
});
|
||||
state.store = {
|
||||
version: 1,
|
||||
jobs: [
|
||||
{
|
||||
id: "missing-next-run",
|
||||
name: "missing-next-run",
|
||||
enabled: true,
|
||||
deleteAfterRun: false,
|
||||
createdAtMs: now - 60_000,
|
||||
updatedAtMs: now - 60_000,
|
||||
schedule: { kind: "cron", expr: "*/15 * * * *" },
|
||||
sessionTarget: "isolated" as const,
|
||||
wakeMode: "next-heartbeat" as const,
|
||||
payload: { kind: "agentTurn" as const, message: "test" },
|
||||
delivery: { mode: "none" as const },
|
||||
state: {},
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
armTimer(state);
|
||||
|
||||
expect(state.timer).not.toBeNull();
|
||||
const delays = extractTimeoutDelays(timeoutSpy);
|
||||
expect(delays).toContain(60_000);
|
||||
|
||||
timeoutSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("breaks the onTimer→armTimer hot-loop with stuck runningAtMs", async () => {
|
||||
const timeoutSpy = vi.spyOn(globalThis, "setTimeout");
|
||||
const store = await makeStorePath();
|
||||
|
||||
114
src/cron/service.issue-66019-unresolved-next-run.test.ts
Normal file
114
src/cron/service.issue-66019-unresolved-next-run.test.ts
Normal file
@@ -0,0 +1,114 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
createDefaultIsolatedRunner,
|
||||
createIsolatedRegressionJob,
|
||||
noopLogger,
|
||||
setupCronRegressionFixtures,
|
||||
writeCronJobs,
|
||||
} from "../../test/helpers/cron/service-regression-fixtures.js";
|
||||
import * as schedule from "./schedule.js";
|
||||
import { createCronServiceState } from "./service/state.js";
|
||||
import { onTimer } from "./service/timer.js";
|
||||
|
||||
const issue66019Fixtures = setupCronRegressionFixtures({ prefix: "cron-66019-" });
|
||||
|
||||
describe("#66019 unresolved next-run repro", () => {
|
||||
it("does not refire a recurring cron job 2s later when next-run resolution returns undefined", async () => {
|
||||
const store = issue66019Fixtures.makeStorePath();
|
||||
const scheduledAt = Date.parse("2026-04-13T15:40:00.000Z");
|
||||
let now = scheduledAt;
|
||||
|
||||
const cronJob = createIsolatedRegressionJob({
|
||||
id: "cron-66019-minimal-success",
|
||||
name: "cron-66019-minimal-success",
|
||||
scheduledAt,
|
||||
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
|
||||
payload: { kind: "agentTurn", message: "ping" },
|
||||
state: { nextRunAtMs: scheduledAt - 1_000 },
|
||||
});
|
||||
await writeCronJobs(store.storePath, [cronJob]);
|
||||
|
||||
const runIsolatedAgentJob = createDefaultIsolatedRunner();
|
||||
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
|
||||
const state = createCronServiceState({
|
||||
cronEnabled: true,
|
||||
storePath: store.storePath,
|
||||
log: noopLogger,
|
||||
nowMs: () => now,
|
||||
enqueueSystemEvent: vi.fn(),
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runIsolatedAgentJob,
|
||||
});
|
||||
|
||||
try {
|
||||
await onTimer(state);
|
||||
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
|
||||
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
|
||||
|
||||
// Before the fix, applyJobResult would synthesize endedAt + 2_000 here,
|
||||
// so a second tick a couple seconds later would refire the same job.
|
||||
now = scheduledAt + 2_001;
|
||||
await onTimer(state);
|
||||
|
||||
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
|
||||
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
|
||||
} finally {
|
||||
nextRunSpy.mockRestore();
|
||||
if (state.timer) {
|
||||
clearTimeout(state.timer);
|
||||
state.timer = null;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("does not refire a recurring errored cron job after the first backoff window when next-run resolution returns undefined", async () => {
|
||||
const store = issue66019Fixtures.makeStorePath();
|
||||
const scheduledAt = Date.parse("2026-04-13T15:45:00.000Z");
|
||||
let now = scheduledAt;
|
||||
|
||||
const cronJob = createIsolatedRegressionJob({
|
||||
id: "cron-66019-minimal-error",
|
||||
name: "cron-66019-minimal-error",
|
||||
scheduledAt,
|
||||
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
|
||||
payload: { kind: "agentTurn", message: "ping" },
|
||||
state: { nextRunAtMs: scheduledAt - 1_000 },
|
||||
});
|
||||
await writeCronJobs(store.storePath, [cronJob]);
|
||||
|
||||
const runIsolatedAgentJob = vi.fn().mockResolvedValue({
|
||||
status: "error",
|
||||
error: "synthetic failure",
|
||||
});
|
||||
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
|
||||
const state = createCronServiceState({
|
||||
cronEnabled: true,
|
||||
storePath: store.storePath,
|
||||
log: noopLogger,
|
||||
nowMs: () => now,
|
||||
enqueueSystemEvent: vi.fn(),
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runIsolatedAgentJob,
|
||||
});
|
||||
|
||||
try {
|
||||
await onTimer(state);
|
||||
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
|
||||
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
|
||||
|
||||
// Before the fix, the error branch would synthesize the first backoff
|
||||
// retry (30s), so the next tick after that window would rerun the job.
|
||||
now = scheduledAt + 30_001;
|
||||
await onTimer(state);
|
||||
|
||||
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(1);
|
||||
expect(state.store?.jobs[0]?.state.nextRunAtMs).toBeUndefined();
|
||||
} finally {
|
||||
nextRunSpy.mockRestore();
|
||||
if (state.timer) {
|
||||
clearTimeout(state.timer);
|
||||
state.timer = null;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -1061,11 +1061,11 @@ describe("cron service timer regressions", () => {
|
||||
expect(job.state.lastStatus).toBe("ok");
|
||||
expect(job.state.scheduleErrorCount).toBe(1);
|
||||
expect(job.state.lastError).toMatch(/^schedule error:/);
|
||||
expect(job.state.nextRunAtMs).toBe(endedAt + 2_000);
|
||||
expect(job.state.nextRunAtMs).toBeUndefined();
|
||||
expect(job.enabled).toBe(true);
|
||||
});
|
||||
|
||||
it("falls back to backoff schedule when cron next-run computation throws on error path (#30905)", () => {
|
||||
it("keeps state updates when cron next-run computation throws on error path (#30905)", () => {
|
||||
const startedAt = Date.parse("2026-03-02T12:05:00.000Z");
|
||||
const endedAt = startedAt + 25;
|
||||
const state = createCronServiceState({
|
||||
@@ -1100,10 +1100,93 @@ describe("cron service timer regressions", () => {
|
||||
expect(job.state.consecutiveErrors).toBe(1);
|
||||
expect(job.state.scheduleErrorCount).toBe(1);
|
||||
expect(job.state.lastError).toMatch(/^schedule error:/);
|
||||
expect(job.state.nextRunAtMs).toBe(endedAt + 30_000);
|
||||
expect(job.state.nextRunAtMs).toBeUndefined();
|
||||
expect(job.enabled).toBe(true);
|
||||
});
|
||||
|
||||
it("does not synthesize a 2s retry when cron schedule computation returns undefined (#66019)", () => {
|
||||
const startedAt = Date.parse("2026-04-13T15:40:00.000Z");
|
||||
const endedAt = startedAt + 50;
|
||||
const state = createCronServiceState({
|
||||
cronEnabled: true,
|
||||
storePath: "/tmp/cron-66019-success.json",
|
||||
log: noopLogger,
|
||||
nowMs: () => endedAt,
|
||||
enqueueSystemEvent: vi.fn(),
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runIsolatedAgentJob: createDefaultIsolatedRunner(),
|
||||
});
|
||||
const job = createIsolatedRegressionJob({
|
||||
id: "cron-66019-success",
|
||||
name: "cron-66019-success",
|
||||
scheduledAt: startedAt,
|
||||
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
|
||||
payload: { kind: "agentTurn", message: "ping" },
|
||||
state: { nextRunAtMs: startedAt - 1_000, runningAtMs: startedAt - 500 },
|
||||
});
|
||||
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
|
||||
|
||||
try {
|
||||
const shouldDelete = applyJobResult(state, job, {
|
||||
status: "ok",
|
||||
delivered: true,
|
||||
startedAt,
|
||||
endedAt,
|
||||
});
|
||||
|
||||
expect(shouldDelete).toBe(false);
|
||||
expect(job.state.runningAtMs).toBeUndefined();
|
||||
expect(job.state.lastRunAtMs).toBe(startedAt);
|
||||
expect(job.state.lastStatus).toBe("ok");
|
||||
expect(job.state.nextRunAtMs).toBeUndefined();
|
||||
expect(job.enabled).toBe(true);
|
||||
} finally {
|
||||
nextRunSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it("does not synthesize backoff retries when cron schedule computation returns undefined (#66019)", () => {
|
||||
const startedAt = Date.parse("2026-04-13T15:45:00.000Z");
|
||||
const endedAt = startedAt + 25;
|
||||
const state = createCronServiceState({
|
||||
cronEnabled: true,
|
||||
storePath: "/tmp/cron-66019-error.json",
|
||||
log: noopLogger,
|
||||
nowMs: () => endedAt,
|
||||
enqueueSystemEvent: vi.fn(),
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runIsolatedAgentJob: createDefaultIsolatedRunner(),
|
||||
});
|
||||
const job = createIsolatedRegressionJob({
|
||||
id: "cron-66019-error",
|
||||
name: "cron-66019-error",
|
||||
scheduledAt: startedAt,
|
||||
schedule: { kind: "cron", expr: "0 7 * * *", tz: "Asia/Shanghai" },
|
||||
payload: { kind: "agentTurn", message: "ping" },
|
||||
state: { nextRunAtMs: startedAt - 1_000, runningAtMs: startedAt - 500 },
|
||||
});
|
||||
const nextRunSpy = vi.spyOn(schedule, "computeNextRunAtMs").mockReturnValue(undefined);
|
||||
|
||||
try {
|
||||
const shouldDelete = applyJobResult(state, job, {
|
||||
status: "error",
|
||||
error: "synthetic failure",
|
||||
startedAt,
|
||||
endedAt,
|
||||
});
|
||||
|
||||
expect(shouldDelete).toBe(false);
|
||||
expect(job.state.runningAtMs).toBeUndefined();
|
||||
expect(job.state.lastRunAtMs).toBe(startedAt);
|
||||
expect(job.state.lastStatus).toBe("error");
|
||||
expect(job.state.consecutiveErrors).toBe(1);
|
||||
expect(job.state.nextRunAtMs).toBeUndefined();
|
||||
expect(job.enabled).toBe(true);
|
||||
} finally {
|
||||
nextRunSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it("force run preserves 'every' anchor while recording manual lastRunAtMs", () => {
|
||||
const nowMs = Date.now();
|
||||
const everyMs = 24 * 60 * 60 * 1_000;
|
||||
|
||||
@@ -240,6 +240,27 @@ function isTransientCronError(error: string | undefined, retryOn?: CronRetryOn[]
|
||||
return keys.some((k) => TRANSIENT_PATTERNS[k]?.test(error));
|
||||
}
|
||||
|
||||
function resolveCronNextRunWithLowerBound(params: {
|
||||
state: CronServiceState;
|
||||
job: CronJob;
|
||||
naturalNext: number | undefined;
|
||||
lowerBoundMs: number;
|
||||
context: "completion" | "error_backoff";
|
||||
}): number | undefined {
|
||||
if (params.naturalNext === undefined) {
|
||||
params.state.deps.log.warn(
|
||||
{
|
||||
jobId: params.job.id,
|
||||
jobName: params.job.name,
|
||||
context: params.context,
|
||||
},
|
||||
"cron: next run unresolved; clearing schedule to avoid a refire loop",
|
||||
);
|
||||
return undefined;
|
||||
}
|
||||
return Math.max(params.naturalNext, params.lowerBoundMs);
|
||||
}
|
||||
|
||||
function resolveRetryConfig(cronConfig?: CronConfig) {
|
||||
const retry = cronConfig?.retry;
|
||||
return {
|
||||
@@ -518,7 +539,17 @@ export function applyJobResult(
|
||||
const backoffNext = result.endedAt + backoff;
|
||||
// Use whichever is later: the natural next run or the backoff delay.
|
||||
job.state.nextRunAtMs =
|
||||
normalNext !== undefined ? Math.max(normalNext, backoffNext) : backoffNext;
|
||||
job.schedule.kind === "cron"
|
||||
? resolveCronNextRunWithLowerBound({
|
||||
state,
|
||||
job,
|
||||
naturalNext: normalNext,
|
||||
lowerBoundMs: backoffNext,
|
||||
context: "error_backoff",
|
||||
})
|
||||
: normalNext !== undefined
|
||||
? Math.max(normalNext, backoffNext)
|
||||
: backoffNext;
|
||||
state.deps.log.info(
|
||||
{
|
||||
jobId: job.id,
|
||||
@@ -547,8 +578,13 @@ export function applyJobResult(
|
||||
// schedule computation lands in the same second due to
|
||||
// timezone/croner edge cases (see #17821).
|
||||
const minNext = result.endedAt + MIN_REFIRE_GAP_MS;
|
||||
job.state.nextRunAtMs =
|
||||
naturalNext !== undefined ? Math.max(naturalNext, minNext) : minNext;
|
||||
job.state.nextRunAtMs = resolveCronNextRunWithLowerBound({
|
||||
state,
|
||||
job,
|
||||
naturalNext,
|
||||
lowerBoundMs: minNext,
|
||||
context: "completion",
|
||||
});
|
||||
} else {
|
||||
job.state.nextRunAtMs = naturalNext;
|
||||
}
|
||||
@@ -609,6 +645,14 @@ export function armTimer(state: CronServiceState) {
|
||||
const withNextRun =
|
||||
state.store?.jobs.filter((j) => j.enabled && hasScheduledNextRunAtMs(j.state.nextRunAtMs))
|
||||
.length ?? 0;
|
||||
if (enabledCount > 0) {
|
||||
armRunningRecheckTimer(state);
|
||||
state.deps.log.debug(
|
||||
{ jobCount, enabledCount, withNextRun, delayMs: MAX_TIMER_DELAY_MS },
|
||||
"cron: timer armed for maintenance recheck",
|
||||
);
|
||||
return;
|
||||
}
|
||||
state.deps.log.debug(
|
||||
{ jobCount, enabledCount, withNextRun },
|
||||
"cron: armTimer skipped - no jobs with nextRunAtMs",
|
||||
|
||||
Reference in New Issue
Block a user