Skip to content

Commit 3a43dcb

Browse files
twoethswemeetagain
andauthored
feat: track epoch transition steps time in metrics (#6143)
* feat: track epoch transition steps time in metrics * feat: also track beforeProcessEpoch and afterProcessEpoch * fix: do not need to time simple steps in processEpoch * fix: incorporate to single metric with step label * fix: pass metrics to processEpoch() * chore: use separate timer for each epoch transition step * chore: use blocks to separate timers --------- Co-authored-by: Cayman <caymannava@gmail.com>
1 parent d7357fd commit 3a43dcb

4 files changed

Lines changed: 90 additions & 16 deletions

File tree

packages/beacon-node/src/metrics/metrics/lodestar.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,12 @@ export function createLodestarMetrics(
286286
help: "Time to call commit after process a single epoch transition in seconds",
287287
buckets: [0.01, 0.05, 0.1, 0.2, 0.5, 0.75, 1],
288288
}),
289+
epochTransitionStepTime: register.histogram<"step">({
290+
name: "lodestar_stfn_epoch_transition_step_seconds",
291+
help: "Time to call each step of epoch transition in seconds",
292+
labelNames: ["step"],
293+
buckets: [0.01, 0.05, 0.1, 0.2, 0.5, 0.75, 1],
294+
}),
289295
processBlockTime: register.histogram({
290296
name: "lodestar_stfn_process_block_seconds",
291297
help: "Time to process a single block in seconds",

packages/state-transition/src/epoch/index.ts

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
CachedBeaconStatePhase0,
1313
EpochTransitionCache,
1414
} from "../types.js";
15+
import {BeaconStateTransitionMetrics} from "../metrics.js";
1516
import {processEffectiveBalanceUpdates} from "./processEffectiveBalanceUpdates.js";
1617
import {processEth1DataReset} from "./processEth1DataReset.js";
1718
import {processHistoricalRootsUpdate} from "./processHistoricalRootsUpdate.js";
@@ -50,7 +51,12 @@ export {computeUnrealizedCheckpoints} from "./computeUnrealizedCheckpoints.js";
5051
const maxValidatorsPerStateSlashing = SLOTS_PER_EPOCH * MAX_ATTESTER_SLASHINGS * MAX_VALIDATORS_PER_COMMITTEE;
5152
const maxSafeValidators = Math.floor(Number.MAX_SAFE_INTEGER / MAX_EFFECTIVE_BALANCE);
5253

53-
export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, cache: EpochTransitionCache): void {
54+
export function processEpoch(
55+
fork: ForkSeq,
56+
state: CachedBeaconStateAllForks,
57+
cache: EpochTransitionCache,
58+
metrics?: BeaconStateTransitionMetrics | null
59+
): void {
5460
// state.slashings is initially a Gwei (BigInt) vector, however since Nov 2023 it's converted to UintNum64 (number) vector in the state transition because:
5561
// - state.slashings[nextEpoch % EPOCHS_PER_SLASHINGS_VECTOR] is reset per epoch in processSlashingsReset()
5662
// - max slashed validators per epoch is SLOTS_PER_EPOCH * MAX_ATTESTER_SLASHINGS * MAX_VALIDATORS_PER_COMMITTEE which is 32 * 2 * 2048 = 131072 on mainnet
@@ -59,19 +65,53 @@ export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, ca
5965
throw new Error("Lodestar does not support this network, parameters don't fit number value inside state.slashings");
6066
}
6167

62-
processJustificationAndFinalization(state, cache);
68+
{
69+
const timer = metrics?.epochTransitionStepTime.startTimer({
70+
step: "processJustificationAndFinalization",
71+
});
72+
processJustificationAndFinalization(state, cache);
73+
timer?.();
74+
}
75+
6376
if (fork >= ForkSeq.altair) {
77+
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processInactivityUpdates"});
6478
processInactivityUpdates(state as CachedBeaconStateAltair, cache);
79+
timer?.();
6580
}
81+
6682
// processRewardsAndPenalties() is 2nd step in the specs, we optimize to do it
6783
// after processSlashings() to update balances only once
6884
// processRewardsAndPenalties(state, cache);
69-
processRegistryUpdates(state, cache);
85+
{
86+
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processRegistryUpdates"});
87+
processRegistryUpdates(state, cache);
88+
timer?.();
89+
}
90+
7091
// accumulate slashing penalties and only update balances once in processRewardsAndPenalties()
71-
const slashingPenalties = processSlashings(state, cache, false);
72-
processRewardsAndPenalties(state, cache, slashingPenalties);
92+
let slashingPenalties: number[];
93+
{
94+
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processSlashings"});
95+
slashingPenalties = processSlashings(state, cache, false);
96+
timer?.();
97+
}
98+
99+
{
100+
const timer = metrics?.epochTransitionStepTime.startTimer({step: "processRewardsAndPenalties"});
101+
processRewardsAndPenalties(state, cache, slashingPenalties);
102+
timer?.();
103+
}
104+
73105
processEth1DataReset(state, cache);
74-
processEffectiveBalanceUpdates(state, cache);
106+
107+
{
108+
const timer = metrics?.epochTransitionStepTime.startTimer({
109+
step: "processEffectiveBalanceUpdates",
110+
});
111+
processEffectiveBalanceUpdates(state, cache);
112+
timer?.();
113+
}
114+
75115
processSlashingsReset(state, cache);
76116
processRandaoMixesReset(state, cache);
77117

@@ -84,7 +124,20 @@ export function processEpoch(fork: ForkSeq, state: CachedBeaconStateAllForks, ca
84124
if (fork === ForkSeq.phase0) {
85125
processParticipationRecordUpdates(state as CachedBeaconStatePhase0);
86126
} else {
87-
processParticipationFlagUpdates(state as CachedBeaconStateAltair);
88-
processSyncCommitteeUpdates(state as CachedBeaconStateAltair);
127+
{
128+
const timer = metrics?.epochTransitionStepTime.startTimer({
129+
step: "processParticipationFlagUpdates",
130+
});
131+
processParticipationFlagUpdates(state as CachedBeaconStateAltair);
132+
timer?.();
133+
}
134+
135+
{
136+
const timer = metrics?.epochTransitionStepTime.startTimer({
137+
step: "processSyncCommitteeUpdates",
138+
});
139+
processSyncCommitteeUpdates(state as CachedBeaconStateAltair);
140+
timer?.();
141+
}
89142
}
90143
}

packages/state-transition/src/metrics.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {AttesterStatus} from "./util/attesterStatus.js";
55
export type BeaconStateTransitionMetrics = {
66
epochTransitionTime: Histogram;
77
epochTransitionCommitTime: Histogram;
8+
epochTransitionStepTime: Histogram<"step">;
89
processBlockTime: Histogram;
910
processBlockCommitTime: Histogram;
1011
stateHashTreeRootTime: Histogram;
@@ -23,7 +24,7 @@ export type BeaconStateTransitionMetrics = {
2324
type LabelValues<T extends string> = Partial<Record<T, string | number>>;
2425

2526
interface Histogram<T extends string = string> {
26-
startTimer(): () => void;
27+
startTimer(labels?: LabelValues<T>): (labels?: LabelValues<T>) => number;
2728

2829
observe(value: number): void;
2930
observe(labels: LabelValues<T>, values: number): void;

packages/state-transition/src/stateTransition.ts

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import {toHexString} from "@chainsafe/ssz";
22
import {allForks, Slot, ssz} from "@lodestar/types";
33
import {SLOTS_PER_EPOCH} from "@lodestar/params";
44
import {BeaconStateTransitionMetrics, onPostStateMetrics, onStateCloneMetrics} from "./metrics.js";
5-
import {beforeProcessEpoch, EpochTransitionCacheOpts} from "./cache/epochTransitionCache.js";
5+
import {beforeProcessEpoch, EpochTransitionCache, EpochTransitionCacheOpts} from "./cache/epochTransitionCache.js";
66
import {
77
CachedBeaconStateAllForks,
88
CachedBeaconStatePhase0,
@@ -165,19 +165,33 @@ function processSlotsWithTransientCache(
165165

166166
const epochTransitionTimer = metrics?.epochTransitionTime.startTimer();
167167

168-
const epochTransitionCache = beforeProcessEpoch(postState, epochTransitionCacheOpts);
169-
processEpoch(fork, postState, epochTransitionCache);
168+
let epochTransitionCache: EpochTransitionCache;
169+
{
170+
const timer = metrics?.epochTransitionStepTime.startTimer({step: "beforeProcessEpoch"});
171+
epochTransitionCache = beforeProcessEpoch(postState, epochTransitionCacheOpts);
172+
timer?.();
173+
}
174+
175+
processEpoch(fork, postState, epochTransitionCache, metrics);
176+
170177
const {currentEpoch, statuses, balances} = epochTransitionCache;
171178
metrics?.registerValidatorStatuses(currentEpoch, statuses, balances);
172179

173180
postState.slot++;
174-
postState.epochCtx.afterProcessEpoch(postState, epochTransitionCache);
181+
182+
{
183+
const timer = metrics?.epochTransitionStepTime.startTimer({step: "afterProcessEpoch"});
184+
postState.epochCtx.afterProcessEpoch(postState, epochTransitionCache);
185+
timer?.();
186+
}
175187

176188
// Running commit here is not strictly necessary. The cost of running commit twice (here + after process block)
177189
// Should be negligible but gives better metrics to differentiate the cost of it for block and epoch proc.
178-
const epochTransitionCommitTimer = metrics?.epochTransitionCommitTime.startTimer();
179-
postState.commit();
180-
epochTransitionCommitTimer?.();
190+
{
191+
const timer = metrics?.epochTransitionCommitTime.startTimer();
192+
postState.commit();
193+
timer?.();
194+
}
181195

182196
// Note: time only on success. Include beforeProcessEpoch, processEpoch, afterProcessEpoch, commit
183197
epochTransitionTimer?.();

0 commit comments

Comments
 (0)