Benchmark Examples

CLI Examples

Test the performance and convergence of operators and integrators via the command-line interface (CLI).

1) Wall-clock microbenchmark

Measures average step time using high-resolution timing on macOS.

t_{\text{step}} = \frac{\text{elapsed}}{12000000}

-- Wall-clock microbenchmark: times `steps` calls to ooc.step with the macOS
-- mach_absolute_time tick counter and logs the total and per-step time.
local ffi = require("ffi")
local steps = 12000000

-- mach_absolute_time exists only on macOS; fail early with a clear message
-- instead of an unresolved-symbol error from the FFI.
assert(ffi.os == "OSX", "this benchmark requires macOS (mach_absolute_time)")

-- Declarations for the macOS high-resolution timer
ffi.cdef [[
    uint64_t mach_absolute_time(void);
    typedef struct {
        uint32_t numer;
        uint32_t denom;
    } mach_timebase_info_data_t;
    int mach_timebase_info(mach_timebase_info_data_t *info);
]]

-- Allocate and populate timebase info (numer/denom scale ticks to nanoseconds)
local info = ffi.new("mach_timebase_info_data_t")
local status = ffi.C.mach_timebase_info(info)
-- A non-zero status or a zero denominator would make every timing meaningless.
assert(status == 0 and info.denom ~= 0, "mach_timebase_info failed")

--- Convert a tick delta from mach_absolute_time into seconds.
-- @param ticks uint64_t cdata tick count
-- @return number of seconds as a Lua number
local function ticks_to_seconds(ticks)
    -- Convert to a Lua number first so the scaling happens in floating point
    -- and is not truncated by 64-bit integer division.
    return tonumber(ticks) * info.numer / info.denom * 1e-9
end

local ctx = ooc.create()

ooc.log("Starting simulation with %d steps", steps)

-- Record start time as raw ticks
local t0 = ffi.C.mach_absolute_time()

for _ = 1, steps do
    ooc.step(ctx)
end

-- Subtract raw ticks before converting, so the precision of the result does
-- not depend on the magnitude of the absolute timestamp.
local elapsed = ticks_to_seconds(ffi.C.mach_absolute_time() - t0)

ooc.log("%d steps in %.3f s (%.16f ms/step)", steps, elapsed, elapsed * 1000 / steps)

ooc.shutdown(ctx)

return ctx

Example output:

oak@field % ./bin/cli --script wall-clock.lua
[INFO] Starting simulation with 12000000 steps
[INFO] 12000000 steps in 0.476 s (0.0000396332916668 ms/step)

2) Step metrics sweep

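Collects the last $N$ step metrics and reports the mean accepted time step

\bar{\Delta t} = \frac{1}{N}\sum_{i=1}^{N} m_i^{\text{accepted}},

where $m_i^{\text{accepted}}$ is the `accepted_dt` field of the $i$-th collected metric.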

-- Step metrics sweep: runs `steps` simulation steps, then averages the
-- `accepted_dt` field over the step-metric history returned by the context.
local steps = 12000000
local ctx = ooc.create()

ooc.log("Starting simulation with %d steps", steps)

for _ = 1, steps do
    ooc.step(ctx)
end

-- Ask for a history of 128 entries; the number actually returned is #history.
local history = ooc.step_metrics_history(ctx, 128)
local count = #history

if count > 0 then
    local sum_dt = 0
    for _, m in ipairs(history) do
        sum_dt = sum_dt + m.accepted_dt
    end
    ooc.log("Average dt over last %d steps: %.6f", count, sum_dt / count)
else
    -- An empty history would otherwise evaluate 0/0 and log NaN.
    ooc.log("No step metrics recorded; average dt unavailable")
end

ooc.shutdown(ctx)
return ctx

Example output:

oak@field % ./bin/cli --script step-metrics.lua
[INFO] Starting simulation with 12000000 steps
[INFO] Average dt over last 128 steps: 0.016667

3) Parameterized kernel timings

Times two convolution kernels on the CPU, comparing $t_{3\text{-tap}}$ with $t_{9\text{-tap}}$ to see how the runtime scales with wider stencils.

-- Parameterized kernel timings: registers two complex fields and a minimal
-- convolution operator, then measures the CPU time (os.clock) of `step_count`
-- steps for a 3-tap and a 9-tap kernel configuration.
local step_count = 12000000
local size = 512
local ctx = ooc.create()
local clock = os.clock

ooc.log("Starting simulation with %d steps", step_count)

--- Add a one-dimensional complex_double field of `size` elements, filled with {0.0, 0.0}.
-- A fresh option table is built on every call so the two fields share no state.
local function add_zero_field()
    return ooc.add_field(ctx, {size}, {
        type = "complex_double",
        fill = {0.0, 0.0}
    })
end

local field_u = add_zero_field()
local field_v = add_zero_field()

-- NOTE(review): the returned handle is never used; the update call below
-- addresses the operator with the literal 0 instead. Confirm they refer to
-- the same operator.
local conv_op = ooc.add_minimal_convolution_operator(ctx, field_u, field_v)

--- Apply `kernel_opts` to the convolution operator and time `step_count` steps.
-- @param kernel_opts option table forwarded to ooc.minimal_convolution_update
-- @return CPU seconds spent in the stepping loop, as measured by os.clock
local function timed_run(kernel_opts)
    -- Reconfiguration happens before the clock starts, so it is not timed.
    ooc.minimal_convolution_update(ctx, 0, kernel_opts)
    local started = clock()
    for _ = 1, step_count do
        ooc.step(ctx)
    end
    local finished = clock()
    return finished - started
end

local narrow_opts = { kernel_taps = 3 }
local wide_opts = { kernel_taps = 9, kernel = "1,2,3,4,3,2,1,0,0" }

local t_narrow = timed_run(narrow_opts)
local t_wide = timed_run(wide_opts)

ooc.log("3-tap: %.3f s, 9-tap: %.3f s", t_narrow, t_wide)

ooc.shutdown(ctx)
return ctx

Example output:

oak@field % ./bin/cli --script kernel-timings.lua
[INFO] Starting simulation with 12000000 steps
[INFO] 3-tap: 17.209 s, 9-tap: 46.522 s

4) Integrator comparison

Contrasts the runtime of RK4 and RKF45 (CPU time as reported by os.clock) so you can inspect ratios like $t_{\text{rkf45}} / t_{\text{rk4}}$ before committing to adaptive stepping.

-- Integrator comparison: runs the same number of integrator steps with "rk4"
-- and "rkf45" on one context and logs the CPU time (os.clock) of each run.
local steps = 12000000
local dt = 0.01
local ctx = ooc.create()

ooc.set_timestep(ctx, dt)

ooc.log("Starting simulation with %d steps and dt = %.3f", steps, dt)

--- Time `step_count` calls to ooc.integrator_step with the named integrator.
-- The integrator is created on `ctx`, set as the context's integrator, stepped,
-- and detached again; only the stepping loop is timed.
-- @param name integrator name passed to ooc.create_context_integrator
-- @param step_count number of integrator steps to run
-- @return CPU seconds spent in the stepping loop, as measured by os.clock
local function run_with(name, step_count)
    local integrator = ooc.create_context_integrator(ctx, name, {
        initial_dt = dt,
        -- Only the "rkf45" run is created with the adaptive flag set.
        adaptive = (name == "rkf45")
    })

    ooc.set_integrator(ctx, integrator)

    local t0 = os.clock()
    for _ = 1, step_count do
        ooc.integrator_step(ctx, integrator, dt)
    end
    local elapsed = os.clock() - t0

    -- Detach after the clock has stopped so teardown is not part of the timing.
    ooc.detach_integrator(ctx, integrator)

    return elapsed
end

local t_rk4 = run_with("rk4", steps)
local t_rkf = run_with("rkf45", steps)

ooc.log("rk4 (fixed): %.3f s, rkf45 (adaptive): %.3f s", t_rk4, t_rkf)

ooc.shutdown(ctx)

return ctx

Example output:

oak@field % ./bin/cli --script integrator-comparison.lua
[INFO] Starting simulation with 12000000 steps and dt = 0.010
[INFO] rk4 (fixed): 1.141 s, rkf45 (adaptive): 1.135 s