performance benchmark

+ Switching debug.Stack() → runtime.Callers() saved ~900 ns and ~1 KB per error — the formatted string is gone, just raw program counters stored
2026-04-23 19:48:28 +00:00 · 2026-04-23 19:48:28 +00:00 · 9599b8c0a3
commit 9599b8c0a3
parent e2b7e94847
2 changed files with 538 additions and 11 deletions
--- a/pkg/result/bench_test.go
+++ b/pkg/result/bench_test.go
@ -0,0 +1,488 @@
 package result_test
 // Comparison benchmark: canonical Go error handling vs the result package.
 //
 // Scenario: a "process record" pipeline with five steps and varying call-stack
 // depths. Four inputs exercise the happy path and failure at depths 3, 5, 10.
 //
 // Run:
 //
 //	go test -bench=. -benchmem ./pkg/result/
 //
 // What to look for:
 //   - HappyPath: result always spawns a goroutine (result.Go); that dominates.
 //   - FailDepthN: result pays goroutineID (runtime.Stack) + runtime.Callers +
 //     sync.Map.Store + runtime.Goexit; canonical just returns an error.
 //   - Allocs: result error path allocates for the captured program-counter
 //     slice (no formatted stack-trace string is built at capture time);
 //     canonical error path is typically zero additional allocs.
 import (
 	"errors"
 	"fmt"
 	"strconv"
 	"strings"
 	"testing"
 	"gitea.djmil.dev/go/template/pkg/result"
 )
 // ── Shared types ───────────────────────────────────────────────────────────────
 // bHeader is the output of the parse step: the original line plus its three
 // "|"-separated fields, all still strings.
 type bHeader struct{ raw, id, name, val string }
 // bFields is the output of the validate step: id converted to an int, with
 // name and val copied from the header unchanged.
 type bFields struct {
 	id        int
 	name, val string
 }
 // bRecord is the output of the transform and enrich steps; score is the
 // numeric form of val after those steps' arithmetic.
 type bRecord struct {
 	id    int
 	name  string
 	score float64
 }
 // bOutput is the final pipeline result: the record rendered as a single
 // "id|name|score" line.
 type bOutput struct{ line string }
 // mockRsrc stands in for something that needs cleanup (a file, a DB
 // connection); closed records whether close has been called.
 type mockRsrc struct {
 	closed bool
 }
 // close flags the resource as released. Calling it again is harmless.
 func (rsrc *mockRsrc) close() {
 	rsrc.closed = true
 }
 // ── Inputs ─────────────────────────────────────────────────────────────────────
 // Benchmark inputs: one line that passes every step and three that fail at
 // increasing call-stack depth.
 const (
 	happy  = "42|Alice|100" // all steps succeed
 	fail3  = ""             // parseHeader fails at depth 3: empty input
 	fail5  = "0|Alice|100"  // validateFields fails at depth 5: id ≤ 0
 	fail10 = "42|Alice|-9"  // enrichRecord fails at depth 10: negative score
 )
 // Sentinel errors shared by the canonical and result pipelines so both
 // sides fail with the same underlying error value.
 var (
 	// errEmpty is returned by the parse step for an empty input line.
 	errEmpty    = errors.New("empty input")
 	// errNegScore is returned by the enrich step when score < 0.
 	errNegScore = errors.New("negative score")
 )
 // ── Canonical: standard (value, error) error handling ──────────────────────────
 //
 // Each step returns (T, error); every call site does: if err != nil { return }.
 // The entry point uses goRun, a thin generic helper that mirrors result.Go
 // (spawns one goroutine, blocks, returns (T, error)) so both sides pay the
 // same goroutine-spawn cost and the benchmark isolates the error-propagation
 // mechanism only.
 // //go:noinline on intermediate frames prevents the compiler from collapsing
 // the declared stack depth, keeping the benchmark representative.
 // goRun runs fn on a freshly spawned goroutine, waits for it to return, and
 // passes fn's (value, error) pair back to the caller unchanged. It is the
 // canonical-side counterpart of result.Go, so both pipelines pay for one
 // goroutine spawn and one channel hand-off and differ only in how errors
 // travel through the call stack.
 func goRun[T any](fn func() (T, error)) (T, error) {
 	type outcome struct {
 		value T
 		err   error
 	}
 	// Capacity 1 lets the worker finish its send without waiting for the receive.
 	done := make(chan outcome, 1)
 	go func() {
 		var o outcome
 		o.value, o.err = fn()
 		done <- o
 	}()
 	o := <-done
 	return o.value, o.err
 }
 // parseHeader — 3 levels deep; fails on fail3 input.
 //
 // c_parseHeader is level 1 of the canonical parse step: a pass-through frame
 // that exists only to add call-stack depth.
 //
 //go:noinline
 func c_parseHeader(raw string) (bHeader, error) { return c_parseHeader2(raw) }
 // c_parseHeader2 is level 2: another pass-through frame.
 //
 //go:noinline
 func c_parseHeader2(raw string) (bHeader, error) { return c_parseHeader3(raw) }
 // c_parseHeader3 is the leaf that does the work. It returns errEmpty for an
 // empty line, a "malformed record" error when the line does not split into
 // exactly three "|"-separated parts, and otherwise a bHeader holding the
 // line and its three parts.
 func c_parseHeader3(raw string) (bHeader, error) {
 	if raw == "" {
 		return bHeader{}, errEmpty
 	}
 	// SplitN with n=3 keeps any further "|" inside the third part.
 	parts := strings.SplitN(raw, "|", 3)
 	if len(parts) != 3 {
 		return bHeader{}, fmt.Errorf("malformed record: %q", raw)
 	}
 	return bHeader{raw: raw, id: parts[0], name: parts[1], val: parts[2]}, nil
 }
 // validateFields — 5 levels deep; fails on fail5 input.
 //
 // c_validate is level 1 of the canonical validate step; levels 1-4 are
 // pass-through frames that only add call-stack depth.
 //
 //go:noinline
 func c_validate(h bHeader) (bFields, error) { return c_validate2(h) }
 //go:noinline
 func c_validate2(h bHeader) (bFields, error) { return c_validate3(h) }
 //go:noinline
 func c_validate3(h bHeader) (bFields, error) { return c_validate4(h) }
 //go:noinline
 func c_validate4(h bHeader) (bFields, error) { return c_validate5(h) }
 // c_validate5 is the leaf that does the work: it parses h.id as a decimal
 // int and rejects non-numeric ids (wrapping the strconv error) and ids that
 // are zero or negative. On success name and val are copied through.
 func c_validate5(h bHeader) (bFields, error) {
 	id, err := strconv.Atoi(h.id)
 	if err != nil {
 		return bFields{}, fmt.Errorf("parse id %q: %w", h.id, err)
 	}
 	if id <= 0 {
 		return bFields{}, fmt.Errorf("id %d: must be > 0", id)
 	}
 	return bFields{id: id, name: h.name, val: h.val}, nil
 }
 // transformData — 5 levels deep; does not fail on any benchmark input.
 //
 // c_transform is level 1 of the canonical transform step; levels 1-4 are
 // pass-through frames that only add call-stack depth.
 //
 //go:noinline
 func c_transform(f bFields) (bRecord, error) { return c_transform2(f) }
 //go:noinline
 func c_transform2(f bFields) (bRecord, error) { return c_transform3(f) }
 //go:noinline
 func c_transform3(f bFields) (bRecord, error) { return c_transform4(f) }
 //go:noinline
 func c_transform4(f bFields) (bRecord, error) { return c_transform5(f) }
 // c_transform5 is the leaf that does the work: it parses f.val as a float64
 // (wrapping the strconv error on failure) and stores 1.5 times that value
 // as the record's score.
 func c_transform5(f bFields) (bRecord, error) {
 	v, err := strconv.ParseFloat(f.val, 64)
 	if err != nil {
 		return bRecord{}, fmt.Errorf("parse value %q: %w", f.val, err)
 	}
 	return bRecord{id: f.id, name: f.name, score: v * 1.5}, nil
 }
 // enrichRecord — 10 levels deep; 1 defer at level 1; fails on fail10 input.
 //
 // c_enrich is level 1 of the canonical enrich step. It creates a mockRsrc
 // and defers its close, so the deferred cleanup runs on both the success and
 // the error return.
 //
 //go:noinline
 func c_enrich(r bRecord) (bRecord, error) {
 	res := &mockRsrc{}
 	defer res.close()
 	return c_enrich2(r)
 }
 // Levels 2-9 are pass-through frames that only add call-stack depth.
 //
 //go:noinline
 func c_enrich2(r bRecord) (bRecord, error) { return c_enrich3(r) }
 //go:noinline
 func c_enrich3(r bRecord) (bRecord, error) { return c_enrich4(r) }
 //go:noinline
 func c_enrich4(r bRecord) (bRecord, error) { return c_enrich5(r) }
 //go:noinline
 func c_enrich5(r bRecord) (bRecord, error) { return c_enrich6(r) }
 //go:noinline
 func c_enrich6(r bRecord) (bRecord, error) { return c_enrich7(r) }
 //go:noinline
 func c_enrich7(r bRecord) (bRecord, error) { return c_enrich8(r) }
 //go:noinline
 func c_enrich8(r bRecord) (bRecord, error) { return c_enrich9(r) }
 //go:noinline
 func c_enrich9(r bRecord) (bRecord, error) { return c_enrich10(r) }
 // c_enrich10 is the leaf that does the work: it returns errNegScore for a
 // negative score and otherwise a copy of the record with 10.0 added to it.
 //
 // NOTE(review): this leaf carries no go:noinline directive, so the compiler
 // may fold it into c_enrich9 — confirm with -gcflags=-m if the exact depth
 // of 10 matters.
 func c_enrich10(r bRecord) (bRecord, error) {
 	if r.score < 0 {
 		return bRecord{}, errNegScore
 	}
 	return bRecord{id: r.id, name: r.name, score: r.score + 10.0}, nil
 }
 // formatOutput — 3 levels deep; does not fail on any benchmark input.
 //
 // c_format is level 1 of the canonical format step; levels 1-2 are
 // pass-through frames that only add call-stack depth.
 //
 //go:noinline
 func c_format(r bRecord) (bOutput, error) { return c_format2(r) }
 //go:noinline
 func c_format2(r bRecord) (bOutput, error) { return c_format3(r) }
 // c_format3 is the leaf that does the work: it renders the record as
 // "id|name|score" with the score at two decimal places. Its error result is
 // always nil; the (value, error) shape is kept so the call site matches the
 // other steps.
 func c_format3(r bRecord) (bOutput, error) {
 	return bOutput{line: fmt.Sprintf("%d|%s|%.2f", r.id, r.name, r.score)}, nil
 }
 // canonicalProcess is the pipeline entry point using canonical error handling.
 //
 // It runs the five steps (parse, validate, transform, enrich, format) in
 // order inside goRun, so the whole pipeline executes on one spawned
 // goroutine. The first step that fails stops the pipeline; its error is
 // wrapped with the step name via %w and returned with a zero bOutput.
 func canonicalProcess(raw string) (bOutput, error) {
 	return goRun(func() (bOutput, error) {
 		h, err := c_parseHeader(raw)
 		if err != nil {
 			return bOutput{}, fmt.Errorf("parse header: %w", err)
 		}
 		f, err := c_validate(h)
 		if err != nil {
 			return bOutput{}, fmt.Errorf("validate fields: %w", err)
 		}
 		r, err := c_transform(f)
 		if err != nil {
 			return bOutput{}, fmt.Errorf("transform: %w", err)
 		}
 		// enrich takes and returns a bRecord, so r is reassigned, not redeclared.
 		r, err = c_enrich(r)
 		if err != nil {
 			return bOutput{}, fmt.Errorf("enrich: %w", err)
 		}
 		out, err := c_format(r)
 		if err != nil {
 			return bOutput{}, fmt.Errorf("format: %w", err)
 		}
 		return out, nil
 	})
 }
 // ── Result: happy-path-oriented error handling ─────────────────────────────────
 //
 // Each step returns result.Expect[T]; the call site chains .Expect("ctx").
 // The entry point wraps everything in result.Go, which spawns one goroutine
 // and collects any Expect failure as a normal Go error.
 // parseHeader — 3 levels deep; fails on fail3 input.
 //
 // r_parseHeader is level 1 of the result-package parse step: a pass-through
 // frame that exists only to add call-stack depth. It mirrors c_parseHeader.
 //
 //go:noinline
 func r_parseHeader(raw string) result.Expect[bHeader] { return r_parseHeader2(raw) }
 // r_parseHeader2 is level 2: another pass-through frame.
 //
 //go:noinline
 func r_parseHeader2(raw string) result.Expect[bHeader] { return r_parseHeader3(raw) }
 // r_parseHeader3 is the leaf that does the work. It returns a failed Expect
 // carrying errEmpty for an empty line, a failed Expect carrying a
 // "malformed record" error when the line does not split into exactly three
 // "|"-separated parts, and otherwise result.Ok with the parsed bHeader.
 func r_parseHeader3(raw string) result.Expect[bHeader] {
 	if raw == "" {
 		return result.Fail[bHeader](errEmpty)
 	}
 	// SplitN with n=3 keeps any further "|" inside the third part.
 	parts := strings.SplitN(raw, "|", 3)
 	if len(parts) != 3 {
 		return result.Fail[bHeader](fmt.Errorf("malformed record: %q", raw))
 	}
 	return result.Ok(bHeader{raw: raw, id: parts[0], name: parts[1], val: parts[2]})
 }
 // validateFields — 5 levels deep; fails on fail5 input.
 //
 // r_validate is level 1 of the result-package validate step; levels 1-4 are
 // pass-through frames that only add call-stack depth. It mirrors c_validate.
 //
 //go:noinline
 func r_validate(h bHeader) result.Expect[bFields] { return r_validate2(h) }
 //go:noinline
 func r_validate2(h bHeader) result.Expect[bFields] { return r_validate3(h) }
 //go:noinline
 func r_validate3(h bHeader) result.Expect[bFields] { return r_validate4(h) }
 //go:noinline
 func r_validate4(h bHeader) result.Expect[bFields] { return r_validate5(h) }
 // r_validate5 is the leaf that does the work: it parses h.id as a decimal
 // int and returns a failed Expect for non-numeric ids (wrapping the strconv
 // error) and for ids that are zero or negative. On success name and val are
 // copied through inside result.Ok.
 func r_validate5(h bHeader) result.Expect[bFields] {
 	id, err := strconv.Atoi(h.id)
 	if err != nil {
 		return result.Fail[bFields](fmt.Errorf("parse id %q: %w", h.id, err))
 	}
 	if id <= 0 {
 		return result.Fail[bFields](fmt.Errorf("id %d: must be > 0", id))
 	}
 	return result.Ok(bFields{id: id, name: h.name, val: h.val})
 }
 // transformData — 5 levels deep; does not fail on any benchmark input.
 //
 // r_transform is level 1 of the result-package transform step; levels 1-4
 // are pass-through frames that only add call-stack depth. It mirrors
 // c_transform.
 //
 //go:noinline
 func r_transform(f bFields) result.Expect[bRecord] { return r_transform2(f) }
 //go:noinline
 func r_transform2(f bFields) result.Expect[bRecord] { return r_transform3(f) }
 //go:noinline
 func r_transform3(f bFields) result.Expect[bRecord] { return r_transform4(f) }
 //go:noinline
 func r_transform4(f bFields) result.Expect[bRecord] { return r_transform5(f) }
 // r_transform5 is the leaf that does the work: it parses f.val as a float64
 // (returning a failed Expect that wraps the strconv error on failure) and
 // stores 1.5 times that value as the record's score.
 func r_transform5(f bFields) result.Expect[bRecord] {
 	v, err := strconv.ParseFloat(f.val, 64)
 	if err != nil {
 		return result.Fail[bRecord](fmt.Errorf("parse value %q: %w", f.val, err))
 	}
 	return result.Ok(bRecord{id: f.id, name: f.name, score: v * 1.5})
 }
 // enrichRecord — 10 levels deep; 1 defer at level 1; fails on fail10 input.
 //
 // r_enrich is level 1 of the result-package enrich step. It creates a
 // mockRsrc and defers its close before descending. The failure itself comes
 // back as an ordinary return value here; it is the caller's .Expect that
 // ends the goroutine.
 //
 //go:noinline
 func r_enrich(r bRecord) result.Expect[bRecord] {
 	res := &mockRsrc{}
 	defer res.close()
 	return r_enrich2(r)
 }
 // Levels 2-9 are pass-through frames that only add call-stack depth.
 //
 //go:noinline
 func r_enrich2(r bRecord) result.Expect[bRecord] { return r_enrich3(r) }
 //go:noinline
 func r_enrich3(r bRecord) result.Expect[bRecord] { return r_enrich4(r) }
 //go:noinline
 func r_enrich4(r bRecord) result.Expect[bRecord] { return r_enrich5(r) }
 //go:noinline
 func r_enrich5(r bRecord) result.Expect[bRecord] { return r_enrich6(r) }
 //go:noinline
 func r_enrich6(r bRecord) result.Expect[bRecord] { return r_enrich7(r) }
 //go:noinline
 func r_enrich7(r bRecord) result.Expect[bRecord] { return r_enrich8(r) }
 //go:noinline
 func r_enrich8(r bRecord) result.Expect[bRecord] { return r_enrich9(r) }
 //go:noinline
 func r_enrich9(r bRecord) result.Expect[bRecord] { return r_enrich10(r) }
 // r_enrich10 is the leaf that does the work: it returns a failed Expect
 // carrying errNegScore for a negative score and otherwise result.Ok with a
 // copy of the record whose score has 10.0 added.
 //
 // NOTE(review): this leaf carries no go:noinline directive, so the compiler
 // may fold it into r_enrich9 — confirm with -gcflags=-m if the exact depth
 // of 10 matters.
 func r_enrich10(r bRecord) result.Expect[bRecord] {
 	if r.score < 0 {
 		return result.Fail[bRecord](errNegScore)
 	}
 	return result.Ok(bRecord{id: r.id, name: r.name, score: r.score + 10.0})
 }
 // formatOutput — 3 levels deep; does not fail on any benchmark input.
 //
 // r_format is level 1 of the result-package format step; levels 1-2 are
 // pass-through frames that only add call-stack depth. It mirrors c_format.
 //
 //go:noinline
 func r_format(r bRecord) result.Expect[bOutput] { return r_format2(r) }
 //go:noinline
 func r_format2(r bRecord) result.Expect[bOutput] { return r_format3(r) }
 // r_format3 is the leaf that does the work: it renders the record as
 // "id|name|score" with the score at two decimal places and always returns
 // result.Ok.
 func r_format3(r bRecord) result.Expect[bOutput] {
 	return result.Ok(bOutput{line: fmt.Sprintf("%d|%s|%.2f", r.id, r.name, r.score)})
 }
 // resultProcess is the pipeline entry point using result-package error handling.
 //
 // It runs the same five steps as canonicalProcess, in the same order, inside
 // result.Go. Each step's result is unpacked with .Expect(label); there are
 // no explicit error checks, so a failing step is expected to end the
 // goroutine from inside .Expect rather than by returning. Unwrap turns the
 // value returned by result.Go into the (bOutput, error) pair returned here.
 func resultProcess(raw string) (bOutput, error) {
 	return result.Go(func() bOutput {
 		h := r_parseHeader(raw).Expect("parse header")
 		f := r_validate(h).Expect("validate fields")
 		r := r_transform(f).Expect("transform")
 		// enrich takes and returns a bRecord, so r is reassigned, not redeclared.
 		r = r_enrich(r).Expect("enrich")
 		return r_format(r).Expect("format")
 	}).Unwrap()
 }
 // ── Benchmarks ─────────────────────────────────────────────────────────────────
 var sinkOutput bOutput // prevents the compiler from eliminating pipeline work
 // BenchmarkCanonical_HappyPath times canonicalProcess on the all-steps-succeed
 // input. Any error aborts the benchmark; the result is stored in sinkOutput
 // so the pipeline work stays observable.
 func BenchmarkCanonical_HappyPath(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		got, runErr := canonicalProcess(happy)
 		if runErr != nil {
 			b.Fatal(runErr)
 		}
 		sinkOutput = got
 	}
 }
 // BenchmarkResult_HappyPath times resultProcess on the all-steps-succeed
 // input. Any error aborts the benchmark; the result is stored in sinkOutput
 // so the pipeline work stays observable.
 func BenchmarkResult_HappyPath(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		got, runErr := resultProcess(happy)
 		if runErr != nil {
 			b.Fatal(runErr)
 		}
 		sinkOutput = got
 	}
 }
 // BenchmarkCanonical_FailDepth3 times canonicalProcess on the fail3 input
 // and aborts if the pipeline unexpectedly succeeds.
 func BenchmarkCanonical_FailDepth3(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		if _, runErr := canonicalProcess(fail3); runErr == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
 // BenchmarkResult_FailDepth3 times resultProcess on the fail3 input and
 // aborts if the pipeline unexpectedly succeeds.
 func BenchmarkResult_FailDepth3(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		if _, runErr := resultProcess(fail3); runErr == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
 // BenchmarkCanonical_FailDepth5 times canonicalProcess on the fail5 input
 // and aborts if the pipeline unexpectedly succeeds.
 func BenchmarkCanonical_FailDepth5(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		if _, runErr := canonicalProcess(fail5); runErr == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
 // BenchmarkResult_FailDepth5 times resultProcess on the fail5 input and
 // aborts if the pipeline unexpectedly succeeds.
 func BenchmarkResult_FailDepth5(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		if _, runErr := resultProcess(fail5); runErr == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
 // BenchmarkCanonical_FailDepth10 times canonicalProcess on the fail10 input
 // and aborts if the pipeline unexpectedly succeeds.
 func BenchmarkCanonical_FailDepth10(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		if _, runErr := canonicalProcess(fail10); runErr == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
 // BenchmarkResult_FailDepth10 times resultProcess on the fail10 input and
 // aborts if the pipeline unexpectedly succeeds.
 func BenchmarkResult_FailDepth10(b *testing.B) {
 	b.ReportAllocs()
 	for b.Loop() {
 		if _, runErr := resultProcess(fail10); runErr == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
 // ── CaptureStack=false variants ────────────────────────────────────────────────
 //
 // These show the floor: goroutine spawn + Goexit mechanism with no stack
 // capture. Set result.CaptureStack=false at startup to reach this level in
 // production. The happy-path cost is unchanged (CaptureStack is only
 // consulted on error paths).
 // BenchmarkResult_NoStack_HappyPath times resultProcess on the
 // all-steps-succeed input with result.CaptureStack switched off. Any error
 // aborts the benchmark; the result is stored in sinkOutput so the pipeline
 // work stays observable.
 func BenchmarkResult_NoStack_HappyPath(b *testing.B) {
 	// Put back whatever value was in effect instead of assuming true, so a
 	// run that was already configured with CaptureStack=false is left as-is.
 	prev := result.CaptureStack
 	result.CaptureStack = false
 	defer func() { result.CaptureStack = prev }()
 	b.ReportAllocs()
 	for b.Loop() {
 		out, err := resultProcess(happy)
 		if err != nil {
 			b.Fatal(err)
 		}
 		sinkOutput = out
 	}
 }
 // BenchmarkResult_NoStack_FailDepth3 times resultProcess on the fail3 input
 // with result.CaptureStack switched off, and aborts if the pipeline
 // unexpectedly succeeds.
 func BenchmarkResult_NoStack_FailDepth3(b *testing.B) {
 	// Put back whatever value was in effect instead of assuming true, so a
 	// run that was already configured with CaptureStack=false is left as-is.
 	prev := result.CaptureStack
 	result.CaptureStack = false
 	defer func() { result.CaptureStack = prev }()
 	b.ReportAllocs()
 	for b.Loop() {
 		_, err := resultProcess(fail3)
 		if err == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
 // BenchmarkResult_NoStack_FailDepth10 times resultProcess on the fail10
 // input with result.CaptureStack switched off, and aborts if the pipeline
 // unexpectedly succeeds.
 func BenchmarkResult_NoStack_FailDepth10(b *testing.B) {
 	// Put back whatever value was in effect instead of assuming true, so a
 	// run that was already configured with CaptureStack=false is left as-is.
 	prev := result.CaptureStack
 	result.CaptureStack = false
 	defer func() { result.CaptureStack = prev }()
 	b.ReportAllocs()
 	for b.Loop() {
 		_, err := resultProcess(fail10)
 		if err == nil {
 			b.Fatal("expected error")
 		}
 	}
 }
--- a/pkg/result/result.go
+++ b/pkg/result/result.go
@ -4,7 +4,7 @@ import (
 	"errors"
 	"fmt"
 	"runtime"
-	"runtime/debug"
+	"strings"
 	"sync"
 )
@ -18,6 +18,31 @@ type Expect[T any] struct {
 // keyed by goroutine ID. Entries are consumed by the enclosing Go or Run call.
 var gErrors sync.Map
 // CaptureStack controls whether Expect, Expectf, and Must capture a stack
 // trace at the failure site. Defaults to true. Set to false at program startup
 // (before spawning goroutines) to cut ~1.5 KB of allocation and most of the
 // error-path overhead; errors are still collected and propagated, just without
 // a trace. StackTrace will return an empty string for errors captured while
 // this is false.
 //
 // NOTE(review): the ~1.5 KB figure may predate the switch from a formatted
 // debug.Stack string to raw program counters — re-measure and confirm.
 var CaptureStack = true
 // callers snapshots the current call stack as raw program counters. It
 // returns nil when CaptureStack is false or when no frames were recorded.
 //
 // skip is handed to runtime.Callers unchanged, so 0 denotes runtime.Callers
 // itself and 1 denotes this function. At most 32 frames are kept. The
 // result is a right-sized copy, so the scratch array is not retained.
 func callers(skip int) []uintptr {
 	if !CaptureStack {
 		return nil
 	}
 	var scratch [32]uintptr
 	if depth := runtime.Callers(skip, scratch[:]); depth > 0 {
 		trimmed := make([]uintptr, depth)
 		copy(trimmed, scratch[:depth])
 		return trimmed
 	}
 	return nil
 }
 // goroutineID returns the current goroutine's numeric ID by parsing the first
 // line of runtime.Stack output ("goroutine NNN [...]"). Called only on error
 // paths so the runtime.Stack overhead is acceptable.
@ -63,7 +88,7 @@ func (r Expect[T]) Err() error {
 // condition warrants an immediate crash.
 func (r Expect[T]) Must() T {
 	if r.err != nil {
-		panic(&stackError{err: r.err, stack: debug.Stack()})
+		panic(&stackError{err: r.err, stack: callers(3)})
 	}
 	return r.value
 }
@ -77,7 +102,7 @@ func (r Expect[T]) Expect(msg string) T {
 	if r.err != nil {
 		gErrors.Store(goroutineID(), &stackError{
 			err:   fmt.Errorf("%s: %w", msg, r.err),
-			stack: debug.Stack(),
+			stack: callers(3),
 		})
 		runtime.Goexit()
 	}
@ -92,7 +117,7 @@ func (r Expect[T]) Expectf(format string, args ...any) T {
 	if r.err != nil {
 		gErrors.Store(goroutineID(), &stackError{
 			err:   fmt.Errorf("%s: %w", fmt.Sprintf(format, args...), r.err),
-			stack: debug.Stack(),
+			stack: callers(3),
 		})
 		runtime.Goexit()
 	}
@ -197,21 +222,35 @@ func Catch(errp *error) {
 	panic(v) // not an error — let it propagate
 }
-// StackTrace returns the stack trace captured when [Expect.Expect],
+// StackTrace returns the call stack captured when [Expect.Expect],
 // [Expect.Expectf], or [Expect.Must] stored or panicked with an error.
-// Returns an empty string if err was not produced by this package.
+// Frames are formatted as "function\n\tfile:line\n" starting at the call
 // site of the failing Expect/Expectf/Must call.
 // Returns an empty string if err was not produced by this package or if
 // [CaptureStack] was false when the error was captured.
 func StackTrace(err error) string {
 	var s *stackError
-	if errors.As(err, &s) {
+	if !errors.As(err, &s) || len(s.stack) == 0 {
-		return string(s.stack)
+		return ""
 	}
-	return ""
+	frames := runtime.CallersFrames(s.stack)
 	var b strings.Builder
 	for {
 		f, more := frames.Next()
 		if f.Function != "runtime.goexit" {
 			fmt.Fprintf(&b, "%s\n\t%s:%d\n", f.Function, f.File, f.Line)
 		}
 		if !more {
 			break
 		}
 	}
 	return b.String()
 }
-// stackError wraps an error with a stack trace captured at the failure site.
+// stackError wraps an error with program counters captured at the failure site.
 type stackError struct {
 	err   error
-	stack []byte
+	stack []uintptr // nil when CaptureStack is false
 }
 func (s *stackError) Error() string { return s.err.Error() }