From cf666b4e8d21d109f720d417b2f893097b6cb28d Mon Sep 17 00:00:00 2001 From: Diego Augusto Molina Date: Sat, 13 Sep 2025 03:39:34 -0300 Subject: [PATCH 1/6] optimize vm allocation of function arguments --- vm/vm.go | 119 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 92 insertions(+), 27 deletions(-) diff --git a/vm/vm.go b/vm/vm.go index ed61d2f90..ebe3d4413 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -83,6 +83,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { vm.memory = 0 vm.ip = 0 + var fnArgsBuf []any + for vm.ip < len(program.Bytecode) { if debug && vm.debug { <-vm.step @@ -355,27 +357,24 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { vm.push(out) case OpCall1: - a := vm.pop() - out, err := program.functions[arg](a) + args := vm.getArgsForFunc(&fnArgsBuf, program, 1) + out, err := program.functions[arg](args...) if err != nil { panic(err) } vm.push(out) case OpCall2: - b := vm.pop() - a := vm.pop() - out, err := program.functions[arg](a, b) + args := vm.getArgsForFunc(&fnArgsBuf, program, 2) + out, err := program.functions[arg](args...) if err != nil { panic(err) } vm.push(out) case OpCall3: - c := vm.pop() - b := vm.pop() - a := vm.pop() - out, err := program.functions[arg](a, b, c) + args := vm.getArgsForFunc(&fnArgsBuf, program, 3) + out, err := program.functions[arg](args...) if err != nil { panic(err) } @@ -383,12 +382,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { case OpCallN: fn := vm.pop().(Function) - size := arg - in := make([]any, size) - for i := int(size) - 1; i >= 0; i-- { - in[i] = vm.pop() - } - out, err := fn(in...) + args := vm.getArgsForFunc(&fnArgsBuf, program, arg) + out, err := fn(args...) if err != nil { panic(err) } @@ -396,21 +391,13 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { case OpCallFast: fn := vm.pop().(func(...any) any) - size := arg - in := make([]any, size) - for i := int(size) - 1; i >= 0; i-- { - in[i] = vm.pop() - } - vm.push(fn(in...)) + args := vm.getArgsForFunc(&fnArgsBuf, program, arg) + vm.push(fn(args...)) case OpCallSafe: fn := vm.pop().(SafeFunction) - size := arg - in := make([]any, size) - for i := int(size) - 1; i >= 0; i-- { - in[i] = vm.pop() - } - out, mem, err := fn(in...) + args := vm.getArgsForFunc(&fnArgsBuf, program, arg) + out, mem, err := fn(args...) if err != nil { panic(err) } @@ -609,6 +596,56 @@ func (vm *VM) scope() *Scope { return vm.Scopes[len(vm.Scopes)-1] } +// getArgsForFunc lazily initializes the buffer the first time it is called for +// a given program (thus, it also needs "program" to run). It will +// take "needed" elements from the buffer and populate them with vm.pop() in +// reverse order. Because the estimation can fall short, this function can +// occasionally make a new allocation. +func (vm *VM) getArgsForFunc(bufPtr *[]any, program *Program, needed int) []any { + // Step 1: fix estimations and preallocate + if *bufPtr == nil { + estimatedFnArgsCount := estimateFnArgsCount(program) + if estimatedFnArgsCount < needed { + // in the case that the first call is for example OpCallN with a large + // number of arguments, then make sure we will be able to serve them at + // least. + estimatedFnArgsCount = needed + } + + // in the case that we are preparing the arguments for the first + // function call of the program, then *bufPtr will be nil, so we + // initialize it. We delay this initial allocation here because a + // program could have many function calls but exit earlier than the + // first call, so in that case we avoid allocating unnecessarily + *bufPtr = make([]any, estimatedFnArgsCount) + } + + // Step 2: get the final slice that will be returned + var buf []any + if len(*bufPtr) >= needed { + // in this case, we are successfully using the single preallocation. We + // use the full slice expression [low : high : max] because in that way + // a function that receives this slice as variadic arguments will not be + // able to make modifications to contiguous elements with append(). If + // they call append on their variadic arguments they will make a new + // allocation. + buf = (*bufPtr)[:needed:needed] + *bufPtr = (*bufPtr)[needed:] // advance the buffer + } else { + // if we have been making calls to something like OpCallN with many more + // arguments than what we estimated, then we will need to allocate + // separately + buf = make([]any, needed) + } + + // Step 3: populate the final slice bulk copying from the stack. This is the + // exact order and copy() is a highly optimized operation + copy(buf, vm.Stack[len(vm.Stack)-needed:]) + vm.Stack = vm.Stack[:len(vm.Stack)-needed] + + return buf +} + func (vm *VM) Step() { vm.step <- struct{}{} } @@ -623,3 +660,31 @@ func clearSlice[S ~[]E, E any](s S) { s[i] = zero // clear mem, optimized by the compiler, in Go 1.21 the "clear" builtin can be used } } + +// estimateFnArgsCount inspects a *Program and estimates how many function +// arguments will be required to run it. +func estimateFnArgsCount(program *Program) int { + // Implementation note: a program will not necessarily go through all + // operations, but this is just an estimation + var count int + for _, op := range program.Bytecode { + switch op { + case OpCall1: + count++ + case OpCall2: + count += 2 + case OpCall3: + count += 3 + case OpCallN: + // we don't know exactly but we know at least 4, so be conservative + // as this is only an optimization and we also want to avoid + // excessive preallocation + count += 4 + case OpCallFast, OpCallSafe: + // here we don't know either, but we can guess it could be common to + // receive up to 3 arguments in a function + count += 3 + } + } + return count +} From 60d09effd34b210732717df31c949c7fad1e77eb Mon Sep 17 00:00:00 2001 From: Diego Augusto Molina Date: Sat, 13 Sep 2025 04:01:24 -0300 Subject: [PATCH 2/6] make estimation faster by using a table --- vm/vm.go | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/vm/vm.go b/vm/vm.go index ebe3d4413..f8d34b8a4 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -668,23 +668,22 @@ func estimateFnArgsCount(program *Program) int { // operations, but this is just an estimation var count int for _, op := range program.Bytecode { - switch op { - case OpCall1: - count++ - case OpCall2: - count += 2 - case OpCall3: - count += 3 - case OpCallN: - // we don't know exactly but we know at least 4, so be conservative - // as this is only an optimization and we also want to avoid - // excessive preallocation - count += 4 - case OpCallFast, OpCallSafe: - // here we don't know either, but we can guess it could be common to - // receive up to 3 arguments in a function - count += 3 + if int(op) < len(opArgLenEstimation) { + count += opArgLenEstimation[op] } } return count } + +var opArgLenEstimation = [...]int{ + OpCall1: 1, + OpCall2: 2, + OpCall3: 3, + // we don't know exactly but we know at least 4, so be conservative as this + // is only an optimization and we also want to avoid excessive preallocation + OpCallN: 4, + // here we don't know either, but we can guess it could be common to receive + // up to 3 arguments in a function + OpCallFast: 3, + OpCallSafe: 3, +} From d030d1e45f181d326fda08ae9d8734f86d32708c Mon Sep 17 00:00:00 2001 From: Diego Augusto Molina Date: Sat, 13 Sep 2025 04:21:07 -0300 Subject: [PATCH 3/6] add benchmarks --- vm/vm_bench_test.go | 82 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 vm/vm_bench_test.go diff --git a/vm/vm_bench_test.go b/vm/vm_bench_test.go new file mode 100644 index 000000000..bafbfb030 --- /dev/null +++ b/vm/vm_bench_test.go @@ -0,0 +1,82 @@ +package vm_test + +import ( + "runtime" + "testing" + + "github.com/expr-lang/expr" + "github.com/expr-lang/expr/checker" + "github.com/expr-lang/expr/compiler" + "github.com/expr-lang/expr/conf" + "github.com/expr-lang/expr/vm" +) + +func BenchmarkVM(b *testing.B) { + cases := []struct { + name, input string + }{ + {"function calls", ` +func( + func( + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + ), + func( + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + ), + func( + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)), + ) +) + `}, + } + + a := new(recursive) + for i, b := 0, a; i < 40*4; i++ { + b.Inner = new(recursive) + b = b.Inner + } + + f := func(params ...any) (any, error) { return nil, nil } + env := map[string]any{ + "a": a, + "b": true, + "func": f, + } + config := conf.New(env) + expr.Function("func", f, f)(config) + config.Check() + + for _, c := range cases { + tree, err := checker.ParseCheck(c.input, config) + if err != nil { + b.Fatal(c.input, "parse and check", err) + } + prog, err := compiler.Compile(tree, config) + if err != nil { + b.Fatal(c.input, "compile", err) + } + //b.Logf("disassembled:\n%s", prog.Disassemble()) + //b.FailNow() + runtime.GC() + + var vm vm.VM + b.Run("name="+c.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err = vm.Run(prog, env) + } + }) + if err != nil { + b.Fatal(err) + } + } +} + +type recursive struct { + Inner *recursive `expr:"a"` +} From 39e88fca705a4f25d1a270c34b67f892efe099b6 Mon Sep 17 00:00:00 2001 From: Diego Augusto Molina Date: Sat, 13 Sep 2025 04:41:40 -0300 Subject: [PATCH 4/6] avoid all allocations if no arguments are needed --- vm/vm.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vm/vm.go b/vm/vm.go index f8d34b8a4..c71ba8a64 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -602,6 +602,10 @@ func (vm *VM) scope() *Scope { // reverse order. Because the estimation can fall short, this function can // occasionally make a new allocation. func (vm *VM) getArgsForFunc(bufPtr *[]any, program *Program, needed int) []any { + if needed == 0 || bufPtr == nil && program == nil { + return nil + } + // Step 1: fix estimations and preallocate if *bufPtr == nil { estimatedFnArgsCount := estimateFnArgsCount(program) From 64d5d1c9647b185d12c07c87649263244d8c32e7 Mon Sep 17 00:00:00 2001 From: Diego Augusto Molina Date: Sat, 13 Sep 2025 12:10:01 -0300 Subject: [PATCH 5/6] simplify code and gain 2% speed --- vm/vm.go | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/vm/vm.go b/vm/vm.go index c71ba8a64..84fe9b6fa 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -357,7 +357,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { vm.push(out) case OpCall1: - args := vm.getArgsForFunc(&fnArgsBuf, program, 1) + var args []any + args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 1) out, err := program.functions[arg](args...) if err != nil { panic(err) @@ -365,7 +366,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { vm.push(out) case OpCall2: - args := vm.getArgsForFunc(&fnArgsBuf, program, 2) + var args []any + args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 2) out, err := program.functions[arg](args...) if err != nil { panic(err) @@ -373,7 +375,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { vm.push(out) case OpCall3: - args := vm.getArgsForFunc(&fnArgsBuf, program, 3) + var args []any + args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 3) out, err := program.functions[arg](args...) if err != nil { panic(err) @@ -382,7 +385,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { case OpCallN: fn := vm.pop().(Function) - args := vm.getArgsForFunc(&fnArgsBuf, program, arg) + var args []any + args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg) out, err := fn(args...) if err != nil { panic(err) @@ -391,12 +395,14 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) { case OpCallFast: fn := vm.pop().(func(...any) any) - args := vm.getArgsForFunc(&fnArgsBuf, program, arg) + var args []any + args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg) vm.push(fn(args...)) case OpCallSafe: fn := vm.pop().(SafeFunction) - args := vm.getArgsForFunc(&fnArgsBuf, program, arg) + var args []any + args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg) out, mem, err := fn(args...) if err != nil { panic(err) @@ -601,13 +607,13 @@ func (vm *VM) scope() *Scope { // take "needed" elements from the buffer and populate them with vm.pop() in // reverse order. Because the estimation can fall short, this function can // occasionally make a new allocation. -func (vm *VM) getArgsForFunc(bufPtr *[]any, program *Program, needed int) []any { - if needed == 0 || bufPtr == nil && program == nil { - return nil +func (vm *VM) getArgsForFunc(argsBuf []any, program *Program, needed int) (args []any, argsBufOut []any) { + if needed == 0 || program == nil { + return nil, argsBuf } // Step 1: fix estimations and preallocate - if *bufPtr == nil { + if argsBuf == nil { estimatedFnArgsCount := estimateFnArgsCount(program) if estimatedFnArgsCount < needed { // in the case that the first call is for example OpCallN with a large @@ -617,24 +623,24 @@ func (vm *VM) getArgsForFunc(bufPtr *[]any, program *Program, needed int) []any } // in the case that we are preparing the arguments for the first - // function call of the program, then *bufPtr will be nil, so we + // function call of the program, then argsBuf will be nil, so we // initialize it. We delay this initial allocation here because a // program could have many function calls but exit earlier than the // first call, so in that case we avoid allocating unnecessarily - *bufPtr = make([]any, estimatedFnArgsCount) + argsBuf = make([]any, estimatedFnArgsCount) } // Step 2: get the final slice that will be returned var buf []any - if len(*bufPtr) >= needed { + if len(argsBuf) >= needed { // in this case, we are successfully using the single preallocation. We // use the full slice expression [low : high : max] because in that way // a function that receives this slice as variadic arguments will not be // able to make modifications to contiguous elements with append(). If // they call append on their variadic arguments they will make a new // allocation. - buf = (*bufPtr)[:needed:needed] - *bufPtr = (*bufPtr)[needed:] // advance the buffer + buf = (argsBuf)[:needed:needed] + argsBuf = (argsBuf)[needed:] // advance the buffer } else { // if we have been making calls to something like OpCallN with many more // arguments than what we estimated, then we will need to allocate @@ -647,7 +653,7 @@ func (vm *VM) getArgsForFunc(bufPtr *[]any, program *Program, needed int) []any copy(buf, vm.Stack[len(vm.Stack)-needed:]) vm.Stack = vm.Stack[:len(vm.Stack)-needed] - return buf + return buf, argsBuf } func (vm *VM) Step() { From 2fb1f53f2b10e4e08796380bc39ae5da89b83911 Mon Sep 17 00:00:00 2001 From: Diego Augusto Molina Date: Sat, 13 Sep 2025 17:46:01 -0300 Subject: [PATCH 6/6] add safety limit on preallocation --- vm/vm.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vm/vm.go b/vm/vm.go index 84fe9b6fa..338fbe341 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -17,6 +17,8 @@ import ( "github.com/expr-lang/expr/vm/runtime" ) +const maxFnArgsBuf = 256 + func Run(program *Program, env any) (any, error) { if program == nil { return nil, fmt.Errorf("program is nil") @@ -615,6 +617,10 @@ func (vm *VM) getArgsForFunc(argsBuf []any, program *Program, needed int) (args // Step 1: fix estimations and preallocate if argsBuf == nil { estimatedFnArgsCount := estimateFnArgsCount(program) + if estimatedFnArgsCount > maxFnArgsBuf { + // put a practical limit to avoid excessive preallocation + estimatedFnArgsCount = maxFnArgsBuf + } if estimatedFnArgsCount < needed { // in the case that the first call is for example OpCallN with a large // number of arguments, then make sure we will be able to serve them at