Skip to content

Commit bf8f63d

Browse files
authored
trie, core/state: introduce trie Prefetch for optimizing preload (#32134)
This pull request introduces a `Prefetch` operation in the trie to prefetch trie nodes in parallel. It is used by the `triePrefetcher` to accelerate state loading and improve overall chain processing performance.
1 parent 9ce40d1 commit bf8f63d

File tree

10 files changed

+250
-37
lines changed

10 files changed

+250
-37
lines changed

core/state/database.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,19 @@ type Trie interface {
8181
// be returned.
8282
GetAccount(address common.Address) (*types.StateAccount, error)
8383

84+
// PrefetchAccount attempts to resolve specific accounts from the database
85+
// to accelerate subsequent trie operations.
86+
PrefetchAccount([]common.Address) error
87+
8488
// GetStorage returns the value for key stored in the trie. The value bytes
8589
// must not be modified by the caller. If a node was not found in the database,
8690
// a trie.MissingNodeError is returned.
8791
GetStorage(addr common.Address, key []byte) ([]byte, error)
8892

93+
// PrefetchStorage attempts to resolve specific storage slots from the database
94+
// to accelerate subsequent trie operations.
95+
PrefetchStorage(addr common.Address, keys [][]byte) error
96+
8997
// UpdateAccount abstracts an account write to the trie. It encodes the
9098
// provided account object with associated algorithm and then updates it
9199
// in the trie with provided address.

core/state/trie_prefetcher.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,10 @@ func (sf *subfetcher) loop() {
388388
sf.tasks = nil
389389
sf.lock.Unlock()
390390

391+
var (
392+
addresses []common.Address
393+
slots [][]byte
394+
)
391395
for _, task := range tasks {
392396
if task.addr != nil {
393397
key := *task.addr
@@ -400,6 +404,7 @@ func (sf *subfetcher) loop() {
400404
sf.dupsCross++
401405
continue
402406
}
407+
sf.seenReadAddr[key] = struct{}{}
403408
} else {
404409
if _, ok := sf.seenReadAddr[key]; ok {
405410
sf.dupsCross++
@@ -409,7 +414,9 @@ func (sf *subfetcher) loop() {
409414
sf.dupsWrite++
410415
continue
411416
}
417+
sf.seenWriteAddr[key] = struct{}{}
412418
}
419+
addresses = append(addresses, *task.addr)
413420
} else {
414421
key := *task.slot
415422
if task.read {
@@ -421,6 +428,7 @@ func (sf *subfetcher) loop() {
421428
sf.dupsCross++
422429
continue
423430
}
431+
sf.seenReadSlot[key] = struct{}{}
424432
} else {
425433
if _, ok := sf.seenReadSlot[key]; ok {
426434
sf.dupsCross++
@@ -430,25 +438,19 @@ func (sf *subfetcher) loop() {
430438
sf.dupsWrite++
431439
continue
432440
}
441+
sf.seenWriteSlot[key] = struct{}{}
433442
}
443+
slots = append(slots, key.Bytes())
434444
}
435-
if task.addr != nil {
436-
sf.trie.GetAccount(*task.addr)
437-
} else {
438-
sf.trie.GetStorage(sf.addr, (*task.slot)[:])
445+
}
446+
if len(addresses) != 0 {
447+
if err := sf.trie.PrefetchAccount(addresses); err != nil {
448+
log.Error("Failed to prefetch accounts", "err", err)
439449
}
440-
if task.read {
441-
if task.addr != nil {
442-
sf.seenReadAddr[*task.addr] = struct{}{}
443-
} else {
444-
sf.seenReadSlot[*task.slot] = struct{}{}
445-
}
446-
} else {
447-
if task.addr != nil {
448-
sf.seenWriteAddr[*task.addr] = struct{}{}
449-
} else {
450-
sf.seenWriteSlot[*task.slot] = struct{}{}
451-
}
450+
}
451+
if len(slots) != 0 {
452+
if err := sf.trie.PrefetchStorage(sf.addr, slots); err != nil {
453+
log.Error("Failed to prefetch storage", "err", err)
452454
}
453455
}
454456

core/state_prefetcher.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,6 @@ func (p *statePrefetcher) Prefetch(block *types.Block, statedb *state.StateDB, c
111111
fails.Add(1)
112112
return nil // Ugh, something went horribly wrong, bail out
113113
}
114-
// Pre-load trie nodes for the intermediate root.
115-
//
116-
// This operation incurs significant memory allocations due to
117-
// trie hashing and node decoding. TODO(rjl493456442): investigate
118-
// ways to mitigate this overhead.
119-
stateCpy.IntermediateRoot(true)
120114
return nil
121115
})
122116
}

trie/secure_trie.go

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -105,19 +105,6 @@ func (t *StateTrie) MustGet(key []byte) []byte {
105105
return t.trie.MustGet(crypto.Keccak256(key))
106106
}
107107

108-
// GetStorage attempts to retrieve a storage slot with provided account address
109-
// and slot key. The value bytes must not be modified by the caller.
110-
// If the specified storage slot is not in the trie, nil will be returned.
111-
// If a trie node is not found in the database, a MissingNodeError is returned.
112-
func (t *StateTrie) GetStorage(_ common.Address, key []byte) ([]byte, error) {
113-
enc, err := t.trie.Get(crypto.Keccak256(key))
114-
if err != nil || len(enc) == 0 {
115-
return nil, err
116-
}
117-
_, content, _, err := rlp.Split(enc)
118-
return content, err
119-
}
120-
121108
// GetAccount attempts to retrieve an account with provided account address.
122109
// If the specified account is not in the trie, nil will be returned.
123110
// If a trie node is not found in the database, a MissingNodeError is returned.
@@ -144,6 +131,39 @@ func (t *StateTrie) GetAccountByHash(addrHash common.Hash) (*types.StateAccount,
144131
return ret, err
145132
}
146133

134+
// PrefetchAccount attempts to resolve specific accounts from the database
135+
// to accelerate subsequent trie operations.
136+
func (t *StateTrie) PrefetchAccount(addresses []common.Address) error {
137+
var keys [][]byte
138+
for _, addr := range addresses {
139+
keys = append(keys, crypto.Keccak256(addr.Bytes()))
140+
}
141+
return t.trie.Prefetch(keys)
142+
}
143+
144+
// GetStorage attempts to retrieve a storage slot with provided account address
145+
// and slot key. The value bytes must not be modified by the caller.
146+
// If the specified storage slot is not in the trie, nil will be returned.
147+
// If a trie node is not found in the database, a MissingNodeError is returned.
148+
func (t *StateTrie) GetStorage(_ common.Address, key []byte) ([]byte, error) {
149+
enc, err := t.trie.Get(crypto.Keccak256(key))
150+
if err != nil || len(enc) == 0 {
151+
return nil, err
152+
}
153+
_, content, _, err := rlp.Split(enc)
154+
return content, err
155+
}
156+
157+
// PrefetchStorage attempts to resolve specific storage slots from the database
158+
// to accelerate subsequent trie operations.
159+
func (t *StateTrie) PrefetchStorage(_ common.Address, keys [][]byte) error {
160+
var keylist [][]byte
161+
for _, key := range keys {
162+
keylist = append(keylist, crypto.Keccak256(key))
163+
}
164+
return t.trie.Prefetch(keylist)
165+
}
166+
147167
// GetNode attempts to retrieve a trie node by compact-encoded path. It is not
148168
// possible to use keybyte-encoding as the path might contain odd nibbles.
149169
// If the specified trie node is not in the trie, nil will be returned.

trie/tracer.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package trie
1919
import (
2020
"maps"
2121
"slices"
22+
"sync"
2223
)
2324

2425
// opTracer tracks the changes of trie nodes. During the trie operations,
@@ -102,6 +103,7 @@ func (t *opTracer) deletedList() [][]byte {
102103
// handling the concurrency issues by themselves.
103104
type prevalueTracer struct {
104105
data map[string][]byte
106+
lock sync.RWMutex
105107
}
106108

107109
// newPrevalueTracer initializes the tracer for capturing resolved trie nodes.
@@ -115,18 +117,27 @@ func newPrevalueTracer() *prevalueTracer {
115117
// blob internally. Do not modify the value outside this function,
116118
// as it is not deep-copied.
117119
func (t *prevalueTracer) put(path []byte, val []byte) {
120+
t.lock.Lock()
121+
defer t.lock.Unlock()
122+
118123
t.data[string(path)] = val
119124
}
120125

121126
// get returns the cached trie node value. If the node is not found, nil will
122127
// be returned.
123128
func (t *prevalueTracer) get(path []byte) []byte {
129+
t.lock.RLock()
130+
defer t.lock.RUnlock()
131+
124132
return t.data[string(path)]
125133
}
126134

127135
// hasList returns a list of flags indicating whether the corresponding trie nodes
128136
// specified by the path exist in the trie.
129137
func (t *prevalueTracer) hasList(list [][]byte) []bool {
138+
t.lock.RLock()
139+
defer t.lock.RUnlock()
140+
130141
exists := make([]bool, 0, len(list))
131142
for _, path := range list {
132143
_, ok := t.data[string(path)]
@@ -137,16 +148,25 @@ func (t *prevalueTracer) hasList(list [][]byte) []bool {
137148

138149
// values returns a list of values of the cached trie nodes.
139150
func (t *prevalueTracer) values() [][]byte {
151+
t.lock.RLock()
152+
defer t.lock.RUnlock()
153+
140154
return slices.Collect(maps.Values(t.data))
141155
}
142156

143157
// reset resets the cached content in the prevalueTracer.
144158
func (t *prevalueTracer) reset() {
159+
t.lock.Lock()
160+
defer t.lock.Unlock()
161+
145162
clear(t.data)
146163
}
147164

148165
// copy returns a copied prevalueTracer instance.
149166
func (t *prevalueTracer) copy() *prevalueTracer {
167+
t.lock.RLock()
168+
defer t.lock.RUnlock()
169+
150170
// Shallow clone is used, as the cached trie node values are immutable
151171
return &prevalueTracer{
152172
data: maps.Clone(t.data),

trie/tracer_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ func testTrieOpTracer(t *testing.T, vals []struct{ k, v string }) {
7070
}
7171
insertSet := copySet(trie.opTracer.inserts) // copy before commit
7272
deleteSet := copySet(trie.opTracer.deletes) // copy before commit
73-
7473
root, nodes := trie.Commit(false)
7574
db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes))
7675

trie/transition.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,17 @@ func (t *TransitionTrie) GetStorage(addr common.Address, key []byte) ([]byte, er
7878
return t.base.GetStorage(addr, key)
7979
}
8080

81+
// PrefetchStorage attempts to resolve specific storage slots from the database
82+
// to accelerate subsequent trie operations.
83+
func (t *TransitionTrie) PrefetchStorage(addr common.Address, keys [][]byte) error {
84+
for _, key := range keys {
85+
if _, err := t.GetStorage(addr, key); err != nil {
86+
return err
87+
}
88+
}
89+
return nil
90+
}
91+
8192
// GetAccount abstracts an account read from the trie.
8293
func (t *TransitionTrie) GetAccount(address common.Address) (*types.StateAccount, error) {
8394
data, err := t.overlay.GetAccount(address)
@@ -94,6 +105,17 @@ func (t *TransitionTrie) GetAccount(address common.Address) (*types.StateAccount
94105
return t.base.GetAccount(address)
95106
}
96107

108+
// PrefetchAccount attempts to resolve specific accounts from the database
109+
// to accelerate subsequent trie operations.
110+
func (t *TransitionTrie) PrefetchAccount(addresses []common.Address) error {
111+
for _, addr := range addresses {
112+
if _, err := t.GetAccount(addr); err != nil {
113+
return err
114+
}
115+
}
116+
return nil
117+
}
118+
97119
// UpdateStorage associates key with value in the trie. If value has length zero, any
98120
// existing value is deleted from the trie. The value bytes must not be modified
99121
// by the caller while they are stored in the trie.
@@ -173,7 +195,7 @@ func (t *TransitionTrie) IsVerkle() bool {
173195
return true
174196
}
175197

176-
// UpdateStems updates a group of values, given the stem they are using. If
198+
// UpdateStem updates a group of values, given the stem they are using. If
177199
// a value already exists, it is overwritten.
178200
func (t *TransitionTrie) UpdateStem(key []byte, values [][]byte) error {
179201
trie := t.overlay

trie/trie.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"github.com/ethereum/go-ethereum/log"
2929
"github.com/ethereum/go-ethereum/trie/trienode"
3030
"github.com/ethereum/go-ethereum/triedb/database"
31+
"golang.org/x/sync/errgroup"
3132
)
3233

3334
// Trie represents a Merkle Patricia Trie. Use New to create a trie that operates
@@ -194,6 +195,51 @@ func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode no
194195
}
195196
}
196197

198+
// Prefetch attempts to resolve the leaves and intermediate trie nodes
199+
// specified by the key list in parallel. The results are silently
200+
// discarded to simplify the function.
201+
func (t *Trie) Prefetch(keylist [][]byte) error {
202+
// Short circuit if the trie is already committed and not usable.
203+
if t.committed {
204+
return ErrCommitted
205+
}
206+
// Resolve the keys sequentially if the root is not a full node or if
207+
// there are fewer than 16 keys, in which case parallelism is not used.
208+
fn, ok := t.root.(*fullNode)
209+
if !ok || len(keylist) < 16 {
210+
for _, key := range keylist {
211+
_, err := t.Get(key)
212+
if err != nil {
213+
return err
214+
}
215+
}
216+
return nil
217+
}
218+
var (
219+
keys = make(map[byte][][]byte)
220+
eg errgroup.Group
221+
)
222+
for _, key := range keylist {
223+
hkey := keybytesToHex(key)
224+
keys[hkey[0]] = append(keys[hkey[0]], hkey)
225+
}
226+
for pos, ks := range keys {
227+
eg.Go(func() error {
228+
for _, k := range ks {
229+
_, newnode, didResolve, err := t.get(fn.Children[pos], k, 1)
230+
if err == nil && didResolve {
231+
fn.Children[pos] = newnode
232+
}
233+
if err != nil {
234+
return err
235+
}
236+
}
237+
return nil
238+
})
239+
}
240+
return eg.Wait()
241+
}
242+
197243
// MustGetNode is a wrapper of GetNode and will omit any encountered error but
198244
// just print out an error message.
199245
func (t *Trie) MustGetNode(path []byte) ([]byte, int) {

0 commit comments

Comments
 (0)