Skip to content

cmd, core, eth, triedb/pathdb: track node origins in the path database #32418

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,13 @@ type BlockChainConfig struct {
TrieNoAsyncFlush bool // Whether the asynchronous buffer flushing is disallowed
TrieJournalDirectory string // Directory path to the journal used for persisting trie data across node restarts

Preimages bool // Whether to store preimage of trie key to the disk
StateHistory uint64 // Number of blocks from head whose state histories are reserved.
StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top
ArchiveMode bool // Whether to enable the archive mode
Preimages bool // Whether to store preimage of trie key to the disk
StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top
ArchiveMode bool // Whether to enable the archive mode

// Number of blocks from the chain head for which state histories are retained.
// If set to 0, all state histories across the entire chain will be retained.
StateHistory uint64

// State snapshot related options
SnapshotLimit int // Memory allowance (MB) to use for caching snapshot entries in memory
Expand Down
17 changes: 15 additions & 2 deletions trie/trienode/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,24 @@ func (set *MergedNodeSet) Merge(other *NodeSet) error {
return nil
}

// Flatten returns a two-dimensional map for internal nodes.
func (set *MergedNodeSet) Flatten() map[common.Hash]map[string]*Node {
// Nodes collects the node maps of every contained set into a two-dimensional
// map indexed by owner. The inner maps are shared with the underlying sets
// rather than copied.
func (set *MergedNodeSet) Nodes() map[common.Hash]map[string]*Node {
	result := make(map[common.Hash]map[string]*Node, len(set.Sets))
	for owner, subset := range set.Sets {
		result[owner] = subset.Nodes
	}
	return result
}

// NodeAndOrigins collects, for every contained set, both its node map and its
// origin map, and returns them as two two-dimensional maps indexed by owner.
// The inner maps are shared with the underlying sets rather than copied.
func (set *MergedNodeSet) NodeAndOrigins() (map[common.Hash]map[string]*Node, map[common.Hash]map[string][]byte) {
	size := len(set.Sets)
	nodes := make(map[common.Hash]map[string]*Node, size)
	origins := make(map[common.Hash]map[string][]byte, size)

	for owner, subset := range set.Sets {
		nodes[owner] = subset.Nodes
		origins[owner] = subset.Origins
	}
	return nodes, origins
}
118 changes: 118 additions & 0 deletions triedb/pathdb/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright 2025 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pathdb

import (
"fmt"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/params"
)

const (
	// defaultTrieCleanSize is the default memory allowance of the clean trie
	// cache (16 MiB).
	defaultTrieCleanSize = 16 << 20

	// defaultStateCleanSize is the default memory allowance of the clean state
	// cache (16 MiB).
	defaultStateCleanSize = 16 << 20

	// maxBufferSize is the maximum memory allowance of the node buffer
	// (256 MiB). A buffer that is too large makes the system pause for a long
	// time whenever a write happens. In addition, the largest batch that
	// pebble can support is 4GB, and the node will panic if the batch size
	// exceeds this limit.
	maxBufferSize = 256 << 20

	// defaultBufferSize is the default memory allowance of the node buffer
	// (64 MiB), which aggregates the writes from above until it is flushed to
	// disk. It is meant to be used once the initial sync is finished. Do not
	// increase the buffer size arbitrarily, otherwise the system pause time
	// will grow when the database writes happen.
	defaultBufferSize = 64 << 20
)

// maxDiffLayers is the maximum number of diff layers allowed in the layer tree.
var maxDiffLayers = 128

// Defaults contains default settings for Ethereum mainnet: state histories
// are kept for the most recent params.FullImmutabilityThreshold blocks, state
// history indexing is disabled, and the clean caches and the write buffer use
// their default sizes.
var Defaults = &Config{
StateHistory: params.FullImmutabilityThreshold,
EnableStateIndexing: false,
TrieCleanSize: defaultTrieCleanSize,
StateCleanSize: defaultStateCleanSize,
WriteBufferSize: defaultBufferSize,
}

// ReadOnly is the config used to open the database in read only mode. The
// clean trie and state caches use their default sizes; every other setting is
// left at its zero value.
var ReadOnly = &Config{
ReadOnly: true,
TrieCleanSize: defaultTrieCleanSize,
StateCleanSize: defaultStateCleanSize,
}

// Config contains the settings for database.
type Config struct {
StateHistory uint64 // Number of recent blocks to maintain state history for, 0: full chain
EnableStateIndexing bool // Whether to enable state history indexing for external state access
TrieCleanSize int // Maximum memory allowance (in bytes) for caching clean trie data
StateCleanSize int // Maximum memory allowance (in bytes) for caching clean state data
WriteBufferSize int // Maximum memory allowance (in bytes) for write buffer
ReadOnly bool // Flag whether the database is opened in read only mode
JournalDirectory string // Absolute path of journal directory (empty means the journal data is persisted in key-value store)

// Testing configurations
SnapshotNoBuild bool // Flag whether the state generation is disabled
NoAsyncFlush bool // Flag whether the background buffer flushing is disabled
NoAsyncGeneration bool // Flag whether the background generation is disabled
}

// sanitize returns a copy of the configuration in which unreasonable or
// unworkable values have been corrected. The receiver itself is never
// modified. Currently the only adjustment is capping WriteBufferSize at
// maxBufferSize, which is reported with a warning.
func (c *Config) sanitize() *Config {
	sanitized := *c
	if sanitized.WriteBufferSize <= maxBufferSize {
		return &sanitized
	}
	log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(sanitized.WriteBufferSize), "updated", common.StorageSize(maxBufferSize))
	sanitized.WriteBufferSize = maxBufferSize
	return &sanitized
}

// fields returns the config attributes as a flat list of alternating keys and
// values, suitable for printing. The read-only flag, the history indexing flag
// and the journal directory are only included when they are set.
func (c *Config) fields() []interface{} {
	var attrs []interface{}
	if c.ReadOnly {
		attrs = append(attrs, "readonly", true)
	}
	attrs = append(attrs,
		"triecache", common.StorageSize(c.TrieCleanSize),
		"statecache", common.StorageSize(c.StateCleanSize),
		"buffer", common.StorageSize(c.WriteBufferSize),
	)

	// A zero history limit means histories for the whole chain are retained.
	history := "entire chain"
	if c.StateHistory != 0 {
		history = fmt.Sprintf("last %d blocks", c.StateHistory)
	}
	attrs = append(attrs, "state-history", history)

	if c.EnableStateIndexing {
		attrs = append(attrs, "index-history", true)
	}
	if c.JournalDirectory != "" {
		attrs = append(attrs, "journal-dir", c.JournalDirectory)
	}
	return attrs
}
94 changes: 3 additions & 91 deletions triedb/pathdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,37 +31,10 @@ import (
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/params"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-verkle"
)

const (
// defaultTrieCleanSize is the default memory allowance of clean trie cache.
defaultTrieCleanSize = 16 * 1024 * 1024

// defaultStateCleanSize is the default memory allowance of clean state cache.
defaultStateCleanSize = 16 * 1024 * 1024

// maxBufferSize is the maximum memory allowance of node buffer.
// Too large buffer will cause the system to pause for a long
// time when write happens. Also, the largest batch that pebble can
// support is 4GB, node will panic if batch size exceeds this limit.
maxBufferSize = 256 * 1024 * 1024

// defaultBufferSize is the default memory allowance of node buffer
// that aggregates the writes from above until it's flushed into the
// disk. It's meant to be used once the initial sync is finished.
// Do not increase the buffer size arbitrarily, otherwise the system
// pause time will increase when the database writes happen.
defaultBufferSize = 64 * 1024 * 1024
)

var (
// maxDiffLayers is the maximum diff layers allowed in the layer tree.
maxDiffLayers = 128
)

// layer is the interface implemented by all state layers which includes some
// public methods and some additional methods for internal usage.
type layer interface {
Expand Down Expand Up @@ -105,76 +78,14 @@ type layer interface {
// the provided dirty trie nodes along with the state change set.
//
// Note, the maps are retained by the method to avoid copying everything.
update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer
update(root common.Hash, id uint64, block uint64, nodes *nodeSetWithOrigin, states *StateSetWithOrigin) *diffLayer

// journal commits an entire diff hierarchy to disk into a single journal entry.
// This is meant to be used during shutdown to persist the layer without
// flattening everything down (bad for reorgs).
journal(w io.Writer) error
}

// Config contains the settings for database.
type Config struct {
StateHistory uint64 // Number of recent blocks to maintain state history for
EnableStateIndexing bool // Whether to enable state history indexing for external state access
TrieCleanSize int // Maximum memory allowance (in bytes) for caching clean trie nodes
StateCleanSize int // Maximum memory allowance (in bytes) for caching clean state data
WriteBufferSize int // Maximum memory allowance (in bytes) for write buffer
ReadOnly bool // Flag whether the database is opened in read only mode
JournalDirectory string // Absolute path of journal directory (null means the journal data is persisted in key-value store)

// Testing configurations
SnapshotNoBuild bool // Flag Whether the state generation is allowed
NoAsyncFlush bool // Flag whether the background buffer flushing is allowed
NoAsyncGeneration bool // Flag whether the background generation is allowed
}

// sanitize checks the provided user configurations and changes anything that's
// unreasonable or unworkable.
func (c *Config) sanitize() *Config {
conf := *c
if conf.WriteBufferSize > maxBufferSize {
log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.WriteBufferSize), "updated", common.StorageSize(maxBufferSize))
conf.WriteBufferSize = maxBufferSize
}
return &conf
}

// fields returns a list of attributes of config for printing.
func (c *Config) fields() []interface{} {
var list []interface{}
if c.ReadOnly {
list = append(list, "readonly", true)
}
if c.SnapshotNoBuild {
list = append(list, "snapshot", false)
}
list = append(list, "triecache", common.StorageSize(c.TrieCleanSize))
list = append(list, "statecache", common.StorageSize(c.StateCleanSize))
list = append(list, "buffer", common.StorageSize(c.WriteBufferSize))

if c.StateHistory == 0 {
list = append(list, "history", "entire chain")
} else {
list = append(list, "history", fmt.Sprintf("last %d blocks", c.StateHistory))
}
if c.JournalDirectory != "" {
list = append(list, "journal-dir", c.JournalDirectory)
}
return list
}

// Defaults contains default settings for Ethereum mainnet.
var Defaults = &Config{
StateHistory: params.FullImmutabilityThreshold,
TrieCleanSize: defaultTrieCleanSize,
StateCleanSize: defaultStateCleanSize,
WriteBufferSize: defaultBufferSize,
}

// ReadOnly is the config in order to open database in read only mode.
var ReadOnly = &Config{ReadOnly: true}

// nodeHasher is the function to compute the hash of supplied node blob.
type nodeHasher func([]byte) (common.Hash, error)

Expand Down Expand Up @@ -419,7 +330,8 @@ func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint6
if err := db.modifyAllowed(); err != nil {
return err
}
if err := db.tree.add(root, parentRoot, block, nodes, states); err != nil {
// TODO(rjl493456442) tracking the origins in the following PRs.
if err := db.tree.add(root, parentRoot, block, NewNodeSetWithOrigin(nodes.Nodes(), nil), states); err != nil {
return err
}
// Keep 128 diff layers in the memory, persistent layer is 129th.
Expand Down
Loading