Skip to content

Commit 3898ed5

Browse files
committed
chore(1416): Publisher cleanup items
* Implemented proper condition wait for the publisher manager when there are no incoming data items to forward to messaging. * Added metrics for closed blocks (complete and incomplete). * Improved queue transfers * Added a node level configuration, and used that for "earliest managed block". Signed-off-by: Joseph S <121976561+jsync-swirlds@users.noreply.github.com>
1 parent faad8ef commit 3898ed5

File tree

7 files changed

+185
-60
lines changed

7 files changed

+185
-60
lines changed

block-node/app-config/src/main/java/module-info.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
com.swirlds.config.impl,
66
com.swirlds.config.extensions,
77
org.hiero.block.node.app;
8+
// export the node-wide configuration to everything.
9+
exports org.hiero.block.node.app.config.node;
810

911
requires transitive com.swirlds.config.api;
1012
requires com.swirlds.base;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
package org.hiero.block.node.app.config.node;
3+
4+
import com.swirlds.config.api.ConfigData;
5+
import com.swirlds.config.api.ConfigProperty;
6+
import com.swirlds.config.api.validation.annotation.Min;
7+
import org.hiero.block.node.base.Loggable;
8+
9+
/**
10+
* Use this configuration for Node-wide configuration.
11+
* <p>
12+
* Node-wide configuration includes settings useful to _all_ or nearly all
13+
* plugins. Examples include the earliest block number managed by this node.
14+
*
15+
* @param earliestManagedBlock the block number for the earliest block managed
16+
* by this node. Blocks earlier than this might be present, but the node
17+
* should not make any particular effort to obtain or store them.
18+
*/
19+
@ConfigData("block.node")
20+
public record NodeConfig(@Loggable @ConfigProperty(defaultValue = "0") @Min(0) long earliestManagedBlock) {}

block-node/stream-publisher/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
requires transitive com.swirlds.metrics.api;
1717
requires transitive org.hiero.block.node.spi;
1818
requires transitive org.hiero.block.protobuf.pbj;
19+
requires org.hiero.block.node.app.config;
1920
requires org.hiero.block.node.base;
2021
requires com.github.spotbugs.annotations;
2122

block-node/stream-publisher/src/main/java/org/hiero/block/node/stream/publisher/LiveStreamPublisherManager.java

Lines changed: 106 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
package org.hiero.block.node.stream.publisher;
33

44
import static java.lang.System.Logger.Level.TRACE;
5-
import static java.util.concurrent.locks.LockSupport.parkNanos;
65
import static org.hiero.block.node.spi.BlockNodePlugin.METRICS_CATEGORY;
76
import static org.hiero.block.node.spi.BlockNodePlugin.UNKNOWN_BLOCK_NUMBER;
87

@@ -23,9 +22,13 @@
2322
import java.util.concurrent.ConcurrentSkipListMap;
2423
import java.util.concurrent.Future;
2524
import java.util.concurrent.LinkedTransferQueue;
25+
import java.util.concurrent.TimeUnit;
2626
import java.util.concurrent.atomic.AtomicLong;
27+
import java.util.concurrent.locks.Condition;
28+
import java.util.concurrent.locks.ReentrantLock;
2729
import org.hiero.block.api.PublishStreamResponse;
2830
import org.hiero.block.internal.BlockItemSetUnparsed;
31+
import org.hiero.block.node.app.config.node.NodeConfig;
2932
import org.hiero.block.node.spi.BlockNodeContext;
3033
import org.hiero.block.node.spi.blockmessaging.BlockItems;
3134
import org.hiero.block.node.spi.blockmessaging.BlockMessagingFacility;
@@ -38,6 +41,7 @@
3841
*/
3942
public final class LiveStreamPublisherManager implements StreamPublisherManager {
4043
private static final String QUEUE_ID_FORMAT = "Q%016d";
44+
private static final int DATA_READY_WAIT_MICROSECONDS = 500;
4145
// @todo(1413) utilize the logger
4246
private final System.Logger LOGGER = System.getLogger(LiveStreamPublisherManager.class.getName());
4347
private final MetricsHolder metrics;
@@ -47,6 +51,8 @@ public final class LiveStreamPublisherManager implements StreamPublisherManager
4751
private final AtomicLong nextHandlerId;
4852
private final ConcurrentMap<String, BlockingQueue<BlockItemSetUnparsed>> transferQueueMap;
4953
private final ConcurrentMap<Long, BlockingQueue<BlockItemSetUnparsed>> queueByBlockMap;
54+
private final Condition dataReadyLatch;
55+
private final ReentrantLock dataReadyLock;
5056

5157
/**
5258
* Future tracking the queue forwarder task.
@@ -80,6 +86,8 @@ public LiveStreamPublisherManager(
8086
currentStreamingBlockNumber = new AtomicLong(-1);
8187
nextUnstreamedBlockNumber = new AtomicLong(-1);
8288
lastPersistedBlockNumber = new AtomicLong(-1);
89+
dataReadyLock = new ReentrantLock();
90+
dataReadyLatch = dataReadyLock.newCondition();
8391
updateBlockNumbers(serverContext);
8492
}
8593

@@ -128,6 +136,25 @@ public BlockAction getActionForBlock(
128136
};
129137
}
130138

139+
@Override
140+
public void shutdown() {
141+
// Shut down all handlers and clear the queues.
142+
for (final Long nextKey : handlers.keySet()) {
143+
final PublisherHandler value = handlers.remove(nextKey);
144+
if (value != null) {
145+
value.closeCommunication();
146+
}
147+
}
148+
handlers.clear();
149+
transferQueueMap.clear();
150+
queueByBlockMap.clear();
151+
// Cancel the queue forwarder task if it is running.
152+
if (queueForwarderResult != null) {
153+
queueForwarderResult.cancel(true);
154+
queueForwarderResult = null;
155+
}
156+
}
157+
131158
/**
132159
* todo(1420) add documentation
133160
*/
@@ -166,13 +193,61 @@ private BlockAction addHandlerQueueForBlock(final long blockNumber, final long h
166193
if (queueForwarderResult == null) {
167194
queueForwarderResult = launchQueueForwarder();
168195
}
196+
// This should result in new data being available, so we
197+
// signal the data ready condition.
198+
signalDataReady();
169199
return BlockAction.ACCEPT;
170200
}
171201
}
172202
// Return the correct action if another handler jumped in front of the caller.
173203
return blockNumber < nextUnstreamedBlockNumber.get() ? BlockAction.SKIP : BlockAction.END_BEHIND;
174204
}
175205

206+
/**
207+
* Signal the data ready condition.
208+
* <p>
209+
* This method is called to indicate that data _might_ be available to be
210+
* sent to the messaging facility.<br/>
211+
* The messaging thread may wait on this condition to limit spin cycles
212+
* and still have a low impact on latency.
213+
*/
214+
private void signalDataReady() {
215+
dataReadyLock.lock();
216+
try {
217+
dataReadyLatch.signal();
218+
} finally {
219+
dataReadyLock.unlock();
220+
}
221+
}
222+
223+
/**
224+
* Wait for data to be ready.
225+
* <p>
226+
* This method will block until the data ready condition is signaled or
227+
* the timeout is reached.<br/>
228+
* This method is used (with `signalDataReady`) to limit spin cycles and
229+
* still have a low impact on latency.
230+
* <p>
231+
* When this method returns data _might_ be available to send to the
232+
* messaging facility, but it is not guaranteed.
233+
* <p>
234+
* Note
235+
* <blockquote>This method ignores interrupted exceptions as a specific
236+
* optimization to avoid unnecessarily ending a thread or causing failures
237+
* when interrupt is used as a signal rather than signaling the `Condition`
238+
* variable.</blockquote>
239+
*/
240+
private void waitForDataReady() {
241+
dataReadyLock.lock();
242+
try {
243+
dataReadyLatch.await(DATA_READY_WAIT_MICROSECONDS, TimeUnit.MICROSECONDS);
244+
} catch (InterruptedException e) {
245+
// just ignore interruption in this specific case.
246+
} finally {
247+
dataReadyLock.unlock();
248+
}
249+
}
250+
176251
/**
177252
* todo(1420) add documentation
178253
*/
@@ -184,6 +259,9 @@ private BlockAction getActionForCurrentlyStreaming(final long blockNumber) {
184259
// We'll have to skip the rest of this block.
185260
return BlockAction.SKIP;
186261
} else if (blockNumber >= currentStreamingBlockNumber.get() && blockNumber < nextUnstreamedBlockNumber.get()) {
262+
// This should result in new data being available, so we
263+
// signal the data ready condition.
264+
signalDataReady();
187265
// We're one of the handlers currently streaming, keep going.
188266
return BlockAction.ACCEPT;
189267
} else if (blockNumber == nextUnstreamedBlockNumber.get()) {
@@ -219,11 +297,13 @@ public void closeBlock(final BlockProof blockEndProof, final long handlerId) {
219297
if (queueForwarderResult == null || queueForwarderResult.isDone()) {
220298
queueForwarderResult = launchQueueForwarder();
221299
}
222-
// @todo(1416) complete tasks that do not require the block proof data here.
300+
// @todo(1416) complete tasks that do not require the block proof data here (before this line).
223301
if (blockEndProof == null) {
224302
// No point logging here, as the handler would have done that.
225303
// here we just update metrics.
304+
metrics.blocksClosedIncomplete.increment();
226305
} else {
306+
metrics.blocksClosedComplete.increment();
227307
// @todo(1413) Also log completed blocks metric and any other relevant
228308
// actions. Also check if we have incomplete blocks lower than the
229309
// block that completed, and possibly enter the resend process to
@@ -379,10 +459,7 @@ private static String getQueueNameForHandlerId(final long handlerId) {
379459
return QUEUE_ID_FORMAT.formatted(handlerId);
380460
}
381461

382-
// Somewhere we were supposed to set the first block number supported by
383-
// the block node. I don't know what happened to that config, but it seems
384-
// to be missing. I asked the question on the backfill PR as it's also
385-
// relevant there. The current streaming should be the next block to be
462+
// The current streaming should be the next block to be
386463
// streamed, but _only_ on startup. After that there should always be
387464
// a delta (next unstreamed must always be strictly greater than the current
388465
// streaming block number).
@@ -392,11 +469,12 @@ private void updateBlockNumbers(final BlockNodeContext serverContext) {
392469
// Always set the last persisted block number, even if there are no
393470
// known blocks.
394471
lastPersistedBlockNumber.set(latestKnownBlock);
472+
NodeConfig nodeConfiguration = serverContext.configuration().getConfigData(NodeConfig.class);
473+
final long earliestManagedBlock = nodeConfiguration.earliestManagedBlock();
395474
if (UNKNOWN_BLOCK_NUMBER == latestKnownBlock) {
396475
// if we have entered here, then we have no blocks available
397-
// @todo(1416) get below values from hiero config.
398-
currentStreamingBlockNumber.set(0L);
399-
nextUnstreamedBlockNumber.set(0L);
476+
currentStreamingBlockNumber.set(earliestManagedBlock);
477+
nextUnstreamedBlockNumber.set(earliestManagedBlock);
400478
} else {
401479
// if we have entered here, we know what the latest known block is,
402480
// so we can set the next unstreamed block number to one greater
@@ -469,15 +547,11 @@ public Long call() {
469547
}
470548
}
471549
// If the current block number has no batches to send, then
472-
// block on a count down latch until more data is available.
473-
// @todo(1416) need to figure out how to reset and set the countdown
474-
// latch... Until then, just park for 1/2 millisecond.
475-
// Park for 500 microseconds if there is no data available,
476-
// but not if the current block is completed (i.e. we just
477-
// sent a block proof).
550+
// block on a condition variable until more data is
551+
// _probably_ available, or the 500 microsecond timeout elapses.
478552
if (publisherManager.currentStreamingBlockNumber.get() == currentBlockNumber
479553
&& availableBatches.isEmpty()) {
480-
parkNanos(500_000); // Park for 500 microseconds
554+
publisherManager.waitForDataReady();
481555
}
482556
}
483557
}
@@ -487,25 +561,37 @@ public Long call() {
487561

488562
/**
489563
* Metrics for tracking publisher handler activity:
564+
* blockItemsMessaged - Number of block items delivered to the messaging service
565+
* currentPublisherCount - Number of currently connected publishers
490566
* lowestBlockNumber - Lowest incoming block number
491567
* currentBlockNumber - Current incoming block number
492568
* highestBlockNumber - Highest incoming block number
493569
* latestBlockNumberAcknowledged - The latest block number acknowledged
570+
* blocksClosedComplete - Number of blocks received complete (with both header and proof)
571+
* blocksClosedIncomplete - Number of blocks received incomplete (missing header or proof)
494572
*/
495573
public record MetricsHolder(
496574
Counter blockItemsMessaged,
497575
LongGauge currentPublisherCount,
498576
LongGauge lowestBlockNumber,
499577
LongGauge currentBlockNumber,
500578
LongGauge highestBlockNumber,
501-
LongGauge latestBlockNumberAcknowledged) {
579+
LongGauge latestBlockNumberAcknowledged,
580+
Counter blocksClosedComplete,
581+
Counter blocksClosedIncomplete) {
502582
/**
503583
* todo(1420) add documentation
504584
*/
505585
static MetricsHolder createMetrics(@NonNull final Metrics metrics) {
506586
final Counter blockItemsMessaged =
507587
metrics.getOrCreate(new Counter.Config(METRICS_CATEGORY, "publisher_block_items_messaged")
508588
.withDescription("Live block items messaged to the messaging service"));
589+
final Counter blocksClosedComplete =
590+
metrics.getOrCreate(new Counter.Config(METRICS_CATEGORY, "publisher_blocks_closed_complete")
591+
.withDescription("Blocks received complete (with both header and proof) by any Handler"));
592+
final Counter blocksClosedIncomplete =
593+
metrics.getOrCreate(new Counter.Config(METRICS_CATEGORY, "publisher_blocks_closed_incomplete")
594+
.withDescription("Blocks received incomplete (missing header or proof) by any Handler"));
509595
final LongGauge numberOfProducers =
510596
metrics.getOrCreate(new LongGauge.Config(METRICS_CATEGORY, "publisher_open_connections")
511597
.withDescription("Connected publishers"));
@@ -527,7 +613,9 @@ static MetricsHolder createMetrics(@NonNull final Metrics metrics) {
527613
lowestBlockNumber,
528614
currentBlockNumber,
529615
highestBlockNumber,
530-
latestBlockNumberAcknowledged);
616+
latestBlockNumberAcknowledged,
617+
blocksClosedComplete,
618+
blocksClosedIncomplete);
531619
}
532620
}
533621
}

0 commit comments

Comments
 (0)