Skip to content

Commit 1183720

Browse files
committed
chore(1416): Publisher cleanup items
* Implemented proper condition wait for the publisher manager when there are no incoming data items to forward to messaging. * Added metrics for closed blocks (complete and incomplete). * Improved queue transfers * Added a node level configuration, and used that for "earliest managed block". Signed-off-by: Joseph S <121976561+jsync-swirlds@users.noreply.github.com>
1 parent fe2faa2 commit 1183720

File tree

7 files changed

+202
-80
lines changed

7 files changed

+202
-80
lines changed

block-node/app-config/src/main/java/module-info.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
com.swirlds.config.impl,
66
com.swirlds.config.extensions,
77
org.hiero.block.node.app;
8+
// export the node-wide configuration to everything.
9+
exports org.hiero.block.node.app.config.node;
810

911
requires transitive com.swirlds.config.api;
1012
requires com.swirlds.base;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
package org.hiero.block.node.app.config.node;
3+
4+
import com.swirlds.config.api.ConfigData;
5+
import com.swirlds.config.api.ConfigProperty;
6+
import com.swirlds.config.api.validation.annotation.Min;
7+
import org.hiero.block.node.base.Loggable;
8+
9+
/**
10+
* Use this configuration for Node-wide configuration.
11+
* <p>
12+
* Node-wide configuration includes settings useful to <em>all</em> or nearly all
13+
* plugins. Examples include the earliest block number managed by this node.
14+
*
15+
* @param earliestManagedBlock the block number for the earliest block managed
16+
* by this node. Blocks earlier than this might be present, but the node
17+
* should not make any particular effort to obtain or store them.
* Defaults to {@code 0} and is validated with a minimum of {@code 0}.
18+
*/
19+
@ConfigData("block.node")
20+
public record NodeConfig(@Loggable @ConfigProperty(defaultValue = "0") @Min(0) long earliestManagedBlock) {}

block-node/stream-publisher/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
requires transitive com.swirlds.metrics.api;
1717
requires transitive org.hiero.block.node.spi;
1818
requires transitive org.hiero.block.protobuf.pbj;
19+
requires org.hiero.block.node.app.config;
1920
requires org.hiero.block.node.base;
2021
requires com.github.spotbugs.annotations;
2122

block-node/stream-publisher/src/main/java/org/hiero/block/node/stream/publisher/LiveStreamPublisherManager.java

Lines changed: 106 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
package org.hiero.block.node.stream.publisher;
33

44
import static java.lang.System.Logger.Level.TRACE;
5-
import static java.util.concurrent.locks.LockSupport.parkNanos;
65
import static org.hiero.block.node.spi.BlockNodePlugin.METRICS_CATEGORY;
76
import static org.hiero.block.node.spi.BlockNodePlugin.UNKNOWN_BLOCK_NUMBER;
87

@@ -23,9 +22,13 @@
2322
import java.util.concurrent.ConcurrentSkipListMap;
2423
import java.util.concurrent.Future;
2524
import java.util.concurrent.LinkedTransferQueue;
25+
import java.util.concurrent.TimeUnit;
2626
import java.util.concurrent.atomic.AtomicLong;
27+
import java.util.concurrent.locks.Condition;
28+
import java.util.concurrent.locks.ReentrantLock;
2729
import org.hiero.block.api.PublishStreamResponse;
2830
import org.hiero.block.internal.BlockItemSetUnparsed;
31+
import org.hiero.block.node.app.config.node.NodeConfig;
2932
import org.hiero.block.node.spi.BlockNodeContext;
3033
import org.hiero.block.node.spi.blockmessaging.BlockItems;
3134
import org.hiero.block.node.spi.blockmessaging.BlockMessagingFacility;
@@ -39,6 +42,7 @@
3942
*/
4043
public final class LiveStreamPublisherManager implements StreamPublisherManager {
4144
private static final String QUEUE_ID_FORMAT = "Q%016d";
45+
private static final int DATA_READY_WAIT_MICROSECONDS = 500;
4246
// @todo(1413) utilize the logger
4347
private final System.Logger LOGGER = System.getLogger(LiveStreamPublisherManager.class.getName());
4448
private final MetricsHolder metrics;
@@ -48,6 +52,8 @@ public final class LiveStreamPublisherManager implements StreamPublisherManager
4852
private final AtomicLong nextHandlerId;
4953
private final ConcurrentMap<String, BlockingQueue<BlockItemSetUnparsed>> transferQueueMap;
5054
private final ConcurrentMap<Long, BlockingQueue<BlockItemSetUnparsed>> queueByBlockMap;
55+
private final Condition dataReadyLatch;
56+
private final ReentrantLock dataReadyLock;
5157

5258
/**
5359
* Future tracking the queue forwarder task.
@@ -81,6 +87,8 @@ public LiveStreamPublisherManager(
8187
currentStreamingBlockNumber = new AtomicLong(-1);
8288
nextUnstreamedBlockNumber = new AtomicLong(-1);
8389
lastPersistedBlockNumber = new AtomicLong(-1);
90+
dataReadyLock = new ReentrantLock();
91+
dataReadyLatch = dataReadyLock.newCondition();
8492
updateBlockNumbers(serverContext);
8593
}
8694

@@ -129,6 +137,25 @@ public BlockAction getActionForBlock(
129137
};
130138
}
131139

140+
/**
 * Shut down this manager.
 * <p>
 * Closes communication on every registered handler, empties the handler
 * and queue maps, and cancels the queue forwarder task if one was launched.
 */
@Override
141+
public void shutdown() {
142+
// Shut down all handlers and clear the queues.
143+
for (final Long nextKey : handlers.keySet()) {
144+
final PublisherHandler value = handlers.remove(nextKey);
145+
// remove() returns null when the key is no longer mapped; skip those.
if (value != null) {
146+
value.closeCommunication();
147+
}
148+
}
149+
handlers.clear();
150+
transferQueueMap.clear();
151+
queueByBlockMap.clear();
152+
// Cancel the queue forwarder task if one was ever launched; passing
// `true` requests interruption of the thread running the task. The
// reference is then reset to null.
153+
if (queueForwarderResult != null) {
154+
queueForwarderResult.cancel(true);
155+
queueForwarderResult = null;
156+
}
157+
}
158+
132159
/**
133160
* todo(1420) add documentation
134161
*/
@@ -167,13 +194,61 @@ private BlockAction addHandlerQueueForBlock(final long blockNumber, final long h
167194
if (queueForwarderResult == null) {
168195
queueForwarderResult = launchQueueForwarder();
169196
}
197+
// This should result in new data being available, so we
198+
// signal the data ready condition.
199+
signalDataReady();
170200
return BlockAction.ACCEPT;
171201
}
172202
}
173203
// Return the correct action if another handler jumped in front of the caller.
174204
return blockNumber < nextUnstreamedBlockNumber.get() ? BlockAction.SKIP : BlockAction.END_BEHIND;
175205
}
176206

207+
/**
208+
* Signal the data ready condition.
209+
* <p>
210+
* This method is called to indicate that data <em>might</em> be available to be
211+
* sent to the messaging facility.<br/>
212+
* The messaging thread may wait on this condition to limit spin cycles
213+
* and still have a low impact on latency.
* <p>
* Despite its name, {@code dataReadyLatch} is a {@code Condition} created
* from {@code dataReadyLock}; a signal sent while no thread is waiting is
* not remembered. The timed wait in {@code waitForDataReady()} bounds the
* delay in that case.
214+
*/
215+
private void signalDataReady() {
216+
// A Condition may only be signaled while holding its owning lock.
dataReadyLock.lock();
217+
try {
218+
dataReadyLatch.signal();
219+
} finally {
220+
dataReadyLock.unlock();
221+
}
222+
}
223+
224+
/**
225+
* Wait for data to be ready.
226+
* <p>
227+
* This method will block until the data ready condition is signaled or
228+
* the timeout ({@code DATA_READY_WAIT_MICROSECONDS}) is reached.<br/>
229+
* This method is used (with {@link #signalDataReady()}) to limit spin
230+
* cycles and still have a low impact on latency.
231+
* <p>
232+
* When this method returns data <em>might</em> be available to send to the
233+
* messaging facility, but it is not guaranteed.
234+
* <p>
235+
* Note
236+
* <blockquote>This method ignores interrupted exceptions as a specific
237+
* optimization to avoid unnecessarily ending a thread or causing failures
238+
* when interrupt is used as a signal rather than signaling the {@code Condition}
239+
* variable.</blockquote>
240+
*/
241+
private void waitForDataReady() {
242+
// A Condition may only be awaited while holding its owning lock.
dataReadyLock.lock();
243+
try {
244+
dataReadyLatch.await(DATA_READY_WAIT_MICROSECONDS, TimeUnit.MICROSECONDS);
245+
} catch (InterruptedException e) {
246+
// just ignore interruption in this specific case.
// NOTE(review): the interrupt status is not restored here, while
// shutdown() cancels the forwarder task with cancel(true). Confirm
// the forwarder loop detects cancellation by some other means.
247+
} finally {
248+
dataReadyLock.unlock();
249+
}
250+
}
251+
177252
/**
178253
* todo(1420) add documentation
179254
*/
@@ -185,6 +260,9 @@ private BlockAction getActionForCurrentlyStreaming(final long blockNumber) {
185260
// We'll have to skip the rest of this block.
186261
return BlockAction.SKIP;
187262
} else if (blockNumber >= currentStreamingBlockNumber.get() && blockNumber < nextUnstreamedBlockNumber.get()) {
263+
// This should result in new data being available, so we
264+
// signal the data ready condition.
265+
signalDataReady();
188266
// We're one of the handlers currently streaming, keep going.
189267
return BlockAction.ACCEPT;
190268
} else if (blockNumber == nextUnstreamedBlockNumber.get()) {
@@ -220,11 +298,13 @@ public void closeBlock(final BlockProof blockEndProof, final long handlerId) {
220298
if (queueForwarderResult == null || queueForwarderResult.isDone()) {
221299
queueForwarderResult = launchQueueForwarder();
222300
}
223-
// @todo(1416) complete tasks that do not require the block proof data here.
301+
// @todo(1416) complete tasks that do not require the block proof data here (before this line).
224302
if (blockEndProof == null) {
225303
// No point logging here, as the handler would have done that.
226304
// here we just update metrics.
305+
metrics.blocksClosedIncomplete.increment();
227306
} else {
307+
metrics.blocksClosedComplete.increment();
228308
// @todo(1413) Also log completed blocks metric and any other relevant
229309
// actions. Also check if we have incomplete blocks lower than the
230310
// block that completed, and possibly enter the resend process to
@@ -393,10 +473,7 @@ private static String getQueueNameForHandlerId(final long handlerId) {
393473
return QUEUE_ID_FORMAT.formatted(handlerId);
394474
}
395475

396-
// Somewhere we were supposed to set the first block number supported by
397-
// the block node. I don't know what happened to that config, but it seems
398-
// to be missing. I asked the question on the backfill PR as it's also
399-
// relevant there. The current streaming should be the next block to be
476+
// The current streaming should be the next block to be
400477
// streamed, but _only_ on startup. After that there should always be
401478
// a delta (next unstreamed must always be strictly greater than the current
402479
// streaming block number).
@@ -406,11 +483,12 @@ private void updateBlockNumbers(final BlockNodeContext serverContext) {
406483
// Always set the last persisted block number, even if there are no
407484
// known blocks.
408485
lastPersistedBlockNumber.set(latestKnownBlock);
486+
NodeConfig nodeConfiguration = serverContext.configuration().getConfigData(NodeConfig.class);
487+
final long earliestManagedBlock = nodeConfiguration.earliestManagedBlock();
409488
if (UNKNOWN_BLOCK_NUMBER == latestKnownBlock) {
410489
// if we have entered here, then we have no blocks available
411-
// @todo(1416) get below values from hiero config.
412-
currentStreamingBlockNumber.set(0L);
413-
nextUnstreamedBlockNumber.set(0L);
490+
currentStreamingBlockNumber.set(earliestManagedBlock);
491+
nextUnstreamedBlockNumber.set(earliestManagedBlock);
414492
} else {
415493
// if we have entered here, we know what the latest known block is,
416494
// so we can set the next unstreamed block number to one greater
@@ -483,15 +561,11 @@ public Long call() {
483561
}
484562
}
485563
// If the current block number has no batches to send, then
486-
// block on a count down latch until more data is available.
487-
// @todo(1416) need to figure out how to reset and set the countdown
488-
// latch... Until then, just park for 1/2 millisecond.
489-
// Park for 500 microseconds if there is no data available,
490-
// but not if the current block is completed (i.e. we just
491-
// sent a block proof).
564+
// block on a condition variable until more data is
565+
// _probably_ available, or 500 microseconds elapse.
492566
if (publisherManager.currentStreamingBlockNumber.get() == currentBlockNumber
493567
&& availableBatches.isEmpty()) {
494-
parkNanos(500_000); // Park for 500 microseconds
568+
publisherManager.waitForDataReady();
495569
}
496570
}
497571
}
@@ -501,25 +575,37 @@ public Long call() {
501575

502576
/**
503577
* Metrics for tracking publisher handler activity:
578+
* blockItemsMessaged - Number of block items delivered to the messaging service
579+
* currentPublisherCount - Number of currently connected publishers
504580
* lowestBlockNumber - Lowest incoming block number
505581
* currentBlockNumber - Current incoming block number
506582
* highestBlockNumber - Highest incoming block number
507583
* latestBlockNumberAcknowledged - The latest block number acknowledged
584+
* blocksClosedComplete - Number of blocks received complete (with both header and proof)
585+
* blocksClosedIncomplete - Number of blocks received incomplete (missing header or proof)
508586
*/
509587
public record MetricsHolder(
510588
Counter blockItemsMessaged,
511589
LongGauge currentPublisherCount,
512590
LongGauge lowestBlockNumber,
513591
LongGauge currentBlockNumber,
514592
LongGauge highestBlockNumber,
515-
LongGauge latestBlockNumberAcknowledged) {
593+
LongGauge latestBlockNumberAcknowledged,
594+
Counter blocksClosedComplete,
595+
Counter blocksClosedIncomplete) {
516596
/**
517597
* todo(1420) add documentation
518598
*/
519599
static MetricsHolder createMetrics(@NonNull final Metrics metrics) {
520600
final Counter blockItemsMessaged =
521601
metrics.getOrCreate(new Counter.Config(METRICS_CATEGORY, "publisher_block_items_messaged")
522602
.withDescription("Live block items messaged to the messaging service"));
603+
final Counter blocksClosedComplete =
604+
metrics.getOrCreate(new Counter.Config(METRICS_CATEGORY, "publisher_blocks_closed_complete")
605+
.withDescription("Blocks received complete (with both header and proof) by any Handler"));
606+
final Counter blocksClosedIncomplete =
607+
metrics.getOrCreate(new Counter.Config(METRICS_CATEGORY, "publisher_blocks_closed_incomplete")
608+
.withDescription("Blocks received incomplete (missing header or proof) by any Handler"));
523609
final LongGauge numberOfProducers =
524610
metrics.getOrCreate(new LongGauge.Config(METRICS_CATEGORY, "publisher_open_connections")
525611
.withDescription("Connected publishers"));
@@ -541,7 +627,9 @@ static MetricsHolder createMetrics(@NonNull final Metrics metrics) {
541627
lowestBlockNumber,
542628
currentBlockNumber,
543629
highestBlockNumber,
544-
latestBlockNumberAcknowledged);
630+
latestBlockNumberAcknowledged,
631+
blocksClosedComplete,
632+
blocksClosedIncomplete);
545633
}
546634
}
547635
}

0 commit comments

Comments
 (0)