
Commit 699832f

Author: Arunachalam Thirupathi (committed)
Support Large Dictionary for OrcWriter
OrcWriter uses dictionary encoding for all columns until the writer's total dictionary memory exceeds dictionaryMaxMemory minus a hard-coded 4 MB threshold; it then starts abandoning dictionary encodings. When running with a large dictionary budget (say 80 MB) and a long dictionary, the dictionary writer could retain hundreds of MB before the dictionary is abandoned. This change introduces new configuration parameters to control this behavior (a worked example follows the commit metadata below):

1. Make the 4 MB "almost full" threshold configurable, so deployments with large dictionaries can set it higher.
2. When a column's dictionary exceeds a configurable size, measure whether the dictionary is effective and abandon it if it is not.
3. Setting 2 could affect existing writers, so introduce a third setting that controls how often the dictionary effectiveness check runs. It defaults to Integer.MAX_VALUE to preserve existing behavior.
1 parent fe936d0 commit 699832f
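
To make the interplay of the three settings concrete, here is a small worked example. The literal sizes are hypothetical (illustrations, not defaults from this commit); only the low-watermark formula mirrors the constructor change below.

// Hypothetical values; only the low-watermark formula mirrors this commit.
public class DictionaryThresholdExample
{
    public static void main(String[] args)
    {
        int dictionaryMemoryMaxBytes = 80 * 1024 * 1024;                // total dictionary budget (80 MB)
        int dictionaryMemoryAlmostFullRangeBytes = 8 * 1024 * 1024;     // setting 1: configurable "almost full" range
        int dictionaryUsefulCheckColumnSizeBytes = 6 * 1024 * 1024;     // setting 2: column size that triggers the usefulness check
        int dictionaryUsefulCheckPerChunkFrequency = Integer.MAX_VALUE; // setting 3: how often to run the check

        // Low watermark, as computed in the new constructor:
        int dictionaryMemoryMaxBytesLow = Math.max(dictionaryMemoryMaxBytes - dictionaryMemoryAlmostFullRangeBytes, 0);
        // prints 75497472 (72 MB): dictionaries above this are considered almost full
        System.out.println("Almost full above " + dictionaryMemoryMaxBytesLow + " bytes");
    }
}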

File tree

6 files changed: +348 -57 lines


presto-orc/src/main/java/com/facebook/presto/orc/DictionaryCompressionOptimizer.java

Lines changed: 109 additions & 41 deletions
@@ -16,7 +16,6 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.primitives.Longs;
 import io.airlift.units.DataSize;
-import io.airlift.units.DataSize.Unit;
 
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -28,18 +27,48 @@
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkState;
 import static com.google.common.collect.ImmutableList.toImmutableList;
+import static io.airlift.units.DataSize.Unit.MEGABYTE;
 import static java.lang.Math.toIntExact;
 import static java.util.Objects.requireNonNull;
 
+/**
+ * DictionaryCompressionOptimizer has 2 objectives:
+ * 1) Bound the dictionary memory of the reader, when all columns are read. Reader's dictionary memory
+ * should not exceed the dictionaryMemoryMaxBytesHigh.
+ * 2) When dictionary encoding for a column produces size comparable to the direct encoding, choose
+ * direct encoding over dictionary encoding. Dictionary encoding/decoding is memory and CPU intensive,
+ * so for comparable column sizes, direct encoding is mostly better.
+ * <p>
+ * Note: Dictionary writer might use more memory as they over-allocate dictionary sizes as the writers
+ * build dictionary as they see new data. The hash tables implementation in the dictionary writer's allocate
+ * hash buckets in power of 2. So after a million entries, the overallocation consumes large amount of memory.
+ * <p>
+ * DictionaryCompressionOptimizer functionality can be controlled by the following configs to the constructor.
+ * <p>
+ * 1. dictionaryMemoryMaxBytes -> Max size of the dictionary when all columns are read. Note: Writer
+ * might consume more memory due to the over-allocation.
+ * <p>
+ * 2. dictionaryMemoryAlmostFullRangeBytes -> When the dictionary size exceeds dictionaryMaxMemoryBytes
+ * dictionary columns will be converted to direct to reduce the dictionary size. By setting a range
+ * the stripe can be flushed, before the dictionary is full. When dictionary size is higher than
+ * (dictionaryMemoryMaxBytes - dictionaryMemoryAlmostFullRangeBytes), it is considered almost full
+ * and is ready for flushing. This setting is defined as a delta on dictionaryMemoryMaxBytes for backward compatibility.
+ * <p>
+ * 3. dictionaryUsefulCheckColumnSizeBytes -> Columns start with dictionary encoding and when the dictionary memory
+ * is almost full, usefulness of the dictionary is measured. For large dictionaries (> 40 MB) the check
+ * might happen very late and large dictionary might cause writer to OOM due to writer over allocating for
+ * dictionary growth. When a dictionary for a column grows beyond the dictionaryUsefulCheckColumnSizeBytes the
+ * dictionary usefulness check will be performed and if dictionary is not useful, it will be converted to direct.
+ * <p>
+ * 4. dictionaryUsefulCheckPerChunkFrequency -> dictionaryUsefulCheck could be costly if performed on every chunk.
+ * The dictionaryUsefulCheck will be performed when a column dictionary is above the dictionaryUsefulCheckColumnSizeBytes
+ * and per every dictionaryUsefulCheckPerChunkFrequency chunks written.
+ */
 public class DictionaryCompressionOptimizer
 {
     private static final double DICTIONARY_MIN_COMPRESSION_RATIO = 1.25;
 
-    // Instead of waiting for the dictionary to fill completely, which would force a column into
-    // direct mode, close the stripe early assuming it has hit the minimum row count.
-    static final DataSize DICTIONARY_MEMORY_MAX_RANGE = new DataSize(4, Unit.MEGABYTE);
-
-    static final DataSize DIRECT_COLUMN_SIZE_RANGE = new DataSize(4, Unit.MEGABYTE);
+    static final DataSize DIRECT_COLUMN_SIZE_RANGE = new DataSize(4, MEGABYTE);
 
     private final List<DictionaryColumnManager> allWriters;
     private final List<DictionaryColumnManager> directConversionCandidates = new ArrayList<>();
@@ -49,15 +78,21 @@ public class DictionaryCompressionOptimizer
     private final int stripeMaxRowCount;
     private final int dictionaryMemoryMaxBytesLow;
     private final int dictionaryMemoryMaxBytesHigh;
+    private final int dictionaryUsefulCheckColumnSizeBytes;
+    private final int dictionaryUsefulCheckPerChunkFrequency;
 
     private int dictionaryMemoryBytes;
+    private int dictionaryUsefulCheckCounter;
 
     public DictionaryCompressionOptimizer(
             Set<? extends DictionaryColumn> writers,
             int stripeMinBytes,
             int stripeMaxBytes,
             int stripeMaxRowCount,
-            int dictionaryMemoryMaxBytes)
+            int dictionaryMemoryMaxBytes,
+            int dictionaryMemoryAlmostFullRangeBytes,
+            int dictionaryUsefulCheckColumnSizeBytes,
+            int dictionaryUsefulCheckPerChunkFrequency)
     {
         requireNonNull(writers, "writers is null");
         this.allWriters = writers.stream()
@@ -74,9 +109,14 @@ public DictionaryCompressionOptimizer(
         this.stripeMaxRowCount = stripeMaxRowCount;
 
         checkArgument(dictionaryMemoryMaxBytes >= 0, "dictionaryMemoryMaxBytes is negative");
+        checkArgument(dictionaryMemoryAlmostFullRangeBytes >= 0, "dictionaryMemoryRangeBytes is negative");
         this.dictionaryMemoryMaxBytesHigh = dictionaryMemoryMaxBytes;
-        this.dictionaryMemoryMaxBytesLow = (int) Math.max(dictionaryMemoryMaxBytes - DICTIONARY_MEMORY_MAX_RANGE.toBytes(), 0);
+        this.dictionaryMemoryMaxBytesLow = Math.max(dictionaryMemoryMaxBytes - dictionaryMemoryAlmostFullRangeBytes, 0);
 
+        checkArgument(dictionaryUsefulCheckPerChunkFrequency >= 0, "dictionaryUsefulCheckPerChunkFrequency is negative");
+        this.dictionaryUsefulCheckPerChunkFrequency = dictionaryUsefulCheckPerChunkFrequency;
+
+        this.dictionaryUsefulCheckColumnSizeBytes = dictionaryUsefulCheckColumnSizeBytes;
         directConversionCandidates.addAll(allWriters);
     }
@@ -87,12 +127,12 @@ public int getDictionaryMemoryBytes()
 
     public boolean isFull(long bufferedBytes)
     {
-        // if the strip is big enough to flush, stop before we hit the absolute max, so we are
+        // if the stripe is big enough to flush, stop before we hit the absolute max, so we are
         // not forced to convert a dictionary to direct to fit in memory
         if (bufferedBytes > stripeMinBytes) {
             return dictionaryMemoryBytes > dictionaryMemoryMaxBytesLow;
         }
-        // strip is small, grow to the high water mark (so at the very least we have more information)
+        // stripe is small, grow to the high watermark (so at the very least we have more information)
         return dictionaryMemoryBytes > dictionaryMemoryMaxBytesHigh;
     }
@@ -107,30 +147,43 @@ public void reset()
     public void finalOptimize(int bufferedBytes)
     {
         updateDirectConversionCandidates();
-        convertLowCompressionStreams(bufferedBytes);
+        convertLowCompressionStreams(true, bufferedBytes);
     }
 
-    public void optimize(int bufferedBytes, int stripeRowCount)
+    @VisibleForTesting
+    boolean isUsefulCheckRequired(int dictionaryMemoryBytes)
     {
-        // recompute the dictionary memory usage
-        dictionaryMemoryBytes = allWriters.stream()
-                .filter(writer -> !writer.isDirectEncoded())
-                .mapToInt(DictionaryColumnManager::getDictionaryBytes)
-                .sum();
+        if (dictionaryMemoryBytes < dictionaryUsefulCheckColumnSizeBytes) {
+            return false;
+        }
 
-        // update the dictionary growth history
-        allWriters.stream()
-                .filter(writer -> !writer.isDirectEncoded())
-                .forEach(column -> column.updateHistory(stripeRowCount));
+        dictionaryUsefulCheckCounter++;
+        if (dictionaryUsefulCheckCounter == dictionaryUsefulCheckPerChunkFrequency) {
+            dictionaryUsefulCheckCounter = 0;
+            return true;
+        }
 
-        if (dictionaryMemoryBytes <= dictionaryMemoryMaxBytesLow) {
-            return;
+        return false;
+    }
+
+    public void optimize(int bufferedBytes, int stripeRowCount)
+    {
+        // recompute the dictionary memory usage
+        int totalDictionaryBytes = 0;
+        for (DictionaryColumnManager writer : allWriters) {
+            if (!writer.isDirectEncoded()) {
+                totalDictionaryBytes += writer.getDictionaryBytes();
+                writer.updateHistory(stripeRowCount);
+            }
         }
+        dictionaryMemoryBytes = totalDictionaryBytes;
 
-        updateDirectConversionCandidates();
+        boolean isDictionaryAlmostFull = dictionaryMemoryBytes > dictionaryMemoryMaxBytesLow;
 
-        // before any further checks, convert all low compression streams
-        bufferedBytes = convertLowCompressionStreams(bufferedBytes);
+        if (isDictionaryAlmostFull || isUsefulCheckRequired(dictionaryMemoryBytes)) {
+            updateDirectConversionCandidates();
+            bufferedBytes = convertLowCompressionStreams(isDictionaryAlmostFull, bufferedBytes);
+        }
 
         if (dictionaryMemoryBytes <= dictionaryMemoryMaxBytesLow || bufferedBytes >= stripeMaxBytes) {
             return;
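
The isUsefulCheckRequired gate added above combines a per-column size threshold with a chunk counter. Below is a minimal standalone model of that counter behavior; the field names and logic follow the diff, but the enclosing UsefulCheckGate class is invented here for illustration.

// Simplified standalone model of the gating logic above; the class itself is
// hypothetical, only the fields and method body mirror the diff.
class UsefulCheckGate
{
    private final int dictionaryUsefulCheckColumnSizeBytes;
    private final int dictionaryUsefulCheckPerChunkFrequency;
    private int dictionaryUsefulCheckCounter;

    UsefulCheckGate(int columnSizeBytes, int perChunkFrequency)
    {
        this.dictionaryUsefulCheckColumnSizeBytes = columnSizeBytes;
        this.dictionaryUsefulCheckPerChunkFrequency = perChunkFrequency;
    }

    boolean isUsefulCheckRequired(int dictionaryMemoryBytes)
    {
        // below the size threshold the check never runs and the counter does not advance
        if (dictionaryMemoryBytes < dictionaryUsefulCheckColumnSizeBytes) {
            return false;
        }
        // above the threshold, only every Nth call triggers an actual usefulness check
        dictionaryUsefulCheckCounter++;
        if (dictionaryUsefulCheckCounter == dictionaryUsefulCheckPerChunkFrequency) {
            dictionaryUsefulCheckCounter = 0;
            return true;
        }
        return false;
    }
}

With dictionaryUsefulCheckPerChunkFrequency set to Integer.MAX_VALUE (the compatibility default described in the commit message), the counter never reaches the frequency in practice, so the check never fires and the previous behavior is preserved.
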
@@ -161,7 +214,7 @@ private void optimizeDictionaryColumns(int stripeRowCount, BufferedBytesCounter
             return;
         }
 
-        // if the stripe is larger then the minimum stripe size, we are not required to convert any more dictionary columns to direct
+        // if the stripe is larger than the minimum stripe size, we are not required to convert any more dictionary columns to direct
         if (bufferedBytesCounter.getBufferedBytes() >= stripeMinBytes) {
             // check if we can get better compression by converting a dictionary column to direct. This can happen when then there are multiple
             // dictionary columns and one does not compress well, so if we convert it to direct we can continue to use the existing dictionaries
@@ -196,27 +249,35 @@ private boolean convertDictionaryColumn(BufferedBytesCounter bufferedBytesCounte
     }
 
     @VisibleForTesting
-    int convertLowCompressionStreams(int bufferedBytes)
+    int convertLowCompressionStreams(boolean tryAllStreams, int bufferedBytes)
     {
         // convert all low compression column to direct
         Iterator<DictionaryColumnManager> iterator = directConversionCandidates.iterator();
         while (iterator.hasNext()) {
             DictionaryColumnManager dictionaryWriter = iterator.next();
-            if (dictionaryWriter.getCompressionRatio() < DICTIONARY_MIN_COMPRESSION_RATIO) {
-                int columnBufferedBytes = toIntExact(dictionaryWriter.getBufferedBytes());
-                OptionalInt directBytes = tryConvertToDirect(dictionaryWriter, getMaxDirectBytes(bufferedBytes));
-                iterator.remove();
-                if (directBytes.isPresent()) {
-                    bufferedBytes = bufferedBytes + directBytes.getAsInt() - columnBufferedBytes;
-                    if (bufferedBytes >= stripeMaxBytes) {
-                        return bufferedBytes;
+            if (tryAllStreams || dictionaryWriter.getDictionaryBytes() >= dictionaryUsefulCheckColumnSizeBytes) {
+                if (dictionaryWriter.getCompressionRatio() < DICTIONARY_MIN_COMPRESSION_RATIO) {
+                    int columnBufferedBytes = toIntExact(dictionaryWriter.getBufferedBytes());
+                    OptionalInt directBytes = tryConvertToDirect(dictionaryWriter, getMaxDirectBytes(bufferedBytes));
+                    iterator.remove();
+                    if (directBytes.isPresent()) {
+                        bufferedBytes = bufferedBytes + directBytes.getAsInt() - columnBufferedBytes;
+                        if (bufferedBytes >= stripeMaxBytes) {
+                            return bufferedBytes;
+                        }
                     }
                 }
             }
         }
         return bufferedBytes;
     }
 
+    @VisibleForTesting
+    List<DictionaryColumnManager> getDirectConversionCandidates()
+    {
+        return directConversionCandidates;
+    }
+
     private void updateDirectConversionCandidates()
     {
         // Writers can switch to Direct encoding internally. Remove them from direct conversion candidates.
@@ -255,14 +316,14 @@ private double currentCompressionRatio(int totalNonDictionaryBytes)
     }
 
     /**
-     * Choose a dictionary column to convert to direct encoding. We do this by predicting the compression ration
+     * Choose a dictionary column to convert to direct encoding. We do this by predicting the compression ratio
      * of the stripe if a singe column is flipped to direct. So for each column, we try to predict the row count
     * when we will hit a stripe flush limit if that column were converted to direct. Once we know the row count, we
     * calculate the predicted compression ratio.
     *
     * @param totalNonDictionaryBytes current size of the stripe without non-dictionary columns
     * @param stripeRowCount current number of rows in the stripe
-     * @return the column that would produce the best stripe compression ration if converted to direct
+     * @return the column that would produce the best stripe compression ratio if converted to direct
     */
    private DictionaryCompressionProjection selectDictionaryColumnToConvert(int totalNonDictionaryBytes, int stripeRowCount)
    {
@@ -305,7 +366,7 @@ private DictionaryCompressionProjection selectDictionaryColumnToConvert(int tota
             long currentIndexBytes = totalDictionaryIndexBytes - column.getIndexBytes();
             long currentTotalBytes = currentRawBytes + currentDictionaryBytes + currentIndexBytes;
 
-            // estimate the size of each new row if we were convert this column to direct
+            // estimate the size of each new row if we were to convert this column to direct
             double rawBytesPerFutureRow = totalNonDictionaryBytesPerRow + column.getRawBytesPerRow();
             double dictionaryBytesPerFutureRow = totalDictionaryBytesPerNewRow - column.getDictionaryBytesPerFutureRow();
             double indexBytesPerFutureRow = totalDictionaryIndexBytesPerRow - column.getIndexBytesPerRow();
@@ -317,7 +378,7 @@ private DictionaryCompressionProjection selectDictionaryColumnToConvert(int tota
             long rowsToStripeRowLimit = stripeMaxRowCount - stripeRowCount;
             long rowsToLimit = Longs.min(rowsToDictionaryMemoryLimit, rowsToStripeMemoryLimit, rowsToStripeRowLimit);
 
-            // predict the compression ratio at that limit if we were convert this column to direct
+            // predict the compression ratio at that limit if we were to convert this column to direct
             long predictedUncompressedSizeAtLimit = totalNonDictionaryBytes + totalDictionaryRawBytes + (totalUncompressedBytesPerRow * rowsToLimit);
             long predictedCompressedSizeAtLimit = (long) (currentTotalBytes + (totalBytesPerFutureRow * rowsToLimit));
             double predictedCompressionRatioAtLimit = 1.0 * predictedUncompressedSizeAtLimit / predictedCompressedSizeAtLimit;
@@ -371,7 +432,8 @@ public interface DictionaryColumn
         boolean isDirectEncoded();
     }
 
-    private static class DictionaryColumnManager
+    @VisibleForTesting
+    static class DictionaryColumnManager
     {
         private final DictionaryColumn dictionaryColumn;
 
@@ -481,6 +543,12 @@ public boolean isDirectEncoded()
         {
             return dictionaryColumn.isDirectEncoded();
         }
+
+        @VisibleForTesting
+        public DictionaryColumn getDictionaryColumn()
+        {
+            return dictionaryColumn;
+        }
     }
 
     private static class DictionaryCompressionProjection
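
Taken together, the optimizer constructor now accepts four memory-related knobs. A sketch of constructing it with the extended signature follows; the parameter order comes from the diff above, while the literal sizes and the empty writer set are placeholders for illustration only.

// Sketch only: argument values are hypothetical; the signature matches the diff.
import com.facebook.presto.orc.DictionaryCompressionOptimizer;
import com.google.common.collect.ImmutableSet;

public class OptimizerConstructionSketch
{
    public static void main(String[] args)
    {
        DictionaryCompressionOptimizer optimizer = new DictionaryCompressionOptimizer(
                ImmutableSet.of(),   // writers (empty in this toy example)
                32 * 1024 * 1024,    // stripeMinBytes
                64 * 1024 * 1024,    // stripeMaxBytes
                10_000_000,          // stripeMaxRowCount
                80 * 1024 * 1024,    // dictionaryMemoryMaxBytes
                8 * 1024 * 1024,     // dictionaryMemoryAlmostFullRangeBytes
                6 * 1024 * 1024,     // dictionaryUsefulCheckColumnSizeBytes
                Integer.MAX_VALUE);  // dictionaryUsefulCheckPerChunkFrequency (preserves old behavior)
    }
}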

presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java

Lines changed: 7 additions & 3 deletions
@@ -317,14 +317,18 @@ public OrcWriter(
             }
         }
         this.columnWriters = columnWriters.build();
-        this.dictionaryMaxMemoryBytes = toIntExact(
-                requireNonNull(options.getDictionaryMaxMemory(), "dictionaryMaxMemory is null").toBytes());
+        this.dictionaryMaxMemoryBytes = toIntExact(options.getDictionaryMaxMemory().toBytes());
+        int dictionaryMemoryAlmostFullRangeBytes = toIntExact(options.getDictionaryMemoryAlmostFullRange().toBytes());
+        int dictionaryUsefulCheckColumnSizeBytes = toIntExact(options.getDictionaryUsefulCheckColumnSize().toBytes());
         this.dictionaryCompressionOptimizer = new DictionaryCompressionOptimizer(
                 dictionaryColumnWriters.build(),
                 stripeMinBytes,
                 stripeMaxBytes,
                 stripeMaxRowCount,
-                dictionaryMaxMemoryBytes);
+                dictionaryMaxMemoryBytes,
+                dictionaryMemoryAlmostFullRangeBytes,
+                dictionaryUsefulCheckColumnSizeBytes,
+                options.getDictionaryUsefulCheckPerChunkFrequency());
 
         for (Entry<String, String> entry : this.userMetadata.entrySet()) {
             recordValidation(validation -> validation.addMetadataProperty(entry.getKey(), utf8Slice(entry.getValue())));
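
Note how the writer converts the DataSize-valued options to int byte counts with toIntExact before handing them to the optimizer. A small sketch of that conversion, with hypothetical values standing in for the OrcWriterOptions getters used in the diff:

// The DataSize-to-int conversion mirrors the diff; the literal sizes are hypothetical.
import io.airlift.units.DataSize;

import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.lang.Math.toIntExact;

public class DataSizeConversionSketch
{
    public static void main(String[] args)
    {
        DataSize dictionaryMaxMemory = new DataSize(80, MEGABYTE);
        DataSize dictionaryMemoryAlmostFullRange = new DataSize(8, MEGABYTE);

        int dictionaryMaxMemoryBytes = toIntExact(dictionaryMaxMemory.toBytes());
        int dictionaryMemoryAlmostFullRangeBytes = toIntExact(dictionaryMemoryAlmostFullRange.toBytes());
        // prints 83886080 / 8388608
        System.out.println(dictionaryMaxMemoryBytes + " / " + dictionaryMemoryAlmostFullRangeBytes);
    }
}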
