Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions tensorflow/lite/kernels/internal/portable_tensor_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

Expand Down Expand Up @@ -92,6 +93,56 @@ void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
}
}

namespace {

// Extracts the `index`-th `kBitWidth`-bit field of `packed` (field 0 occupies
// the least significant bits) and sign extends it to int8.
template <int kBitWidth>
inline int8_t DecodeSignedBitField(unsigned packed, int index) {
  constexpr unsigned kFieldMask = (1u << kBitWidth) - 1u;
  constexpr int kSignBit = 1 << (kBitWidth - 1);
  const int field =
      static_cast<int>((packed >> (index * kBitWidth)) & kFieldMask);
  // (x ^ s) - s sign extends without relying on shifts of negative values,
  // which are undefined / implementation-defined before C++20.
  return static_cast<int8_t>((field ^ kSignBit) - kSignBit);
}

// Unpacks `num_elements` signed `kBitWidth`-bit fields from `src_buffer` into
// one int8 per field in `dst_buffer`.
template <int kBitWidth>
void UnpackSignedBitFields(const int8_t* src_buffer, int num_elements,
                           int8_t* dst_buffer) {
  static_assert(kBitWidth == 2 || kBitWidth == 4,
                "Only 2-bit and 4-bit packing is supported.");
  constexpr int kFieldsPerByte = 8 / kBitWidth;

  // Source bytes whose fields are all consumed.
  const int full_bytes = num_elements / kFieldsPerByte;
  int8_t* out = dst_buffer;
  for (int b = 0; b < full_bytes; ++b) {
    // Work on the raw bit pattern so that no negative value is ever shifted.
    const unsigned packed = static_cast<uint8_t>(src_buffer[b]);
    for (int k = 0; k < kFieldsPerByte; ++k) {
      *out++ = DecodeSignedBitField<kBitWidth>(packed, k);
    }
  }

  // A trailing, partially used byte holds the remaining low-order fields.
  const int tail = num_elements % kFieldsPerByte;
  if (tail > 0) {
    const unsigned packed = static_cast<uint8_t>(src_buffer[full_bytes]);
    for (int k = 0; k < tail; ++k) {
      *out++ = DecodeSignedBitField<kBitWidth>(packed, k);
    }
  }
}

}  // namespace

// Unpacks densely packed signed 2-bit or 4-bit integers from `src_buffer` into
// one sign-extended int8 per element in `dst_buffer`.
//
// `num_elements` is the number of elements regardless of packed or unpacked.
// For example, with bit_width == 4, 3 elements means both
//   1) Packed: 3 int4's = 12 bits -> 16 bits (padded) = 2 bytes,
//      stored in src_buffer[0] and src_buffer[1].
//   2) Unpacked: 3 int8's = 3 bytes,
//      stored in dst_buffer[0], dst_buffer[1] and dst_buffer[2].
// Within each source byte the element in the least significant bits comes
// first. A non-positive `num_elements` writes nothing. An unsupported
// `bit_width` trips the assert in debug builds and writes nothing otherwise.
void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements,
                           int bit_width, int8_t* dst_buffer) {
  assert(bit_width == 2 || bit_width == 4);
  if (num_elements <= 0) {
    // Nothing to unpack; also keeps a negative count from indexing before the
    // start of `dst_buffer`.
    return;
  }
  if (bit_width == 4) {
    UnpackSignedBitFields<4>(src_buffer, num_elements, dst_buffer);
  } else if (bit_width == 2) {
    UnpackSignedBitFields<2>(src_buffer, num_elements, dst_buffer);
  }
}

void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements,
int8_t* dst_buffer) {
// num_elements means the number of elements regardless of packed or unpacked.
Expand Down
17 changes: 17 additions & 0 deletions tensorflow/lite/kernels/internal/portable_tensor_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,23 @@ void ApplySignbitToVector(const float* __restrict__ vector, int v_size,
void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
int8_t* dst_buffer);

// Unpack or inflate `src_buffer` by taking each byte and splitting it into
// multiple elements into `dst_buffer`. Supports 2-bit and 4-bit packed
// integers. Within each byte the element stored in the least significant bits
// is emitted first, and every element is sign extended to int8.
// Parameters:
// src_buffer : Densely packed buffer containing int2 or int4 values.
// num_elements : Number of unpacked elements to be read from the buffer.
// This should be equal to the size of `dst_buffer`.
// bit_width : The bit width of the packed elements (either 2 or 4).
// dst_buffer : Buffer to unpack into. Should be allocated by the caller.
// Size should be at least `num_elements`.
// Notes:
// For 4-bit unpacking: e.g., `src_buffer = {0x12, 0x34};` (num_elements = 4)
// will return `dst_buffer = {0x02, 0x01, 0x04, 0x03}`.
// For 2-bit unpacking: e.g., `src_buffer = {0x12};` (num_elements = 4)
// will return `dst_buffer = {-2, 0x00, 0x01, 0x00}`; the lowest field is
// 0b10, which sign extends to -2 (0xFE as a raw byte).
void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements,
int bit_width, int8_t* dst_buffer);

// Pack `src_buffer` into a densely packed buffer of int4 values.
// Parameters:
// src_buffer : Buffer containing int4 values stored in int8 memory.
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/lite/tools/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from tflite_micro.tensorflow.lite.python import schema_py_generated as schema_fb
else:
# This file is part of tflite_runtime package.
from tflite_runtime import schema_py_generated as schema_fb
from tflite_micro.tensorflow.lite_runtime import schema_py_generated as schema_fb

# A CSS description for making the visualizer
_CSS = """
Expand Down
Loading