From a98e5ae5dce460eca90d3a3f8808397951479b09 Mon Sep 17 00:00:00 2001 From: Jeremy Maitin-Shepard Date: Tue, 29 Apr 2025 23:14:17 -0700 Subject: [PATCH] Add json data type --- codecs/vlen-utf8/README.md | 11 +++++++++-- data-types/json/README.md | 33 +++++++++++++++++++++++++++++++++ data-types/json/schema.json | 20 ++++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 data-types/json/README.md create mode 100644 data-types/json/schema.json diff --git a/codecs/vlen-utf8/README.md b/codecs/vlen-utf8/README.md index 2b6e830..2c463ac 100644 --- a/codecs/vlen-utf8/README.md +++ b/codecs/vlen-utf8/README.md @@ -27,12 +27,19 @@ For example, the array metadata below specifies that the array contains variable This is a `array -> bytes` codec. -This codec is only compatible with the [`"string"`](../../data-types/string/README.md) data type. +This codec is only compatible with the +[`"string"`](../../data-types/string/README.md) and +[`"json"`](../../data-types/json/README.md) data types. In the encoded format, each chunk is prefixed with a 32-bit little-endian unsigned integer (u32le) that specifies the number of elements in the chunk. This prefix is followed by a sequence of encoded elements in lexicographical order. Each element in the sequence is encoded by a u32le representing the number of bytes followed by the bytes themselves. -The bytes for each element are obtained by encoding the element as UTF8 bytes. + +For the `"string"` data type, the bytes for each element are obtained by +encoding the element as UTF-8 bytes. + +For the `"json"` data type, the bytes for each element are obtained by encoding +the element as JSON text, which is itself encoded as UTF-8. See https://numcodecs.readthedocs.io/en/stable/other/vlen.html#vlenutf8 for details about the encoding. 
diff --git a/data-types/json/README.md b/data-types/json/README.md new file mode 100644 index 0000000..ef97c8d --- /dev/null +++ b/data-types/json/README.md @@ -0,0 +1,33 @@ +# json data type + +Defines a data type for arbitrary JSON values. + +## Permitted fill values + +The value of the `fill_value` metadata may be any JSON value. + +## Example + +For example, the array metadata below specifies that the array contains JSON values: + +```json +{ + "data_type": "json", + "fill_value": {"some": "value"}, + "codecs": [{ + "name": "vlen-utf8" + }] +} +``` + +## Notes + +Currently, this data type is only compatible with the [`"vlen-utf8"`](../../codecs/vlen-utf8/README.md) codec. + +## Change log + +No changes yet. + +## Current maintainers + +* Jeremy Maitin-Shepard ([@jbms](https://github.com/jbms)), Google diff --git a/data-types/json/schema.json b/data-types/json/schema.json new file mode 100644 index 0000000..2669718 --- /dev/null +++ b/data-types/json/schema.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "const": "json" + }, + "configuration": { + "type": "object", + "additionalProperties": false + } + }, + "required": ["name"], + "additionalProperties": false + }, + { "const": "json" } + ] +}