json-serde: propagate errors in the Go benchmarks and add streaming variants

- 1.go, 2-ffi.go: move the body of main into run() so every read, parse and
  marshal error is returned instead of ignored; drop the ToJsonString helpers.
- 2-streaming.zig, 3-streaming.go, 4-ffi-streaming.go, 4-streaming.rs: new
  variants that serialize straight into an MD5 hasher.
- bench_*.yaml: register the new sources.

diff --git a/bench/algorithm/json-serde/1.go b/bench/algorithm/json-serde/1.go
index 130efd49..8c9e882c 100644
--- a/bench/algorithm/json-serde/1.go
+++ b/bench/algorithm/json-serde/1.go
@@ -10,24 +10,60 @@ import (
 )
 
 func main() {
+	if err := run(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		// Exit non-zero so callers can detect the failure.
+		os.Exit(1)
+	}
+}
+
+// run executes the serialization round trips and returns the first error.
+func run() error {
 	fileName := "sample"
 	n := 10
+
 	if len(os.Args) > 1 {
 		fileName = os.Args[1]
 	}
 	if len(os.Args) > 2 {
-		n, _ = strconv.Atoi(os.Args[2])
+		var err error
+		n, err = strconv.Atoi(os.Args[2])
+		if err != nil {
+			return err
+		}
+	}
+
+	jsonStr, err := ioutil.ReadFile(fileName + ".json")
+	if err != nil {
+		return err
 	}
+
 	var data GeoData
-	jsonStr, _ := ioutil.ReadFile(fileName + ".json")
-	json.Unmarshal([]byte(jsonStr), &data)
-	printHash(data.ToJsonString())
+	if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
+		return err
+	}
+
+	bytes, err := json.Marshal(data)
+	if err != nil {
+		return err
+	}
+	printHash(bytes)
+
 	array := make([]GeoData, 0, n)
 	for i := 0; i < n; i++ {
-		json.Unmarshal([]byte(jsonStr), &data)
+		if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
+			return err
+		}
 		array = append(array, data)
 	}
-	printHash(ToJsonString(array))
+
+	bytes, err = json.Marshal(array)
+	if err != nil {
+		return err
+	}
+	printHash(bytes)
+
+	return nil
 }
 
 func printHash(json []byte) {
@@ -41,20 +77,6 @@ type GeoData struct {
 	Features []Feature `json:"features"`
 }
 
-func ToJsonString(array []GeoData) []byte {
-	if bytes, err := json.Marshal(array); err == nil {
-		return bytes
-	}
-	return []byte{}
-}
-
-func (data *GeoData) ToJsonString() []byte {
-	if bytes, err := json.Marshal(data); err == nil {
-		return bytes
-	}
-	return []byte{}
-}
-
 type Feature struct {
 	Type       string     `json:"type"`
 	Properties Properties `json:"properties"`
diff --git a/bench/algorithm/json-serde/2-ffi.go b/bench/algorithm/json-serde/2-ffi.go
index 01b5be12..c2ee0e1d 100644
--- a/bench/algorithm/json-serde/2-ffi.go
+++ b/bench/algorithm/json-serde/2-ffi.go
@@ -11,24 +11,60 @@ import (
 )
 
 func main() {
+	if err := run(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		// Exit non-zero so callers can detect the failure.
+		os.Exit(1)
+	}
+}
+
+// run executes the serialization round trips and returns the first error.
+func run() error {
 	fileName := "sample"
 	n := 10
+
 	if len(os.Args) > 1 {
 		fileName = os.Args[1]
 	}
 	if len(os.Args) > 2 {
-		n, _ = strconv.Atoi(os.Args[2])
+		var err error
+		n, err = strconv.Atoi(os.Args[2])
+		if err != nil {
+			return err
+		}
+	}
+
+	jsonStr, err := ioutil.ReadFile(fileName + ".json")
+	if err != nil {
+		return err
 	}
+
 	var data GeoData
-	jsonStr, _ := ioutil.ReadFile(fileName + ".json")
-	json.Unmarshal([]byte(jsonStr), &data)
-	printHash(data.ToJsonString())
+	if err := json.Unmarshal(jsonStr, &data); err != nil {
+		return err
+	}
+
+	bytes, err := json.Marshal(data)
+	if err != nil {
+		return err
+	}
+	printHash(bytes)
+
 	array := make([]GeoData, 0, n)
 	for i := 0; i < n; i++ {
-		json.Unmarshal([]byte(jsonStr), &data)
+		if err := json.Unmarshal(jsonStr, &data); err != nil {
+			return err
+		}
 		array = append(array, data)
 	}
-	printHash(ToJsonString(array))
+
+	bytes, err = json.Marshal(array)
+	if err != nil {
+		return err
+	}
+	printHash(bytes)
+
+	return nil
 }
 
 func printHash(json []byte) {
@@ -42,20 +78,6 @@ type GeoData struct {
 	Features []Feature `json:"features"`
 }
 
-func ToJsonString(array []GeoData) []byte {
-	if bytes, err := json.Marshal(array); err == nil {
-		return bytes
-	}
-	return []byte{}
-}
-
-func (data *GeoData) ToJsonString() []byte {
-	if bytes, err := json.Marshal(data); err == nil {
-		return bytes
-	}
-	return []byte{}
-}
-
 type Feature struct {
 	Type       string     `json:"type"`
 	Properties Properties `json:"properties"`
diff --git a/bench/algorithm/json-serde/2-streaming.zig b/bench/algorithm/json-serde/2-streaming.zig
new file mode 100644
index 00000000..31b04840
--- /dev/null
+++ b/bench/algorithm/json-serde/2-streaming.zig
@@ -0,0 +1,122 @@
+const std = @import("std");
+const json = std.json;
+
+const global_allocator = std.heap.c_allocator;
+
+pub fn main() !void {
+    const args = try std.process.argsAlloc(global_allocator);
+    defer std.process.argsFree(global_allocator, args);
+
+    const file = if (args.len > 1) blk: {
+        var file_name = try std.mem.concat(global_allocator, u8, &.{ args[1], ".json" });
+        defer global_allocator.free(file_name);
+        break :blk try std.fs.cwd().openFile(file_name, .{});
+    } else try std.fs.cwd().openFile("sample.json", .{});
+    defer file.close();
+
+    var n: usize = 3;
+    if (args.len > 2) {
+        n = try std.fmt.parseInt(usize, args[2], 10);
+    }
+
+    const json_str = try file.readToEndAlloc(global_allocator, std.math.maxInt(u32));
+    defer global_allocator.free(json_str);
+    {
+        var tokens = json.TokenStream.init(json_str);
+        const data = try json.parse(GeoData, &tokens, .{ .allocator = global_allocator });
+        defer json.parseFree(GeoData, data, .{ .allocator = global_allocator });
+
+        var md5 = StreamingMd5.init();
+        try json.stringify(data, .{}, md5.writer());
+        md5.printHash();
+    }
+
+    {
+        var array = std.ArrayList(GeoData).init(global_allocator);
+        defer {
+            for (array.items) |data|
+                json.parseFree(GeoData, data, .{ .allocator = global_allocator });
+            array.deinit();
+        }
+        var i: usize = 0;
+        while (i < n) : (i += 1) {
+            var tokens = json.TokenStream.init(json_str);
+            const data = try json.parse(GeoData, &tokens, .{ .allocator = global_allocator });
+            try array.append(data);
+        }
+
+        var md5 = StreamingMd5.init();
+        try json.stringify(array.items, .{}, md5.writer());
+        md5.printHash();
+    }
+}
+
+const Md5 = std.crypto.hash.Md5;
+
+/// Adapter that exposes an MD5 state as a writer, so JSON is hashed while it is serialized.
+const StreamingMd5 = struct {
+    md: Md5,
+
+    pub fn init() StreamingMd5 {
+        return .{ .md = Md5.init(.{}) };
+    }
+
+    pub fn writer(self: *StreamingMd5) std.io.Writer(*StreamingMd5, error{}, StreamingMd5.update) {
+        return .{ .context = self };
+    }
+
+    fn update(self: *StreamingMd5, buf: []const u8) error{}!usize {
+        self.md.update(buf);
+        return buf.len;
+    }
+
+    pub fn printHash(self: *StreamingMd5) void {
+        var out: [Md5.digest_length]u8 = undefined;
+        self.md.final(&out);
+        const stdout = std.io.getStdOut().writer();
+        stdout.print("{s}\n", .{std.fmt.fmtSliceHexLower(&out)}) catch {};
+    }
+};
+
+const GeoData = struct {
+    type: []const u8,
+    features: []const Feature,
+};
+const Feature = struct {
+    type: []const u8,
+    properties: Properties,
+    geometry: Geometry,
+};
+const Properties = struct { name: []const u8 };
+const Geometry = struct {
+    type: []const u8,
+    coordinates: []const []const [2]f64,
+    // provide a custom jsonStringify
+    // - this is only necessary to remove spaces between coordinates array
+    // and end up with the correct md5 (compared with 1.js)
+    pub fn jsonStringify(
+        value: Geometry,
+        _: json.StringifyOptions,
+        out_stream: anytype,
+    ) @TypeOf(out_stream).Error!void {
+        const typestr =
+            \\{"type":"
+        ;
+        _ = try out_stream.write(typestr);
+        _ = try out_stream.write(value.type);
+        const coordsstr =
+            \\","coordinates":[
+        ;
+        _ = try out_stream.write(coordsstr);
+        for (value.coordinates, 0..) |row, rowi| {
+            if (rowi != 0) _ = try out_stream.write(",");
+            _ = try out_stream.write("[");
+            for (row, 0..) |col, coli| {
+                if (coli != 0) _ = try out_stream.write(",");
+                try out_stream.print("[{d},{d}]", .{ col[0], col[1] });
+            }
+            _ = try out_stream.write("]");
+        }
+        _ = try out_stream.write("]}");
+    }
+};
diff --git a/bench/algorithm/json-serde/3-streaming.go b/bench/algorithm/json-serde/3-streaming.go
new file mode 100644
index 00000000..fc2ea66d
--- /dev/null
+++ b/bench/algorithm/json-serde/3-streaming.go
@@ -0,0 +1,113 @@
+package main
+
+import (
+	"crypto/md5"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"strconv"
+)
+
+func main() {
+	if err := run(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		// Exit non-zero so callers can detect the failure.
+		os.Exit(1)
+	}
+}
+
+// run executes the serialization round trips and returns the first error.
+func run() error {
+	fileName := "sample"
+	n := 10
+
+	if len(os.Args) > 1 {
+		fileName = os.Args[1]
+	}
+	if len(os.Args) > 2 {
+		var err error
+		n, err = strconv.Atoi(os.Args[2])
+		if err != nil {
+			return err
+		}
+	}
+
+	var data GeoData
+	jsonStr, err := ioutil.ReadFile(fileName + ".json")
+	if err != nil {
+		return err
+	}
+
+	if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
+		return err
+	}
+	if err := encodeHash(data); err != nil {
+		return err
+	}
+
+	array := make([]GeoData, 0, n)
+	for i := 0; i < n; i++ {
+		var data GeoData
+		if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
+			return err
+		}
+		array = append(array, data)
+	}
+
+	if err := encodeHash(array); err != nil {
+		return err
+	}
+	return nil
+}
+
+// lastNewlineIgnorerWriter forwards writes to w, dropping a trailing '\n' from each chunk.
+type lastNewlineIgnorerWriter struct {
+	w io.Writer
+}
+
+func (w lastNewlineIgnorerWriter) Write(b []byte) (int, error) {
+	if len(b) == 0 || b[len(b)-1] != '\n' {
+		return w.w.Write(b)
+	}
+
+	_, err := w.w.Write(b[:len(b)-1])
+	if err != nil {
+		return 0, err
+	}
+	return len(b), nil
+}
+
+// encodeHash streams the JSON encoding of data into an MD5 hash and prints the hex digest.
+func encodeHash(data any) error {
+	hasher := md5.New()
+	// Ignore the last byte if it is a newline character, streaming encoder
+	// adds it to the end of the json.
+	encoder := json.NewEncoder(lastNewlineIgnorerWriter{w: hasher})
+	if err := encoder.Encode(data); err != nil {
+		return err
+	}
+	fmt.Printf("%x\n", hasher.Sum(nil))
+	return nil
+}
+
+type GeoData struct {
+	Type     string    `json:"type"`
+	Features []Feature `json:"features"`
+}
+
+type Feature struct {
+	Type       string     `json:"type"`
+	Properties Properties `json:"properties"`
+	Geometry   Geometry   `json:"geometry"`
+}
+
+type Properties struct {
+	Name string `json:"name"`
+}
+
+type Geometry struct {
+	Type        string         `json:"type"`
+	Coordinates [][][2]float64 `json:"coordinates"`
+}
diff --git a/bench/algorithm/json-serde/4-ffi-streaming.go b/bench/algorithm/json-serde/4-ffi-streaming.go
new file mode 100644
index 00000000..b94664d9
--- /dev/null
+++ b/bench/algorithm/json-serde/4-ffi-streaming.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+	"crypto/md5"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"strconv"
+
+	json "github.com/bytedance/sonic"
+	"github.com/bytedance/sonic/encoder"
+)
+
+func main() {
+	if err := run(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		// Exit non-zero so callers can detect the failure.
+		os.Exit(1)
+	}
+}
+
+// run executes the serialization round trips and returns the first error.
+func run() error {
+	fileName := "sample"
+	n := 10
+
+	if len(os.Args) > 1 {
+		fileName = os.Args[1]
+	}
+	if len(os.Args) > 2 {
+		var err error
+		n, err = strconv.Atoi(os.Args[2])
+		if err != nil {
+			return err
+		}
+	}
+
+	jsonStr, err := ioutil.ReadFile(fileName + ".json")
+	if err != nil {
+		return err
+	}
+
+	var data GeoData
+	if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
+		return err
+	}
+
+	if err := encodeHash(data); err != nil {
+		return err
+	}
+
+	array := make([]GeoData, 0, n)
+	for i := 0; i < n; i++ {
+		if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
+			return err
+		}
+		array = append(array, data)
+	}
+
+	if err := encodeHash(array); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// lastNewlineIgnorerWriter forwards writes to w, dropping a trailing '\n' from each chunk.
+type lastNewlineIgnorerWriter struct {
+	w io.Writer
+}
+
+func (w lastNewlineIgnorerWriter) Write(b []byte) (int, error) {
+	if len(b) == 0 || b[len(b)-1] != '\n' {
+		return w.w.Write(b)
+	}
+
+	_, err := w.w.Write(b[:len(b)-1])
+	if err != nil {
+		return 0, err
+	}
+	return len(b), nil
+}
+
+// encodeHash streams the JSON encoding of data into an MD5 hash and prints the hex digest.
+func encodeHash(data any) error {
+	hasher := md5.New()
+	// Ignore the last byte if it is a newline character, streaming encoder
+	// adds it to the end of the json.
+	encoder := encoder.NewStreamEncoder(lastNewlineIgnorerWriter{w: hasher})
+	if err := encoder.Encode(data); err != nil {
+		return err
+	}
+	fmt.Printf("%x\n", hasher.Sum(nil))
+	return nil
+}
+
+type GeoData struct {
+	Type     string    `json:"type"`
+	Features []Feature `json:"features"`
+}
+
+type Feature struct {
+	Type       string     `json:"type"`
+	Properties Properties `json:"properties"`
+	Geometry   Geometry   `json:"geometry"`
+}
+
+type Properties struct {
+	Name string `json:"name"`
+}
+
+type Geometry struct {
+	Type        string         `json:"type"`
+	Coordinates [][][2]float64 `json:"coordinates"`
+}
diff --git a/bench/algorithm/json-serde/4-streaming.rs b/bench/algorithm/json-serde/4-streaming.rs
new file mode 100644
index 00000000..d92e17ef
--- /dev/null
+++ b/bench/algorithm/json-serde/4-streaming.rs
@@ -0,0 +1,74 @@
+use serde::{Deserialize, Serialize, Serializer};
+use std::fs;
+
+fn main() -> anyhow::Result<()> {
+    let file_name = std::env::args_os()
+        .nth(1)
+        .and_then(|s| s.into_string().ok())
+        .unwrap_or_else(|| "sample".to_string());
+    let n = std::env::args_os()
+        .nth(2)
+        .and_then(|s| s.into_string().ok())
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(10);
+    let json_str = fs::read_to_string(format!("{}.json", file_name))?;
+    let json: GeoData = serde_json::from_str(&json_str)?;
+
+    let mut md_ctx = md5::Context::new();
+    serde_json::to_writer(&mut md_ctx, &json)?;
+    println!("{:x}", md_ctx.compute());
+
+    let mut array = Vec::with_capacity(n);
+    for _i in 0..n {
+        let json: GeoData = serde_json::from_str(&json_str)?;
+        array.push(json);
+    }
+
+    let mut md_ctx = md5::Context::new();
+    serde_json::to_writer(&mut md_ctx, &array)?;
+    println!("{:x}", md_ctx.compute());
+
+    Ok(())
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+struct GeoData {
+    r#type: String,
+    features: Vec<Feature>,
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+struct Feature {
+    r#type: String,
+    properties: Properties,
+    geometry: Geometry,
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+struct Properties {
+    name: String,
+}
+
+/// Coordinate value that serializes as an integer when it has no fractional
+/// part and as a float otherwise.
+#[derive(Deserialize, Debug, Default)]
+struct MyF64(f64);
+
+impl Serialize for MyF64 {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if self.0.fract() == 0.0 {
+            serializer.serialize_i64(self.0 as i64)
+        } else {
+            serializer.serialize_f64(self.0)
+        }
+    }
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+struct Geometry {
+    r#type: String,
+    coordinates: Vec<Vec<[MyF64; 2]>>,
+}
diff --git a/bench/bench_go.yaml b/bench/bench_go.yaml
index 34379247..913f422c 100644
--- a/bench/bench_go.yaml
+++ b/bench/bench_go.yaml
@@ -42,6 +42,7 @@ problems:
   - name: json-serde
     source:
       - 1.go
+      - 3-streaming.go
   - name: coro-prime-sieve
     source:
       - 1.go
diff --git a/bench/bench_go_ffi.yaml b/bench/bench_go_ffi.yaml
index e2da6285..70a9b3e4 100644
--- a/bench/bench_go_ffi.yaml
+++ b/bench/bench_go_ffi.yaml
@@ -3,6 +3,7 @@ problems:
   - name: json-serde
     source:
       - 2-ffi.go
+      - 4-ffi-streaming.go
 compiler_version_command: go version
 compiler_version_regex:
 runtime_version_parameter:
diff --git a/bench/bench_rust.yaml b/bench/bench_rust.yaml
index ed3fdc37..eea4baa0 100644
--- a/bench/bench_rust.yaml
+++ b/bench/bench_rust.yaml
@@ -59,6 +59,7 @@ problems:
       - 1.rs
       - 2.rs
       - 3.rs
+      - 4-streaming.rs
   - name: coro-prime-sieve
     source:
       - 1.rs
diff --git a/bench/bench_zig.yaml b/bench/bench_zig.yaml
index ffe8b368..eec2d19d 100644
--- a/bench/bench_zig.yaml
+++ b/bench/bench_zig.yaml
@@ -46,6 +46,7 @@ problems:
   - name: json-serde
     source:
       - 1.zig
+      - 2-streaming.zig
   # - name: coro-prime-sieve
   #   source:
   #     - 1.zig