From ac54ea9e008f3029df8345514077d2328d00cc7b Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 12:53:11 -0500 Subject: [PATCH 01/20] Migrate to a v2 fork. --- Cargo.toml | 12 +++++---- README.md | 41 ++++++++++++++++-------------- extras/data-tests/Cargo.toml | 2 +- extras/data-tests/src/main.rs | 2 +- extras/simple-bench/Cargo.toml | 2 +- extras/simple-bench/README.md | 6 ++--- extras/simple-bench/src/main.rs | 4 +-- fuzz/fuzz_targets/fast_float.rs | 4 +-- fuzz/fuzz_targets/roundtrip_f64.rs | 2 +- src/lib.rs | 4 +-- tests/test_api.rs | 2 +- tests/test_basic.rs | 6 ++--- tests/test_exhaustive.rs | 2 +- tests/test_random.rs | 2 +- 14 files changed, 48 insertions(+), 43 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5da5c9d..7073ca0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] -name = "fast-float" -version = "0.2.0" -authors = ["Ivan Smirnov "] -repository = "https://github.com/aldanor/fast-float-rust" -documentation = "https://docs.rs/fast-float" +name = "fast-float2" +version = "0.2.1" +authors = ["Ivan Smirnov ", "Alex Huszagh "] +repository = "https://github.com/Alexhuszagh/fast-float-rust" +documentation = "https://docs.rs/fast-float2" description = "Fast floating-point number parser." keywords = ["parser", "parsing", "parse", "float", "no-std"] categories = ["parser-implementations", "parsing", "text-processing", "algorithms", "no-std"] @@ -12,6 +12,8 @@ license = "MIT OR Apache-2.0" autobenches = false edition = "2018" exclude = ["benches/*", "extras/*"] +# FIXME: rust-version is not supported until 1.56.0. 
+rust-version = "1.37" [features] default = ["std"] diff --git a/README.md b/README.md index fde2755..f65df04 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -fast-float -========== +fast-float2 +=========== -[![Build](https://github.com/aldanor/fast-float-rust/workflows/CI/badge.svg)](https://github.com/aldanor/fast-float-rust/actions?query=branch%3Amaster) -[![Latest Version](https://img.shields.io/crates/v/fast-float.svg)](https://crates.io/crates/fast-float) -[![Documentation](https://docs.rs/fast-float/badge.svg)](https://docs.rs/fast-float) +[![Build](https://github.com/Alexhuszagh/fast-float-rust/workflows/CI/badge.svg)](https://github.com/Alexhuszagh/fast-float-rust/actions?query=branch%3Amaster) +[![Latest Version](https://img.shields.io/crates/v/fast-float2.svg)](https://crates.io/crates/fast-float2) +[![Documentation](https://docs.rs/fast-float2/badge.svg)](https://docs.rs/fast-float2) [![Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Rustc 1.37+](https://img.shields.io/badge/rustc-1.37+-lightgray.svg)](https://blog.rust-lang.org/2019/08/15/Rust-1.37.0.html) @@ -12,7 +12,7 @@ This crate provides a super-fast decimal number parser from strings into floats. ```toml [dependencies] -fast-float = "0.2" +fast-float2 = "0.2.1" ``` There are no dependencies and the crate can be used in a no_std context by disabling the "std" feature. 
@@ -21,10 +21,10 @@ There are no dependencies and the crate can be used in a no_std context by disab ## Usage -There's two top-level functions provided: -[`parse()`](https://docs.rs/fast-float/latest/fast_float/fn.parse.html) and +There's two top-level functions provided: +[`parse()`](https://docs.rs/fast-float/latest/fast_float/fn.parse.html) and [`parse_partial()`](https://docs.rs/fast-float/latest/fast_float/fn.parse_partial.html), both taking -either a string or a bytes slice and parsing the input into either `f32` or `f64`: +either a string or a bytes slice and parsing the input into either `f32` or `f64`: - `parse()` treats the whole string as a decimal number and returns an error if there are invalid characters or if the string is empty. @@ -39,12 +39,12 @@ Example: ```rust // Parse the entire string as a decimal number. let s = "1.23e-02"; -let x: f32 = fast_float::parse(s).unwrap(); +let x: f32 = fast_float2::parse(s).unwrap(); assert_eq!(x, 0.0123); // Parse as many characters as possible as a decimal number. let s = "1.23e-02foo"; -let (x, n) = fast_float::parse_partial::(s).unwrap(); +let (x, n) = fast_float2::parse_partial::(s).unwrap(); assert_eq!(x, 0.0123); assert_eq!(n, 8); assert_eq!(&s[n..], "foo"); @@ -53,19 +53,22 @@ assert_eq!(&s[n..], "foo"); ## Details This crate is a direct port of Daniel Lemire's [`fast_float`](https://github.com/fastfloat/fast_float) -C++ library (valuable discussions with Daniel while porting it helped shape the crate and get it to +C++ library (valuable discussions with Daniel while porting it helped shape the crate and get it to the performance level it's at now), with some Rust-specific tweaks. Please see the original repository for many useful details regarding the algorithm and the implementation. -The parser is locale-independent. 
The resulting value is the closest floating-point values (using either -`f32` or `f64`), using the "round to even" convention for values that would otherwise fall right in-between -two values. That is, we provide exact parsing according to the IEEE standard. +The parser is locale-independent. The resulting value is the closest floating-point values (using either +`f32` or `f64`), using the "round to even" convention for values that would otherwise fall right in-between +two values. That is, we provide exact parsing according to the IEEE standard. Infinity and NaN values can be parsed, along with scientific notation. Both little-endian and big-endian platforms are equally supported, with extra optimizations enabled on little-endian architectures. +Since [fast-float-rust](https://github.com/aldanor/fast-float-rust) is unmaintained, this is a fork +containing the patches and security updates. + ## Testing There are a few ways this crate is tested: @@ -80,7 +83,7 @@ There are a few ways this crate is tested: ## Performance The presented parser seems to beat all of the existing C/C++/Rust float parsers known to us at the -moment by a large margin, in all of the datasets we tested it on so far – see detailed benchmarks +moment by a large margin, in all of the datasets we tested it on so far – see detailed benchmarks below (the only exception being the original fast_float C++ library, of course – performance of which is within noise bounds of this crate). On modern machines like Apple M1, parsing throughput can reach up to 1.5 GB/s. @@ -103,7 +106,7 @@ C++ library, here are few brief notes: ## Benchmarks -Below are tables of best timings in nanoseconds for parsing a single number +Below are tables of best timings in nanoseconds for parsing a single number into a 64-bit float. #### Intel i7-4771 @@ -169,12 +172,12 @@ AMD Rome, Linux, Rust 1.49. 
#### Notes -- The two test files referred above can be found in +- The two test files referred above can be found in [this](https://github.com/lemire/simple_fastfloat_benchmark) repository. - The Rust part of the table (along with a few other benchmarks) can be generated via the benchmark tool that can be found under `extras/simple-bench` of this repo. - The C/C++ part of the table (along with a few other benchmarks and parsers) can be - generated via a C++ utility that can be found in + generated via a C++ utility that can be found in [this](https://github.com/lemire/simple_fastfloat_benchmark) repository.
diff --git a/extras/data-tests/Cargo.toml b/extras/data-tests/Cargo.toml index e04d88f..5cb96b5 100644 --- a/extras/data-tests/Cargo.toml +++ b/extras/data-tests/Cargo.toml @@ -8,4 +8,4 @@ license = "MIT OR Apache-2.0" publish = false [dependencies] -fast-float = { path = "../.." } +fast-float2 = { path = "../.." } diff --git a/extras/data-tests/src/main.rs b/extras/data-tests/src/main.rs index 7487b0c..3be4d02 100644 --- a/extras/data-tests/src/main.rs +++ b/extras/data-tests/src/main.rs @@ -22,7 +22,7 @@ impl TestCase { } } - fn execute_one(&self, expected: F) { + fn execute_one(&self, expected: F) { let r = F::parse_float_partial(&self.string); if !r.is_ok() { dbg!(self); diff --git a/extras/simple-bench/Cargo.toml b/extras/simple-bench/Cargo.toml index 5cd59c8..1a3e648 100644 --- a/extras/simple-bench/Cargo.toml +++ b/extras/simple-bench/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" publish = false [dependencies] -fast-float = { path = "../.." } +fast-float2 = { path = "../.." } structopt = "0.3" anyhow = "1.0" lexical = "5.2" diff --git a/extras/simple-bench/README.md b/extras/simple-bench/README.md index 10fdbbc..fa6a3b8 100644 --- a/extras/simple-bench/README.md +++ b/extras/simple-bench/README.md @@ -1,4 +1,4 @@ -This crate provides a utility for benchmarking the `fast-float` crate against +This crate provides a utility for benchmarking the `fast-float2` crate against `lexical_core` and standard library's `FromStr`. 
To run a file-based test: @@ -18,8 +18,8 @@ To run a randomized test: cargo run --release -- random uniform ``` -For more details and options (choosing a different random generator, storing -randomized inputs to a file, changing the number of runs, or switching between +For more details and options (choosing a different random generator, storing +randomized inputs to a file, changing the number of runs, or switching between 32-bit and 64-bit floats), refer to help: ``` diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs index 9428fad..5dbe06d 100644 --- a/extras/simple-bench/src/main.rs +++ b/extras/simple-bench/src/main.rs @@ -10,7 +10,7 @@ use fastrand::Rng; use lexical::FromLexical; use structopt::StructOpt; -use fast_float::FastFloat; +use fast_float2::FastFloat; use random::RandomGen; @@ -138,7 +138,7 @@ impl Method { let data = &input.data; let times = match self { Self::FastFloat => run_bench(data, repeat, |s: &str| { - fast_float::parse_partial::(s).unwrap_or_default().0 + fast_float2::parse_partial::(s).unwrap_or_default().0 }), Self::Lexical => run_bench(data, repeat, |s: &str| { lexical_core::parse_partial::(s.as_bytes()) diff --git a/fuzz/fuzz_targets/fast_float.rs b/fuzz/fuzz_targets/fast_float.rs index 733c5f8..581e5d7 100644 --- a/fuzz/fuzz_targets/fast_float.rs +++ b/fuzz/fuzz_targets/fast_float.rs @@ -11,6 +11,6 @@ fn black_box(dummy: T) -> T { } fuzz_target!(|data: &[u8]| { - let _ = black_box(::fast_float::parse::(data)); - let _ = black_box(::fast_float::parse::(data)); + let _ = black_box(::fast_float2::parse::(data)); + let _ = black_box(::fast_float2::parse::(data)); }); diff --git a/fuzz/fuzz_targets/roundtrip_f64.rs b/fuzz/fuzz_targets/roundtrip_f64.rs index 4c92e9f..bb3506c 100644 --- a/fuzz/fuzz_targets/roundtrip_f64.rs +++ b/fuzz/fuzz_targets/roundtrip_f64.rs @@ -5,7 +5,7 @@ use libfuzzer_sys::fuzz_target; // is small enough that we can test it exhaustively fn check_roundtrip(float: f64, string: impl AsRef) { - 
let result = ::fast_float::parse::(string.as_ref()).unwrap(); + let result = ::fast_float2::parse::(string.as_ref()).unwrap(); if float.is_nan() { assert!(result.is_nan()); } else { diff --git a/src/lib.rs b/src/lib.rs index aef86be..a90ad36 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,12 +20,12 @@ //! ```rust //! // Parse the entire string as a decimal number. //! let s = "1.23e-02"; -//! let x: f32 = fast_float::parse(s).unwrap(); +//! let x: f32 = fast_float2::parse(s).unwrap(); //! assert_eq!(x, 0.0123); //! //! // Parse as many characters as possible as a decimal number. //! let s = "1.23e-02foo"; -//! let (x, n) = fast_float::parse_partial::(s).unwrap(); +//! let (x, n) = fast_float2::parse_partial::(s).unwrap(); //! assert_eq!(x, 0.0123); //! assert_eq!(n, 8); //! assert_eq!(&s[n..], "foo"); diff --git a/tests/test_api.rs b/tests/test_api.rs index d0e1615..ff04b74 100644 --- a/tests/test_api.rs +++ b/tests/test_api.rs @@ -1,4 +1,4 @@ -use fast_float::{parse, parse_partial, FastFloat}; +use fast_float2::{parse, parse_partial, FastFloat}; macro_rules! check_ok { ($s:expr, $x:expr) => { diff --git a/tests/test_basic.rs b/tests/test_basic.rs index 235990f..f6eb4c8 100644 --- a/tests/test_basic.rs +++ b/tests/test_basic.rs @@ -24,7 +24,7 @@ macro_rules! 
check { let string = String::from($s); let s = string.as_bytes(); let expected: $ty = $e; - let result = fast_float::parse::<$ty, _>(s).unwrap(); + let result = fast_float2::parse::<$ty, _>(s).unwrap(); assert_eq!(result, expected); let lex = lexical_core::parse::<$ty>(s).unwrap(); assert_eq!(result, lex); @@ -411,7 +411,7 @@ fn test_f64_pow10() { for i in -308..=308 { let s = format!("1e{}", i); let v = f64::from_str(&s).unwrap(); - assert_eq!(fast_float::parse::(s).unwrap(), v); + assert_eq!(fast_float2::parse::(s).unwrap(), v); } } @@ -420,6 +420,6 @@ fn test_f32_pow10() { for i in -38..=38 { let s = format!("1e{}", i); let v = f32::from_str(&s).unwrap(); - assert_eq!(fast_float::parse::(s).unwrap(), v); + assert_eq!(fast_float2::parse::(s).unwrap(), v); } } diff --git a/tests/test_exhaustive.rs b/tests/test_exhaustive.rs index 308b9b4..3b77b39 100644 --- a/tests/test_exhaustive.rs +++ b/tests/test_exhaustive.rs @@ -5,7 +5,7 @@ fn test_f32_exhaustive_ryu() { for i in 0..0xFFFF_FFFF_u32 { let a: f32 = unsafe { core::mem::transmute(i) }; let s = buf.format(a); - let b: f32 = fast_float::parse(s).unwrap(); + let b: f32 = fast_float2::parse(s).unwrap(); assert!(a == b || (a.is_nan() && b.is_nan())); } } diff --git a/tests/test_random.rs b/tests/test_random.rs index 1dadd74..4d54dee 100644 --- a/tests/test_random.rs +++ b/tests/test_random.rs @@ -9,7 +9,7 @@ fn test_f64_random_from_u64() { let i: u64 = rng.u64(0..0xFFFF_FFFF_FFFF_FFFF); let a: f64 = unsafe { core::mem::transmute(i) }; let s = buf.format(a); - let b: f64 = fast_float::parse(s).unwrap(); + let b: f64 = fast_float2::parse(s).unwrap(); assert!(a == b || (a.is_nan() && b.is_nan())); } } From dd9ebc2c66c798dd76457503c7bc2452ad99448e Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 12:56:20 -0500 Subject: [PATCH 02/20] Update CI for older versions. 
--- .github/workflows/ci.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d7eb0e..271f136 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [1.37.0, stable, nightly] + rust: [1.56.0, stable, nightly] steps: - uses: actions/checkout@v2 with: @@ -23,6 +23,23 @@ jobs: - run: cargo test - run: cd extras/data-tests && cargo run --release + msrv: + name: Rust ${{matrix.rust}} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + rust: [1.37.0] + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{matrix.rust}} + - run: cargo check + - run: cargo build + cross: name: Rust ${{matrix.target}} runs-on: ubuntu-latest From cdcde1e2f4a8df505bd134ef3e147e07b6269714 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 16:52:43 -0500 Subject: [PATCH 03/20] Document maintenance mode. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f65df04..72118a3 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,8 @@ There are no dependencies and the crate can be used in a no_std context by disab *Compiler support: rustc 1.37+.* +This crate is in maintenance mode for bug fixes (especially security patches): minimal feature enhancements will be accepted. This implementation has been adopted by the Rust standard library: if you do not need parsing directly from bytes and/or partial parsers, you should use [FromStr](https://doc.rust-lang.org/std/str/trait.FromStr.html) for [f32](https://doc.rust-lang.org/std/primitive.f32.html) or [f64](https://doc.rust-lang.org/std/primitive.f64.html) instead. 
+ ## Usage There's two top-level functions provided: From a09726594b5e1bc60e211ceb97f622fab3ce6700 Mon Sep 17 00:00:00 2001 From: Alexander Huszagh Date: Wed, 30 Oct 2024 18:05:36 -0500 Subject: [PATCH 04/20] Create SECURITY.md --- SECURITY.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..ba992a3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,3 @@ +# Security Policy + +This crate is in maintenance mode, so only the latest version is supported and will be receiving bug fixes. If you have a security vulnerability, please reach out to me privately at [ahuszagh@gmail.com](mailto:ahuszagh@gmail.com). Other forms of communication may not reach me. From b5904d4b783661d04cd2d5d399383a51351e3391 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 19:47:38 -0500 Subject: [PATCH 05/20] Add fast-float to the bench comparisons. --- extras/simple-bench/Cargo.toml | 1 + extras/simple-bench/src/main.rs | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/extras/simple-bench/Cargo.toml b/extras/simple-bench/Cargo.toml index 1a3e648..078cf97 100644 --- a/extras/simple-bench/Cargo.toml +++ b/extras/simple-bench/Cargo.toml @@ -14,3 +14,4 @@ anyhow = "1.0" lexical = "5.2" lexical-core = "0.7" fastrand = "1.4" +fast-float = "0.2" diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs index 5dbe06d..16a520d 100644 --- a/extras/simple-bench/src/main.rs +++ b/extras/simple-bench/src/main.rs @@ -108,6 +108,7 @@ fn run_bench T>( #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum Method { FastFloat, + FastFloat2, Lexical, FromStr, } @@ -123,13 +124,14 @@ fn type_str(float32: bool) -> &'static str { impl Method { pub fn name(&self) -> &'static str { match self { + Self::FastFloat2 => "fast-float2", Self::FastFloat => "fast-float", Self::Lexical => "lexical", Self::FromStr => "from_str", } } - fn run_as( + fn run_as( &self, input: 
&Input, repeat: usize, @@ -137,9 +139,12 @@ impl Method { ) -> BenchResult { let data = &input.data; let times = match self { - Self::FastFloat => run_bench(data, repeat, |s: &str| { + Self::FastFloat2 => run_bench(data, repeat, |s: &str| { fast_float2::parse_partial::(s).unwrap_or_default().0 }), + Self::FastFloat => run_bench(data, repeat, |s: &str| { + fast_float::parse_partial::(s).unwrap_or_default().0 + }), Self::Lexical => run_bench(data, repeat, |s: &str| { lexical_core::parse_partial::(s.as_bytes()) .unwrap_or_default() @@ -165,7 +170,7 @@ impl Method { } pub fn all() -> &'static [Self] { - &[Method::FastFloat, Method::Lexical, Method::FromStr] + &[Method::FastFloat2, Method::FastFloat, Method::Lexical, Method::FromStr] } } @@ -279,7 +284,7 @@ fn main() { let methods = if !opt.only_fast_float && !matches!(&opt.command, &Cmd::All {..}) { Method::all().into() } else { - vec![Method::FastFloat] + vec![Method::FastFloat2] }; let inputs = match opt.command { From d36f96bf23ad5158cad069227e7c77f16cc18f61 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 20:03:45 -0500 Subject: [PATCH 06/20] Increment dependencies for tests and benches. 
--- .github/workflows/ci.yml | 2 +- Cargo.toml | 8 ++++---- extras/simple-bench/Cargo.toml | 6 +++--- tests/test_random.rs | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 271f136..9ccc3a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [1.56.0, stable, nightly] + rust: [1.63.0, stable, nightly] steps: - uses: actions/checkout@v2 with: diff --git a/Cargo.toml b/Cargo.toml index 7073ca0..de3d776 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,11 +20,11 @@ default = ["std"] std = [] [dev-dependencies] -lexical-core = "0.7" -hexf-parse = "0.1" +lexical-core = "1.0.2" +hexf-parse = "0.2.1" ryu = "1.0" -fastrand = "1.4" -num-bigint = "0.3" +fastrand = "2.1.1" +num-bigint = "0.4.6" [workspace] members = [".", "extras/data-tests", "extras/simple-bench"] diff --git a/extras/simple-bench/Cargo.toml b/extras/simple-bench/Cargo.toml index 078cf97..0f82574 100644 --- a/extras/simple-bench/Cargo.toml +++ b/extras/simple-bench/Cargo.toml @@ -11,7 +11,7 @@ publish = false fast-float2 = { path = "../.." } structopt = "0.3" anyhow = "1.0" -lexical = "5.2" -lexical-core = "0.7" -fastrand = "1.4" +lexical = "7.0.2" +lexical-core = "1.0.2" +fastrand = "2.1.1" fast-float = "0.2" diff --git a/tests/test_random.rs b/tests/test_random.rs index 4d54dee..ce47401 100644 --- a/tests/test_random.rs +++ b/tests/test_random.rs @@ -3,7 +3,7 @@ fn test_f64_random_from_u64() { const N_ITER: u64 = 1 << 32; - let rng = fastrand::Rng::with_seed(0); + let mut rng = fastrand::Rng::with_seed(0); let mut buf = ryu::Buffer::new(); for _ in 0..N_ITER { let i: u64 = rng.u64(0..0xFFFF_FFFF_FFFF_FFFF); From 9d91b3aa214fe8588208e059e364feb4ca31219a Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 23:21:42 -0500 Subject: [PATCH 07/20] Remove all non-local unsafety. 
This patches a lot of the wrappers in `AsciiStr` being marked as safe but not being safe except within the context, using raw pointer dereferences without local bounds checks. This is extensively documented in #37: https://github.com/aldanor/fast-float-rust/issues/37 `AsciiStr` has been re-written as a result, and unsafe functions marked as safe have been either converted to safe variants where the compiled checks can be elided or marked as unsafe so the caller knows to uphold the safety invariants. --- src/common.rs | 165 +++++++++++++++++++++++++++++++------------------ src/decimal.rs | 10 ++- src/number.rs | 72 ++++++++++++--------- src/parse.rs | 4 -- 4 files changed, 156 insertions(+), 95 deletions(-) diff --git a/src/common.rs b/src/common.rs index e46ded0..3c6b9a6 100644 --- a/src/common.rs +++ b/src/common.rs @@ -18,15 +18,39 @@ impl<'a> AsciiStr<'a> { } } + pub fn len(&self) -> usize { + self.end as usize - self.ptr as usize + } + + /// # Safety + /// + /// Safe if `n <= self.len()` #[inline] - pub fn step_by(&mut self, n: usize) -> &mut Self { + pub unsafe fn step_by(&mut self, n: usize) -> &mut Self { + debug_assert!(n <= self.len(), "buffer overflow: stepping by greater than our buffer length."); + // SAFETY: Safe if `n <= self.len()` unsafe { self.ptr = self.ptr.add(n) }; self } + /// # Safety + /// + /// Safe if `!self.is_empty()` + #[inline] + pub unsafe fn step(&mut self) -> &mut Self { + debug_assert!(!self.is_empty(), "buffer overflow: buffer is empty."); + // SAFETY: Safe if the buffer is not empty, that is, `self.len() >= 1` + unsafe { self.step_by(1) } + } + #[inline] - pub fn step(&mut self) -> &mut Self { - self.step_by(1) + pub fn step_if(&mut self, c: u8) -> bool { + let stepped = self.first_is(c); + if stepped { + // SAFETY: safe since we have at least 1 character in the buffer + unsafe { self.step() }; + } + stepped } #[inline] @@ -34,100 +58,124 @@ impl<'a> AsciiStr<'a> { self.ptr == self.end } + /// # Safety + /// + /// Safe if 
`!self.is_empty()` #[inline] - pub fn first(&self) -> u8 { + pub unsafe fn first_unchecked(&self) -> u8 { + debug_assert!(!self.is_empty(), "attempting to get first value of empty buffer."); unsafe { *self.ptr } } #[inline] - pub fn first_is(&self, c: u8) -> bool { - self.first() == c + pub fn first(&self) -> Option { + if !self.is_empty() { + // SAFETY: safe since `!self.is_empty()` + Some(unsafe { self.first_unchecked() }) + } else { + None + } } #[inline] - pub fn first_either(&self, c1: u8, c2: u8) -> bool { - let c = self.first(); - c == c1 || c == c2 + pub fn first_is(&self, c: u8) -> bool { + self.first() == Some(c) } #[inline] - pub fn check_first(&self, c: u8) -> bool { - !self.is_empty() && self.first() == c + pub fn first_is2(&self, c1: u8, c2: u8) -> bool { + if let Some(c) = self.first() { + c == c1 || c == c2 + } else { + false + } } #[inline] - pub fn check_first_either(&self, c1: u8, c2: u8) -> bool { - !self.is_empty() && (self.first() == c1 || self.first() == c2) + pub fn first_is_digit(&self) -> bool { + if let Some(c) = self.first() { + c.is_ascii_digit() + } else { + false + } } #[inline] - pub fn check_first_digit(&self) -> bool { - !self.is_empty() && self.first().is_ascii_digit() + pub fn first_digit(&self) -> Option { + self.first().and_then(|x| if x.is_ascii_digit() { + Some(x - b'0') + } else { + None + }) } #[inline] - pub fn parse_digits(&mut self, mut func: impl FnMut(u8)) { - while !self.is_empty() && self.first().is_ascii_digit() { - func(self.first() - b'0'); - self.step(); + pub fn try_read_digit(&mut self) -> Option { + if let Some(digit) = self.first_digit() { + // SAFETY: Safe since `first_digit` means the buffer is not empty + unsafe { self.step() }; + Some(digit) + } else { + None } } #[inline] - pub fn check_len(&self, n: usize) -> bool { - let len = self.end as usize - self.ptr as usize; - n <= len + pub fn parse_digits(&mut self, mut func: impl FnMut(u8)) { + while let Some(digit) = self.try_read_digit() { + func(digit); + 
} } #[inline] pub fn try_read_u64(&self) -> Option { - if self.check_len(8) { - Some(self.read_u64()) + if self.len() >= 8 { + Some(unsafe { self.read_u64_unchecked() }) } else { None } } + /// # Safety + /// + /// Safe if `self.len() >= 8` #[inline] - pub fn read_u64(&self) -> u64 { - debug_assert!(self.check_len(8)); + pub unsafe fn read_u64_unchecked(&self) -> u64 { + debug_assert!(self.len() >= 8, "overflowing buffer: buffer is not 8 bytes long"); let src = self.ptr as *const u64; + // SAFETY: Safe if `self.len() >= 8` u64::from_le(unsafe { ptr::read_unaligned(src) }) } #[inline] pub fn offset_from(&self, other: &Self) -> isize { - isize::wrapping_sub(self.ptr as _, other.ptr as _) // assuming the same end + isize::wrapping_sub(self.ptr as isize, other.ptr as isize) // assuming the same end } } // Most of these are inherently unsafe; we assume we know what we're calling and when. pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { - #[inline] - fn get_at(&self, i: usize) -> u8 { - unsafe { *self.as_ref().get_unchecked(i) } - } - - #[inline] - fn get_first(&self) -> u8 { - debug_assert!(!self.as_ref().is_empty()); - self.get_at(0) - } - #[inline] fn check_first(&self, c: u8) -> bool { - !self.as_ref().is_empty() && self.get_first() == c + self.as_ref().first() == Some(&c) } #[inline] fn check_first2(&self, c1: u8, c2: u8) -> bool { - !self.as_ref().is_empty() && (self.get_first() == c1 || self.get_first() == c2) + if let Some(&c) = self.as_ref().first() { + c == c1 || c == c2 + } else { + false + } } #[inline] fn eq_ignore_case(&self, u: &[u8]) -> bool { - debug_assert!(self.as_ref().len() >= u.len()); - let d = (0..u.len()).fold(0, |d, i| d | self.get_at(i) ^ u.get_at(i)); + let s = self.as_ref(); + if s.len() < u.len() { + return false; + } + let d = (0..u.len()).fold(0, |d, i| d | s[i] ^ u[i]); d == 0 || d == 32 } @@ -145,26 +193,25 @@ pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { s } + /// # Safety + /// + /// Safe if `self.len() >= 8`. 
#[inline] - fn skip_chars2(&self, c1: u8, c2: u8) -> &[u8] { - let mut s = self.as_ref(); - while !s.is_empty() && (s.get_first() == c1 || s.get_first() == c2) { - s = s.advance(1); - } - s - } - - #[inline] - fn read_u64(&self) -> u64 { + unsafe fn read_u64(&self) -> u64 { debug_assert!(self.as_ref().len() >= 8); let src = self.as_ref().as_ptr() as *const u64; + // SAFETY: safe if `self.len() >= 8`. u64::from_le(unsafe { ptr::read_unaligned(src) }) } + /// # Safety + /// + /// Safe if `self.len() >= 8`. #[inline] - fn write_u64(&mut self, value: u64) { + unsafe fn write_u64(&mut self, value: u64) { debug_assert!(self.as_ref().len() >= 8); let dst = self.as_mut().as_mut_ptr() as *mut u64; + // SAFETY: safe if `self.len() >= 8`. unsafe { ptr::write_unaligned(dst, u64::to_le(value)) }; } } @@ -180,8 +227,8 @@ pub fn is_8digits(v: u64) -> bool { #[inline] pub fn parse_digits(s: &mut &[u8], mut f: impl FnMut(u8)) { - while !s.is_empty() { - let c = s.get_first().wrapping_sub(b'0'); + while let Some(&ch) = s.first() { + let c = ch.wrapping_sub(b'0'); if c < 10 { f(c); *s = s.advance(1); @@ -215,14 +262,14 @@ mod tests { fn test_read_write_u64() { let bytes = b"01234567"; let string = AsciiStr::new(bytes); - let int = string.read_u64(); - assert_eq!(int, 0x3736353433323130); + let int = string.try_read_u64(); + assert_eq!(int, Some(0x3736353433323130)); - let int = bytes.read_u64(); + let int = unsafe { bytes.read_u64() }; assert_eq!(int, 0x3736353433323130); let mut slc = [0u8; 8]; - slc.write_u64(0x3736353433323130); + unsafe { slc.write_u64(0x3736353433323130) }; assert_eq!(&slc, bytes); } } diff --git a/src/decimal.rs b/src/decimal.rs index 2e7aaa0..1c97002 100644 --- a/src/decimal.rs +++ b/src/decimal.rs @@ -189,9 +189,11 @@ impl Decimal { #[inline] pub fn parse_decimal(mut s: &[u8]) -> Decimal { // can't fail since it follows a call to parse_number + assert!(!s.is_empty(), "the buffer cannot be empty since it follows a call to parse_number"); let mut d = 
Decimal::default(); let start = s; - let c = s.get_first(); + + let c = s[0]; d.negative = c == b'-'; if c == b'-' || c == b'+' { s = s.advance(1); @@ -205,11 +207,13 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { s = s.skip_chars(b'0'); } while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS { - let v = s.read_u64(); + // SAFETY: Safe since `s.len() >= 8` + let v = unsafe { s.read_u64() }; if !is_8digits(v) { break; } - d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030); + // SAFETY: Safe since `num_digits + 8 < Decimal::MAX_DIGITS` + unsafe { d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030) }; d.num_digits += 8; s = s.advance(8); } diff --git a/src/number.rs b/src/number.rs index b3d95f5..4ad3dee 100644 --- a/src/number.rs +++ b/src/number.rs @@ -84,16 +84,18 @@ fn parse_8digits(mut v: u64) -> u64 { #[inline] fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) { s.parse_digits(|digit| { - *x = x.wrapping_mul(10).wrapping_add(digit as _); // overflows to be handled later + *x = x.wrapping_mul(10).wrapping_add(digit as u64); // overflows to be handled later }); } #[inline] fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) { - while *x < MIN_19DIGIT_INT && !s.is_empty() && s.first().is_ascii_digit() { - let digit = s.first() - b'0'; - *x = (*x * 10) + digit as u64; // no overflows here - s.step(); + while *x < MIN_19DIGIT_INT { + if let Some(digit) = s.try_read_digit() { + *x = (*x * 10) + digit as u64; // no overflows here + } else { + break; + } } } @@ -105,13 +107,15 @@ fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) { *x = x .wrapping_mul(1_0000_0000) .wrapping_add(parse_8digits(v)); - s.step_by(8); + // SAFETY: safe since there is at least 8 bytes from `try_read_u64`. + unsafe { s.step_by(8) }; if let Some(v) = s.try_read_u64() { if is_8digits(v) { *x = x .wrapping_mul(1_0000_0000) .wrapping_add(parse_8digits(v)); - s.step_by(8); + // SAFETY: safe since there is at least 8 bytes from `try_read_u64`. 
+ unsafe { s.step_by(8) }; } } } @@ -120,16 +124,22 @@ fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) { #[inline] fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 { + if !s.first_is2(b'e', b'E') { + return 0; + } + // the first character is 'e'/'E' and scientific mode is enabled let start = *s; - s.step(); + // SAFETY: safe since there is at least 1 character which is `e` or `E` + unsafe { s.step() }; let mut exp_num = 0_i64; let mut neg_exp = false; - if !s.is_empty() && s.first_either(b'-', b'+') { + if s.first_is2(b'-', b'+') { neg_exp = s.first_is(b'-'); - s.step(); + // SAFETY: safe since there's at least 1 character in the buffer + unsafe { s.step() }; } - if s.check_first_digit() { + if s.first_is_digit() { s.parse_digits(|digit| { if exp_num < 0x10000 { exp_num = 10 * exp_num + digit as i64; // no overflows here @@ -148,22 +158,24 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 { #[inline] pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { - debug_assert!(!s.is_empty()); + if s.is_empty() { + return None; + } let mut s = AsciiStr::new(s); let start = s; // handle optional +/- sign let mut negative = false; - if s.first() == b'-' { + if s.step_if(b'-') { negative = true; - if s.step().is_empty() { + if s.is_empty() { return None; } - } else if s.first() == b'+' && s.step().is_empty() { + } else if s.step_if(b'+') && s.is_empty() { return None; } - debug_assert!(!s.is_empty()); + debug_assert!(!s.is_empty(), "should not have empty buffer after sign checks"); // parse initial digits before dot let mut mantissa = 0_u64; @@ -175,8 +187,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { let mut n_after_dot = 0; let mut exponent = 0_i64; let int_end = s; - if s.check_first(b'.') { - s.step(); + if s.step_if(b'.') { let before = s; try_parse_8digits(&mut s, &mut mantissa); try_parse_digits(&mut s, &mut mantissa); @@ -190,11 +201,8 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { } // handle scientific format - let mut 
exp_number = 0_i64; - if s.check_first_either(b'e', b'E') { - exp_number = parse_scientific(&mut s); - exponent += exp_number; - } + let exp_number = parse_scientific(&mut s); + exponent += exp_number; let len = s.offset_from(&start) as _; @@ -214,9 +222,12 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { n_digits -= 19; let mut many_digits = false; let mut p = digits_start; - while p.check_first_either(b'0', b'.') { - n_digits -= p.first().saturating_sub(b'0' - 1) as isize; // '0' = b'.' + 2 - p.step(); + while p.first_is2(b'0', b'.') { + // SAFETY: safe since there's at least 1 element that is `0` or `.`. + let byte = unsafe { p.first_unchecked() }; + n_digits -= byte.saturating_sub(b'0' - 1) as isize; // '0' = b'.' + 2 + // SAFETY: safe since there's at least 1 element from the `first_is2` check. + unsafe { p.step() }; } if n_digits > 0 { // at this point we have more than 19 significant digits, let's try again @@ -227,7 +238,10 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { exponent = if mantissa >= MIN_19DIGIT_INT { int_end.offset_from(&s) // big int } else { - s.step(); // fractional component, skip the '.' + // SAFETY: safe since `s` is at the digits start, so we have + // at least 1 digit from `ndigits > 0`. + debug_assert!(s.first_is(b'.'), "first character for the fraction must be a decimal"); + unsafe { s.step() }; // fractional component, skip the '.' 
let before = s; try_parse_19digits(&mut s, &mut mantissa); -s.offset_from(&before) @@ -261,14 +275,14 @@ pub fn parse_inf_nan(s: &[u8]) -> Option<(F, usize)> { } else if s.eq_ignore_case(b"inf") { return Some((F::INFINITY, parse_inf_rest(s))); } else if s.len() >= 4 { - if s.get_first() == b'+' { + if s[0] == b'+' { let s = s.advance(1); if s.eq_ignore_case(b"nan") { return Some((F::NAN, 4)); } else if s.eq_ignore_case(b"inf") { return Some((F::INFINITY, 1 + parse_inf_rest(s))); } - } else if s.get_first() == b'-' { + } else if s[0] == b'-' { let s = s.advance(1); if s.eq_ignore_case(b"nan") { return Some((F::NEG_NAN, 4)); diff --git a/src/parse.rs b/src/parse.rs index 9c592d4..fe9b09e 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -5,10 +5,6 @@ use crate::simple::parse_long_mantissa; #[inline] pub fn parse_float(s: &[u8]) -> Option<(F, usize)> { - if s.is_empty() { - return None; - } - let (num, rest) = match parse_number(s) { Some(r) => r, None => return parse_inf_nan(s), From 6f430e6ea7153e783d730220e423eebd9dc03cb8 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 23:39:13 -0500 Subject: [PATCH 08/20] Ensure checked indexing is used for the power-of-5 table lookup. --- src/binary.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/binary.rs b/src/binary.rs index 1d6eadd..93a16b8 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -82,7 +82,12 @@ fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) { 0xFFFF_FFFF_FFFF_FFFF_u64 }; let index = (q - SMALLEST_POWER_OF_FIVE as i64) as usize; - let (lo5, hi5) = unsafe { *POWER_OF_FIVE_128.get_unchecked(index) }; + // NOTE: this cannot be ellided by the compiler, but the proof the index + // must be within the bounds is non-trivial, especially because this + // comes from a parsed result. Since this is unlikely to have any major + // performance implications, as is determined empirically, we keep the + // bounds check despite the performance hit. 
+ let (lo5, hi5) = POWER_OF_FIVE_128[index]; let (mut first_lo, mut first_hi) = full_multiplication(w, lo5); if first_hi & mask == mask { let (_, second_hi) = full_multiplication(w, hi5); From 4b079a51feb0fdb35cb3039f662cea1824437139 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 23:40:55 -0500 Subject: [PATCH 09/20] Update changelog. --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8d0e85..2c4d868 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## Unreleased + +- Remove most uses of unsafe. +- Remove non-local safety invariants to prevent unsoundness. + +## 0.2.1 + +- Fix undefined behavior in checking the buffer length. + ## 0.2.0 - Fixed an edge case where long decimals with trailing zeros were truncated. From ea01d16c2cb5c9ce9bde4579673fc8d0fa5a2bf7 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 30 Oct 2024 23:46:53 -0500 Subject: [PATCH 10/20] Add in our rust formatters. --- extras/data-tests/src/main.rs | 10 +---- extras/simple-bench/src/main.rs | 67 ++++++++++++++------------------- rustfmt.toml | 19 ++++++++++ src/binary.rs | 17 ++++++--- src/common.rs | 21 +++++++---- src/decimal.rs | 9 ++++- src/float.rs | 11 +++--- src/lib.rs | 49 ++++++++++++++---------- src/number.rs | 14 +++---- src/simple.rs | 10 +++-- src/table.rs | 10 +++-- tests/test_basic.rs | 44 +++++----------------- 12 files changed, 146 insertions(+), 135 deletions(-) create mode 100644 rustfmt.toml diff --git a/extras/data-tests/src/main.rs b/extras/data-tests/src/main.rs index 3be4d02..5acf599 100644 --- a/extras/data-tests/src/main.rs +++ b/extras/data-tests/src/main.rs @@ -31,10 +31,7 @@ impl TestCase { let (value, len) = r.unwrap(); if len != self.string.len() || value != expected { if len != self.string.len() { - eprintln!( - "Expected empty string remainder, got: {:?}", - self.string.len() - len - ); + eprintln!("Expected empty string remainder, got: {:?}", self.string.len() - len); 
} if value != expected { eprintln!("Expected output {}, got {}", expected, value); @@ -51,10 +48,7 @@ impl TestCase { fn parse_test_file(filename: impl AsRef) -> impl Iterator { let file = File::open(filename).unwrap(); - BufReader::new(file) - .lines() - .map(Result::unwrap) - .map(TestCase::parse) + BufReader::new(file).lines().map(Result::unwrap).map(TestCase::parse) } fn run_test_cases(filename: impl AsRef) -> usize { diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs index 16a520d..aaa2d49 100644 --- a/extras/simple-bench/src/main.rs +++ b/extras/simple-bench/src/main.rs @@ -6,20 +6,14 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::time::Instant; +use fast_float2::FastFloat; use fastrand::Rng; use lexical::FromLexical; -use structopt::StructOpt; - -use fast_float2::FastFloat; - use random::RandomGen; +use structopt::StructOpt; #[derive(Debug, StructOpt)] -#[structopt( - name = "fast-float-simple-bench", - about = "fast-float benchmark utility", - no_version -)] +#[structopt(name = "fast-float-simple-bench", about = "fast-float benchmark utility", no_version)] struct Opt { /// Parse numbers as float32 (default is float64) #[structopt(short, long = "32")] @@ -146,9 +140,7 @@ impl Method { fast_float::parse_partial::(s).unwrap_or_default().0 }), Self::Lexical => run_bench(data, repeat, |s: &str| { - lexical_core::parse_partial::(s.as_bytes()) - .unwrap_or_default() - .0 + lexical_core::parse_partial::(s.as_bytes()).unwrap_or_default().0 }), Self::FromStr => run_bench(data, repeat, |s: &str| s.parse::().unwrap_or_default()), }; @@ -180,12 +172,8 @@ fn print_report(results: &[BenchResult], title: &str) { println!("| {:^width$} |", title, width = width); println!("|{:=>(); + let mut metrics = res.times.iter().map(|&t| transform(t, n, b)).collect::>(); metrics.sort_by(|a, b| a.partial_cmp(b).unwrap()); for &(_, idx) in columns { print!("{:>w$.2}", metrics[idx], w = w); @@ -240,23 +224,23 @@ struct Input { impl Input { 
pub fn from_file(filename: impl AsRef) -> Self { let filename = filename.as_ref(); - let data = fs::read_to_string(&filename) - .unwrap() - .trim() - .lines() - .map(String::from) - .collect(); + let data = + fs::read_to_string(&filename).unwrap().trim().lines().map(String::from).collect(); let name = filename.file_name().unwrap().to_str().unwrap().into(); - Self { data, name } + Self { + data, + name, + } } pub fn from_random(gen: RandomGen, count: usize, seed: u64) -> Self { let mut rng = Rng::with_seed(seed); - let data = iter::repeat_with(|| gen.gen(&mut rng)) - .take(count) - .collect(); + let data = iter::repeat_with(|| gen.gen(&mut rng)).take(count).collect(); let name = format!("{}", gen); - Self { data, name } + Self { + data, + name, + } } pub fn count(&self) -> usize { @@ -281,14 +265,16 @@ impl Input { fn main() { let opt: Opt = StructOpt::from_args(); - let methods = if !opt.only_fast_float && !matches!(&opt.command, &Cmd::All {..}) { + let methods = if !opt.only_fast_float && !matches!(&opt.command, &Cmd::All { .. }) { Method::all().into() } else { vec![Method::FastFloat2] }; let inputs = match opt.command { - Cmd::File { filename } => vec![Input::from_file(filename)], + Cmd::File { + filename, + } => vec![Input::from_file(filename)], Cmd::Random { gen, count, @@ -300,8 +286,11 @@ fn main() { fs::write(filename, input.data.join("\n")).unwrap(); } vec![input] - } - Cmd::All { count, seed } => { + }, + Cmd::All { + count, + seed, + } => { let mut inputs = vec![]; let data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("ext/data"); inputs.push(Input::from_file(data_dir.join("mesh.txt"))); @@ -310,7 +299,7 @@ fn main() { inputs.push(Input::from_random(gen, count, seed)) } inputs - } + }, }; let mut results = vec![]; diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..164014c --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,19 @@ +# Requires nightly to do proper formatting. 
+use_small_heuristics = "Off" +use_field_init_shorthand = true +trailing_semicolon = true +newline_style = "Unix" +match_block_trailing_comma = true +empty_item_single_line = false +enum_discrim_align_threshold = 40 +fn_params_layout = "Tall" +fn_single_line = false +format_macro_matchers = true +format_macro_bodies = true +imports_indent = "Block" +imports_layout = "HorizontalVertical" +indent_style = "Block" +match_arm_blocks = true +overflow_delimited_expr = true +group_imports = "StdExternalCrate" +wrap_comments = true diff --git a/src/binary.rs b/src/binary.rs index 93a16b8..da2751f 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -33,7 +33,10 @@ pub fn compute_float(q: i64, mut w: u64) -> AdjustedMantissa { mantissa += mantissa & 1; mantissa >>= 1; power2 = (mantissa >= (1_u64 << F::MANTISSA_EXPLICIT_BITS)) as i32; - return AdjustedMantissa { mantissa, power2 }; + return AdjustedMantissa { + mantissa, + power2, + }; } if lo <= 1 && q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64 @@ -53,7 +56,10 @@ pub fn compute_float(q: i64, mut w: u64) -> AdjustedMantissa { if power2 >= F::INFINITE_POWER { return am_inf; } - AdjustedMantissa { mantissa, power2 } + AdjustedMantissa { + mantissa, + power2, + } } #[inline] @@ -67,9 +73,10 @@ fn full_multiplication(a: u64, b: u64) -> (u64, u64) { (r as u64, (r >> 64) as u64) } -// This will compute or rather approximate w * 5**q and return a pair of 64-bit words -// approximating the result, with the "high" part corresponding to the most significant -// bits and the low part corresponding to the least significant bits. +// This will compute or rather approximate w * 5**q and return a pair of 64-bit +// words approximating the result, with the "high" part corresponding to the +// most significant bits and the low part corresponding to the least significant +// bits. 
#[inline] fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) { debug_assert!(q >= SMALLEST_POWER_OF_FIVE as i64); diff --git a/src/common.rs b/src/common.rs index 3c6b9a6..97e514f 100644 --- a/src/common.rs +++ b/src/common.rs @@ -27,7 +27,10 @@ impl<'a> AsciiStr<'a> { /// Safe if `n <= self.len()` #[inline] pub unsafe fn step_by(&mut self, n: usize) -> &mut Self { - debug_assert!(n <= self.len(), "buffer overflow: stepping by greater than our buffer length."); + debug_assert!( + n <= self.len(), + "buffer overflow: stepping by greater than our buffer length." + ); // SAFETY: Safe if `n <= self.len()` unsafe { self.ptr = self.ptr.add(n) }; self @@ -102,10 +105,12 @@ impl<'a> AsciiStr<'a> { #[inline] pub fn first_digit(&self) -> Option { - self.first().and_then(|x| if x.is_ascii_digit() { - Some(x - b'0') - } else { - None + self.first().and_then(|x| { + if x.is_ascii_digit() { + Some(x - b'0') + } else { + None + } }) } @@ -153,7 +158,8 @@ impl<'a> AsciiStr<'a> { } } -// Most of these are inherently unsafe; we assume we know what we're calling and when. +// Most of these are inherently unsafe; we assume we know what we're calling and +// when. 
pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { #[inline] fn check_first(&self, c: u8) -> bool { @@ -216,7 +222,8 @@ pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { } } -impl ByteSlice for [u8] {} +impl ByteSlice for [u8] { +} #[inline] pub fn is_8digits(v: u64) -> bool { diff --git a/src/decimal.rs b/src/decimal.rs index 1c97002..135137c 100644 --- a/src/decimal.rs +++ b/src/decimal.rs @@ -33,7 +33,8 @@ impl PartialEq for Decimal { } } -impl Eq for Decimal {} +impl Eq for Decimal { +} impl Default for Decimal { fn default() -> Self { @@ -253,7 +254,11 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { exp_num = 10 * exp_num + digit as i32; } }); - d.decimal_point += if neg_exp { -exp_num } else { exp_num }; + d.decimal_point += if neg_exp { + -exp_num + } else { + exp_num + }; } for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW { d.digits[i] = 0; diff --git a/src/float.rs b/src/float.rs index b4e9c68..fbb2a0a 100644 --- a/src/float.rs +++ b/src/float.rs @@ -45,7 +45,8 @@ pub trait Float: fn pow10_fast_path(exponent: usize) -> Self; } -impl private::Sealed for f32 {} +impl private::Sealed for f32 { +} impl Float for f32 { const INFINITY: Self = core::f32::INFINITY; @@ -78,14 +79,14 @@ impl Float for f32 { #[inline] fn pow10_fast_path(exponent: usize) -> Self { #[allow(clippy::use_self)] - const TABLE: [f32; 16] = [ - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0., - ]; + const TABLE: [f32; 16] = + [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.]; TABLE[exponent & 15] } } -impl private::Sealed for f64 {} +impl private::Sealed for f64 { +} impl Float for f64 { const INFINITY: Self = core::f64::INFINITY; diff --git a/src/lib.rs b/src/lib.rs index a90ad36..1a4a4c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,19 +1,24 @@ -//! This crate provides a super-fast decimal number parser from strings into floats. +//! This crate provides a super-fast decimal number parser from strings into +//! floats. //! 
//! ## Usage //! //! There's two top-level functions provided: [`parse`](crate::parse()) and //! [`parse_partial`](crate::parse_partial()), both taking -//! either a string or a bytes slice and parsing the input into either `f32` or `f64`: +//! either a string or a bytes slice and parsing the input into either `f32` or +//! `f64`: //! -//! - [`parse`](crate::parse()) treats the whole string as a decimal number and returns an -//! error if there are invalid characters or if the string is empty. -//! - [`parse_partial`](crate::parse_partial()) tries to find the longest substring at the -//! beginning of the given input string that can be parsed as a decimal number and, -//! in the case of success, returns the parsed value along the number of characters processed; -//! an error is returned if the string doesn't start with a decimal number or if it is empty. -//! This function is most useful as a building block when constructing more complex parsers, -//! or when parsing streams of data. +//! - [`parse`](crate::parse()) treats the whole string as a decimal number and +//! returns an error if there are invalid characters or if the string is +//! empty. +//! - [`parse_partial`](crate::parse_partial()) tries to find the longest +//! substring at the +//! beginning of the given input string that can be parsed as a decimal number +//! and, in the case of success, returns the parsed value along the number of +//! characters processed; an error is returned if the string doesn't start with +//! a decimal number or if it is empty. This function is most useful as a +//! building block when constructing more complex parsers, or when parsing +//! streams of data. //! //! ## Examples //! @@ -94,21 +99,24 @@ pub trait FastFloat: float::Float { /// Parse a decimal number from string into float (partial). 
/// - /// This method parses as many characters as possible and returns the resulting number along - /// with the number of digits processed (in case of success, this number is always positive). + /// This method parses as many characters as possible and returns the + /// resulting number along with the number of digits processed (in case + /// of success, this number is always positive). /// /// # Errors /// - /// Will return an error either if the string doesn't start with a valid decimal number - /// – that is, if no zero digits were processed. + /// Will return an error either if the string doesn't start with a valid + /// decimal number – that is, if no zero digits were processed. #[inline] fn parse_float_partial>(s: S) -> Result<(Self, usize)> { parse::parse_float(s.as_ref()).ok_or(Error) } } -impl FastFloat for f32 {} -impl FastFloat for f64 {} +impl FastFloat for f32 { +} +impl FastFloat for f64 { +} /// Parse a decimal number from string into float (full). /// @@ -123,13 +131,14 @@ pub fn parse>(s: S) -> Result { /// Parse a decimal number from string into float (partial). /// -/// This function parses as many characters as possible and returns the resulting number along -/// with the number of digits processed (in case of success, this number is always positive). +/// This function parses as many characters as possible and returns the +/// resulting number along with the number of digits processed (in case of +/// success, this number is always positive). /// /// # Errors /// -/// Will return an error either if the string doesn't start with a valid decimal number -/// – that is, if no zero digits were processed. +/// Will return an error either if the string doesn't start with a valid decimal +/// number – that is, if no zero digits were processed. 
#[inline] pub fn parse_partial>(s: S) -> Result<(T, usize)> { T::parse_float_partial(s) diff --git a/src/number.rs b/src/number.rs index 4ad3dee..eaf8548 100644 --- a/src/number.rs +++ b/src/number.rs @@ -84,7 +84,8 @@ fn parse_8digits(mut v: u64) -> u64 { #[inline] fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) { s.parse_digits(|digit| { - *x = x.wrapping_mul(10).wrapping_add(digit as u64); // overflows to be handled later + // overflows to be handled later + *x = x.wrapping_mul(10).wrapping_add(digit as u64); }); } @@ -104,16 +105,12 @@ fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) { // may cause overflows, to be handled later if let Some(v) = s.try_read_u64() { if is_8digits(v) { - *x = x - .wrapping_mul(1_0000_0000) - .wrapping_add(parse_8digits(v)); + *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v)); // SAFETY: safe since there is at least 8 bytes from `try_read_u64`. unsafe { s.step_by(8) }; if let Some(v) = s.try_read_u64() { if is_8digits(v) { - *x = x - .wrapping_mul(1_0000_0000) - .wrapping_add(parse_8digits(v)); + *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v)); // SAFETY: safe since there is at least 8 bytes from `try_read_u64`. unsafe { s.step_by(8) }; } @@ -225,7 +222,8 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { while p.first_is2(b'0', b'.') { // SAFETY: safe since there's at least 1 element that is `0` or `.`. let byte = unsafe { p.first_unchecked() }; - n_digits -= byte.saturating_sub(b'0' - 1) as isize; // '0' = b'.' + 2 + // '0' = b'.' + 2 + n_digits -= byte.saturating_sub(b'0' - 1) as isize; // SAFETY: safe since there's at least 1 element from the `first_is2` check. 
unsafe { p.step() }; } diff --git a/src/simple.rs b/src/simple.rs index cec1183..6db94dd 100644 --- a/src/simple.rs +++ b/src/simple.rs @@ -6,9 +6,8 @@ use crate::float::Float; pub fn parse_long_mantissa(s: &[u8]) -> AdjustedMantissa { const MAX_SHIFT: usize = 60; const NUM_POWERS: usize = 19; - const POWERS: [u8; 19] = [ - 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, - ]; + const POWERS: [u8; 19] = + [0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59]; let get_shift = |n| { if n < NUM_POWERS { @@ -81,5 +80,8 @@ pub fn parse_long_mantissa(s: &[u8]) -> AdjustedMantissa { power2 -= 1; } mantissa &= (1_u64 << F::MANTISSA_EXPLICIT_BITS) - 1; - AdjustedMantissa { mantissa, power2 } + AdjustedMantissa { + mantissa, + power2, + } } diff --git a/src/table.rs b/src/table.rs index ede4bae..47c700e 100644 --- a/src/table.rs +++ b/src/table.rs @@ -4,10 +4,10 @@ pub const N_POWERS_OF_FIVE: usize = (LARGEST_POWER_OF_FIVE - SMALLEST_POWER_OF_F #[cfg(test)] mod tests { - use super::*; - use num_bigint::BigUint; + use super::*; + fn compute_pow5_128(q: i32) -> (u64, u64) { let mut c = if q < 0 { let pow5 = BigUint::from(5_u8).pow((-q) as u32); @@ -15,7 +15,11 @@ mod tests { while (BigUint::from(1_u8) << z) < pow5 { z += 1; } - let b = if q < -27 { 2 * z + 128 } else { z + 127 }; + let b = if q < -27 { + 2 * z + 128 + } else { + z + 127 + }; (BigUint::from(1_u8) << b) / pow5 + BigUint::from(1_u8) } else { BigUint::from(5_u8).pow(q as u32) diff --git a/tests/test_basic.rs b/tests/test_basic.rs index f6eb4c8..0086c97 100644 --- a/tests/test_basic.rs +++ b/tests/test_basic.rs @@ -14,10 +14,10 @@ macro_rules! 
check { ($ty:ident, $s:expr) => {{ check!($ty, stringify!($s), $s) }}; - ($ty:ident, $s:expr, inf) => {{ + ($ty:ident, $s:expr,inf) => {{ check!($ty, $s, core::$ty::INFINITY) }}; - ($ty:ident, $s:expr, neg_inf) => {{ + ($ty:ident, $s:expr,neg_inf) => {{ check!($ty, $s, core::$ty::NEG_INFINITY) }}; ($ty:ident, $s:expr, $e:expr) => {{ @@ -212,24 +212,12 @@ fn test_f64_general() { check_f64!("9007199254740993.0", hexf64("0x1.p+53")); check_f64!(append_zeros("9007199254740993.0", 1000), hexf64("0x1.p+53")); check_f64!("10000000000000000000", hexf64("0x1.158e460913dp+63")); - check_f64!( - "10000000000000000000000000000001000000000000", - hexf64("0x1.cb2d6f618c879p+142") - ); - check_f64!( - "10000000000000000000000000000000000000000001", - hexf64("0x1.cb2d6f618c879p+142") - ); + check_f64!("10000000000000000000000000000001000000000000", hexf64("0x1.cb2d6f618c879p+142")); + check_f64!("10000000000000000000000000000000000000000001", hexf64("0x1.cb2d6f618c879p+142")); check_f64!(1.1920928955078125e-07); check_f64!("-0", -0.0); - check_f64!( - "1.0000000000000006661338147750939242541790008544921875", - 1.0000000000000007 - ); - check_f64!( - "1090544144181609348835077142190", - hexf64("0x1.b8779f2474dfbp+99") - ); + check_f64!("1.0000000000000006661338147750939242541790008544921875", 1.0000000000000007); + check_f64!("1090544144181609348835077142190", hexf64("0x1.b8779f2474dfbp+99")); check_f64!(2.2250738585072013e-308); check_f64!(-92666518056446206563E3); check_f64!(-92666518056446206563E3); @@ -255,10 +243,7 @@ fn test_f64_general() { check_f64!(-2.1470977154320536489471030463761883783915110400000000000000000000e+45); check_f64!(-4.4900312744003159009338275160799498340862630046359789166919680000e+61); check_f64!("+1", 1.0); - check_f64!( - "1.797693134862315700000000000000001e308", - 1.7976931348623157e308 - ); + check_f64!("1.797693134862315700000000000000001e308", 1.7976931348623157e308); check_f64!("3e-324", hexf64("0x0.0000000000001p-1022")); 
check_f64!("1.00000006e+09", hexf64("0x1.dcd651ep+29")); check_f64!("4.9406564584124653e-324", hexf64("0x0.0000000000001p-1022")); @@ -303,18 +288,9 @@ fn test_f32_basic() { 7699724722770042717456817626953125"; check_f32!(f1, hexf32("0x1.2ced3p+0")); check_f32!(format!("{}e-38", f1), hexf32("0x1.fffff8p-127")); - check_f32!( - format!("{}e-38", append_zeros(f1, 655)), - hexf32("0x1.fffff8p-127") - ); - check_f32!( - format!("{}e-38", append_zeros(f1, 656)), - hexf32("0x1.fffff8p-127") - ); - check_f32!( - format!("{}e-38", append_zeros(f1, 1000)), - hexf32("0x1.fffff8p-127") - ); + check_f32!(format!("{}e-38", append_zeros(f1, 655)), hexf32("0x1.fffff8p-127")); + check_f32!(format!("{}e-38", append_zeros(f1, 656)), hexf32("0x1.fffff8p-127")); + check_f32!(format!("{}e-38", append_zeros(f1, 1000)), hexf32("0x1.fffff8p-127")); check_f32!(1.00000006e+09); check_f32!(1.4012984643e-45); check_f32!(1.1754942107e-38); From 90b75230bce4a83dcab0c2929ac5f052fd1ef1f3 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 00:06:12 -0500 Subject: [PATCH 11/20] Add our clippy lints. --- clippy.toml | 20 ++++++++++++++++++++ src/binary.rs | 2 +- src/common.rs | 34 +++++++++++++--------------------- src/decimal.rs | 5 +++-- src/float.rs | 6 +++--- src/lib.rs | 26 ++++++++++++++++++-------- src/number.rs | 9 +++++---- src/simple.rs | 2 +- 8 files changed, 64 insertions(+), 40 deletions(-) create mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000..8bd81b7 --- /dev/null +++ b/clippy.toml @@ -0,0 +1,20 @@ +avoid-breaking-exported-api = false +disallowed-macros = [ + # Can also use an inline table with a `path` key. 
+ { path = "std::print", reason = "no IO allowed" }, + { path = "std::println", reason = "no IO allowed" }, + { path = "std::format", reason = "no string allocation allowed" }, + { path = "std::debug", reason = "debugging macros should not be present in any release" }, + # NOTE: unimplemented is fine because this can be for intentionally disabled methods + { path = "std::todo", reason = "should never have TODO macros in releases" }, +] +disallowed-methods = [ + { path = "std::io::stdout", reason = "no IO allowed" }, + { path = "std::io::stdin", reason = "no IO allowed" }, + { path = "std::io::stderr", reason = "no IO allowed" }, +] +disallowed-types = [ + { path = "std::io::File", reason = "no IO allowed" }, + { path = "std::io::BufReader", reason = "need our own abstractions for reading/writing" }, + { path = "std::io::BufWriter", reason = "need our own abstractions for reading/writing" }, +] diff --git a/src/binary.rs b/src/binary.rs index da2751f..88d24ec 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -17,7 +17,7 @@ pub fn compute_float(q: i64, mut w: u64) -> AdjustedMantissa { w <<= lz; let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3); if lo == 0xFFFF_FFFF_FFFF_FFFF { - let inside_safe_exponent = (q >= -27) && (q <= 55); + let inside_safe_exponent = (-27..=55).contains(&q); if !inside_safe_exponent { return am_error; } diff --git a/src/common.rs b/src/common.rs index 97e514f..a9afc6a 100644 --- a/src/common.rs +++ b/src/common.rs @@ -14,7 +14,7 @@ impl<'a> AsciiStr<'a> { Self { ptr: s.as_ptr(), end: unsafe { s.as_ptr().add(s.len()) }, - _marker: PhantomData::default(), + _marker: PhantomData, } } @@ -72,11 +72,11 @@ impl<'a> AsciiStr<'a> { #[inline] pub fn first(&self) -> Option { - if !self.is_empty() { + if self.is_empty() { + None + } else { // SAFETY: safe since `!self.is_empty()` Some(unsafe { self.first_unchecked() }) - } else { - None } } @@ -87,20 +87,12 @@ impl<'a> AsciiStr<'a> { #[inline] pub fn first_is2(&self, c1: u8, 
c2: u8) -> bool { - if let Some(c) = self.first() { - c == c1 || c == c2 - } else { - false - } + self.first().map_or(false, |c| c == c1 || c == c2) } #[inline] pub fn first_is_digit(&self) -> bool { - if let Some(c) = self.first() { - c.is_ascii_digit() - } else { - false - } + self.first().map_or(false, |c| c.is_ascii_digit()) } #[inline] @@ -116,13 +108,10 @@ impl<'a> AsciiStr<'a> { #[inline] pub fn try_read_digit(&mut self) -> Option { - if let Some(digit) = self.first_digit() { - // SAFETY: Safe since `first_digit` means the buffer is not empty - unsafe { self.step() }; - Some(digit) - } else { - None - } + let digit = self.first_digit()?; + // SAFETY: Safe since `first_digit` means the buffer is not empty + unsafe { self.step() }; + Some(digit) } #[inline] @@ -145,6 +134,7 @@ impl<'a> AsciiStr<'a> { /// /// Safe if `self.len() >= 8` #[inline] + #[allow(clippy::cast_ptr_alignment)] pub unsafe fn read_u64_unchecked(&self) -> u64 { debug_assert!(self.len() >= 8, "overflowing buffer: buffer is not 8 bytes long"); let src = self.ptr as *const u64; @@ -203,6 +193,7 @@ pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { /// /// Safe if `self.len() >= 8`. #[inline] + #[allow(clippy::cast_ptr_alignment)] unsafe fn read_u64(&self) -> u64 { debug_assert!(self.as_ref().len() >= 8); let src = self.as_ref().as_ptr() as *const u64; @@ -214,6 +205,7 @@ pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { /// /// Safe if `self.len() >= 8`. 
#[inline] + #[allow(clippy::cast_ptr_alignment)] unsafe fn write_u64(&mut self, value: u64) { debug_assert!(self.as_ref().len() >= 8); let dst = self.as_mut().as_mut_ptr() as *mut u64; diff --git a/src/decimal.rs b/src/decimal.rs index 135137c..74342f2 100644 --- a/src/decimal.rs +++ b/src/decimal.rs @@ -87,7 +87,7 @@ impl Decimal { if dp < self.num_digits { round_up = self.digits[dp] >= 5; if self.digits[dp] == 5 && dp + 1 == self.num_digits { - round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0)) + round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0)); } } if round_up { @@ -267,6 +267,7 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { } #[inline] +#[allow(clippy::redundant_else)] fn number_of_digits_decimal_left_shift(d: &Decimal, mut shift: usize) -> usize { const TABLE: [u16; 65] = [ 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024, @@ -326,7 +327,7 @@ fn number_of_digits_decimal_left_shift(d: &Decimal, mut shift: usize) -> usize { shift &= 63; let x_a = TABLE[shift]; let x_b = TABLE[shift + 1]; - let num_new_digits = (x_a >> 11) as _; + let num_new_digits = (x_a >> 11) as usize; let pow5_a = (0x7FF & x_a) as usize; let pow5_b = (0x7FF & x_b) as usize; let pow5 = &TABLE_POW5[pow5_a..]; diff --git a/src/float.rs b/src/float.rs index fbb2a0a..dee116e 100644 --- a/src/float.rs +++ b/src/float.rs @@ -68,12 +68,12 @@ impl Float for f32 { #[inline] fn from_u64(v: u64) -> Self { - v as _ + v as f32 } #[inline] fn from_u64_bits(v: u64) -> Self { - f32::from_bits((v & 0xFFFFFFFF) as u32) + f32::from_bits((v & 0xFFFF_FFFF) as u32) } #[inline] @@ -108,7 +108,7 @@ impl Float for f64 { #[inline] fn from_u64(v: u64) -> Self { - v as _ + v as f64 } #[inline] diff --git a/src/lib.rs b/src/lib.rs index 1a4a4c2..5dd4029 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,13 +12,12 @@ //! returns an error if there are invalid characters or if the string is //! empty. //! 
- [`parse_partial`](crate::parse_partial()) tries to find the longest -//! substring at the -//! beginning of the given input string that can be parsed as a decimal number -//! and, in the case of success, returns the parsed value along the number of -//! characters processed; an error is returned if the string doesn't start with -//! a decimal number or if it is empty. This function is most useful as a -//! building block when constructing more complex parsers, or when parsing -//! streams of data. +//! substring at the beginning of the given input string that can be parsed as +//! a decimal number and, in the case of success, returns the parsed value +//! along the number of characters processed; an error is returned if the +//! string doesn't start with a decimal number or if it is empty. This +//! function is most useful as a building block when constructing more complex +//! parsers, or when parsing streams of data. //! //! ## Examples //! @@ -36,7 +35,17 @@ //! assert_eq!(&s[n..], "foo"); //! 
``` +#![allow(unused_unsafe)] +#![warn(unsafe_op_in_unsafe_fn)] #![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)] +#![deny( + clippy::doc_markdown, + clippy::unnecessary_safety_comment, + clippy::semicolon_if_nothing_returned, + clippy::unwrap_used, + clippy::as_underscore, + clippy::doc_markdown +)] #![allow( clippy::cast_possible_truncation, clippy::cast_possible_wrap, @@ -46,7 +55,8 @@ clippy::missing_const_for_fn, clippy::use_self, clippy::module_name_repetitions, - clippy::cargo_common_metadata + clippy::cargo_common_metadata, + clippy::struct_field_names )] use core::fmt::{self, Display}; diff --git a/src/number.rs b/src/number.rs index eaf8548..a13c5be 100644 --- a/src/number.rs +++ b/src/number.rs @@ -3,6 +3,7 @@ use crate::float::Float; const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000; +#[allow(clippy::unreadable_literal)] pub const INT_POW10: [u64; 16] = [ 1, 10, @@ -46,9 +47,9 @@ impl Number { // normal fast path let value = F::from_u64(self.mantissa); if self.exponent < 0 { - value / F::pow10_fast_path((-self.exponent) as _) + value / F::pow10_fast_path((-self.exponent) as usize) } else { - value * F::pow10_fast_path(self.exponent as _) + value * F::pow10_fast_path(self.exponent as usize) } } else { // disguised fast path @@ -57,7 +58,7 @@ impl Number { if mantissa > F::MAX_MANTISSA_FAST_PATH { return None; } - F::from_u64(mantissa) * F::pow10_fast_path(F::MAX_EXPONENT_FAST_PATH as _) + F::from_u64(mantissa) * F::pow10_fast_path(F::MAX_EXPONENT_FAST_PATH as usize) }; if self.negative { value = -value; @@ -201,7 +202,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { let exp_number = parse_scientific(&mut s); exponent += exp_number; - let len = s.offset_from(&start) as _; + let len = s.offset_from(&start) as usize; // handle uncommon case with many digits if n_digits <= 19 { diff --git a/src/simple.rs b/src/simple.rs index 6db94dd..f3a724a 100644 --- a/src/simple.rs +++ b/src/simple.rs @@ -45,7 +45,7 @@ pub fn 
parse_long_mantissa(s: &[u8]) -> AdjustedMantissa { _ => 1, } } else { - get_shift((-d.decimal_point) as _) + get_shift((-d.decimal_point) as usize) }; d.left_shift(shift); if d.decimal_point > Decimal::DECIMAL_POINT_RANGE { From af2ce36bfc4456d75417653ed83a53d4ad5a464c Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 00:14:14 -0500 Subject: [PATCH 12/20] Add linters to our CI. --- .github/workflows/ci.yml | 3 +-- .github/workflows/lint.yml | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ccc3a2..f03cf60 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,8 +1,7 @@ name: CI on: - push: - pull_request: + [push, pull_request, workflow_dispatch] jobs: test: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..0da680d --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: Lint + +on: + [push, pull_request, workflow_dispatch] + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + components: rustfmt, clippy + - run: cargo check + - run: cargo fmt -- --check + - run: RUSTFLAGS="--deny warnings" cargo build + - run: cargo clippy --all-features -- --deny warnings From b3588daeff4e3369a9d84a2b5cc492702ecada72 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 00:16:22 -0500 Subject: [PATCH 13/20] Patch clippy lints. 
--- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 5dd4029..8d6dd5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,7 +82,7 @@ impl Display for Error { #[cfg(feature = "std")] impl std::error::Error for Error { - fn description(&self) -> &str { + fn description(&self) -> &'static str { "error while parsing a float" } } From 86c93dd7a1c0ec7f3e9e38089539f825c737a172 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 00:18:51 -0500 Subject: [PATCH 14/20] Add no_std support. Related to: https://github.com/aldanor/fast-float-rust/pull/33 --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 8d6dd5a..d6793ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ //! assert_eq!(&s[n..], "foo"); //! ``` +#![cfg_attr(not(feature = "std"), no_std)] #![allow(unused_unsafe)] #![warn(unsafe_op_in_unsafe_fn)] #![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)] From a16408383f6eca565d5ac1d8bbe068b8c29b3c95 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 00:24:30 -0500 Subject: [PATCH 15/20] Update our changelog for no_std support. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4d868..16c3b36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## Unreleased +- Fix `no_std` support. - Remove most uses of unsafe. - Remove non-local safety invariants to prevent unsoundness. From 31d1abf7c6232f35eb069c64b8290e6fa92802b6 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 00:38:24 -0500 Subject: [PATCH 16/20] Remove unsafety for the v0.2.2 release. --- CHANGELOG.md | 2 +- Cargo.toml | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16c3b36..4f415ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## Unreleased +## 0.2.2 - Fix `no_std` support. 
- Remove most uses of unsafe. diff --git a/Cargo.toml b/Cargo.toml index de3d776..3531bb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fast-float2" -version = "0.2.1" +version = "0.2.2" authors = ["Ivan Smirnov ", "Alex Huszagh "] repository = "https://github.com/Alexhuszagh/fast-float-rust" documentation = "https://docs.rs/fast-float2" diff --git a/README.md b/README.md index 72118a3..043f094 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ This crate provides a super-fast decimal number parser from strings into floats. ```toml [dependencies] -fast-float2 = "0.2.1" +fast-float2 = "0.2.2" ``` There are no dependencies and the crate can be used in a no_std context by disabling the "std" feature. From 04cd614a082b5b6fbc59638c183dea1b40899f68 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 07:46:26 -0500 Subject: [PATCH 17/20] Add miri and fuzz targets to our CI. --- .github/workflows/fuzz.yml | 25 +++++++++++++++++++++++++ .github/workflows/miri.yml | 21 +++++++++++++++++++++ fuzz/Cargo.toml | 4 ++-- src/table.rs | 1 + 4 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/fuzz.yml create mode 100644 .github/workflows/miri.yml diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..4825e6a --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,25 @@ +name: Fuzz + +on: + [pull_request, workflow_dispatch] + +jobs: + fuzz: + name: Fuzz ${{matrix.target}} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + target: [fast_float, roundtrip_f64] + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + - run: cargo install cargo-fuzz + - run: cargo check + - run: | + cd fuzz + cargo +nightly fuzz run --release ${{matrix.target}} -- -max_total_time=300 diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml new file mode 
100644 index 0000000..112f661 --- /dev/null +++ b/.github/workflows/miri.yml @@ -0,0 +1,21 @@ +name: Miri + +on: + [pull_request, workflow_dispatch] + +jobs: + miri: + name: Miri + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@stable + with: + toolchain: nightly + - run: cargo check + - run: cargo build + - run: | + rustup component add --toolchain nightly miri + cargo miri test diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 426cf5e..c019851 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -12,8 +12,8 @@ cargo-fuzz = true members = ["."] [dependencies] -fast-float = { path = ".." } -libfuzzer-sys = "0.3" +fast-float2 = { path = ".." } +libfuzzer-sys = "0.4.7" ryu = "1.0" [[bin]] diff --git a/src/table.rs b/src/table.rs index 47c700e..49f4855 100644 --- a/src/table.rs +++ b/src/table.rs @@ -41,6 +41,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] fn test_pow5_table() { for q in SMALLEST_POWER_OF_FIVE..=LARGEST_POWER_OF_FIVE { let (hi, lo) = compute_pow5_128(q); From 2cc68894b15310afd7e560e2237b41d3906d2118 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 15:26:36 -0500 Subject: [PATCH 18/20] Ensure our lengths use `isize` and not `usize` for checks. --- Cargo.toml | 2 +- README.md | 2 +- src/common.rs | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3531bb8..14198ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fast-float2" -version = "0.2.2" +version = "0.2.3" authors = ["Ivan Smirnov ", "Alex Huszagh "] repository = "https://github.com/Alexhuszagh/fast-float-rust" documentation = "https://docs.rs/fast-float2" diff --git a/README.md b/README.md index 043f094..635bab5 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ This crate provides a super-fast decimal number parser from strings into floats. 
```toml [dependencies] -fast-float2 = "0.2.2" +fast-float2 = "0.2.3" ``` There are no dependencies and the crate can be used in a no_std context by disabling the "std" feature. diff --git a/src/common.rs b/src/common.rs index a9afc6a..2484437 100644 --- a/src/common.rs +++ b/src/common.rs @@ -18,8 +18,8 @@ impl<'a> AsciiStr<'a> { } } - pub fn len(&self) -> usize { - self.end as usize - self.ptr as usize + pub fn len(&self) -> isize { + self.end as isize - self.ptr as isize } /// # Safety @@ -28,7 +28,7 @@ impl<'a> AsciiStr<'a> { #[inline] pub unsafe fn step_by(&mut self, n: usize) -> &mut Self { debug_assert!( - n <= self.len(), + n < isize::MAX as usize && n as isize <= self.len(), "buffer overflow: stepping by greater than our buffer length." ); // SAFETY: Safe if `n <= self.len()` From 53bce57e399865651cd25f3abc0e6d7c72e03f17 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 31 Oct 2024 16:55:22 -0500 Subject: [PATCH 19/20] Fix the `MAX` to `max_value()` for ancient rustc support. --- src/common.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common.rs b/src/common.rs index 2484437..77e7d82 100644 --- a/src/common.rs +++ b/src/common.rs @@ -28,7 +28,8 @@ impl<'a> AsciiStr<'a> { #[inline] pub unsafe fn step_by(&mut self, n: usize) -> &mut Self { debug_assert!( - n < isize::MAX as usize && n as isize <= self.len(), + // FIXME: remove when we drop support for < 1.43.0 + n < isize::max_value() as usize && n as isize <= self.len(), "buffer overflow: stepping by greater than our buffer length." ); // SAFETY: Safe if `n <= self.len()` From a6ef8058c2dbff0d1654ad9f9d1e33b07cad03cb Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 11 Dec 2024 12:37:39 -0600 Subject: [PATCH 20/20] Update benchmarks and descriptions. Related to #16. 
--- README.md | 79 +++++++++++----------------------- extras/simple-bench/Cargo.toml | 4 +- extras/simple-bench/README.md | 2 +- 3 files changed, 29 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 635bab5..4160045 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -fast-float2 -=========== +# fast-float2 [![Build](https://github.com/Alexhuszagh/fast-float-rust/workflows/CI/badge.svg)](https://github.com/Alexhuszagh/fast-float-rust/actions?query=branch%3Amaster) [![Latest Version](https://img.shields.io/crates/v/fast-float2.svg)](https://crates.io/crates/fast-float2) @@ -90,9 +89,6 @@ below (the only exception being the original fast_float C++ library, of course which is within noise bounds of this crate). On modern machines like Apple M1, parsing throughput can reach up to 1.5 GB/s. -In particular, it is faster than Rust standard library's `FromStr::from_str()` by a factor of 2-8x -(larger factor for longer float strings), and is typically 2-3x faster than the nearest competitors. - While various details regarding the algorithm can be found in the repository for the original C++ library, here are few brief notes: @@ -109,54 +105,31 @@ C++ library, here are few brief notes: ## Benchmarks Below are tables of best timings in nanoseconds for parsing a single number -into a 64-bit float. - -#### Intel i7-4771 - -Intel i7-4771 3.5GHz, macOS, Rust 1.49. 
- -| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | -| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 21.58 | 10.70 | 19.36 | 40.50 | 26.07 | 29.13 | -| lexical | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 | -| from_str | 174.43 | 22.30 | 99.93 | 227.76 | 111.31 | 204.46 | -| fast_float (C++) | 22.78 | 10.99 | 20.05 | 41.12 | 27.51 | 30.85 | -| abseil (C++) | 42.66 | 32.88 | 46.01 | 50.83 | 46.33 | 49.95 | -| netlib (C) | 57.53 | 24.86 | 64.72 | 56.63 | 36.20 | 67.29 | -| strtod (C) | 286.10 | 31.15 | 258.73 | 295.73 | 205.72 | 315.95 | - -#### Apple M1 - -Apple M1, macOS, Rust 1.49. - -| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | -| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 14.84 | 5.98 | 11.24 | 33.24 | 21.30 | 17.86 | -| lexical | 47.09 | 16.51 | 43.46 | 56.06 | 36.68 | 55.48 | -| from_str | 136.00 | 13.84 | 74.64 | 179.87 | 77.91 | 154.53 | -| fast_float (C++) | 13.71 | 7.28 | 11.71 | 32.94 | 20.64 | 18.30 | -| abseil (C++) | 36.55 | 24.20 | 38.48 | 40.86 | 35.46 | 40.09 | -| netlib (C) | 47.19 | 14.12 | 48.85 | 52.28 | 33.70 | 48.79 | -| strtod (C) | 176.13 | 21.48 | 165.43 | 187.98 | 132.19 | 190.63 | - -#### AMD Rome - -AMD Rome, Linux, Rust 1.49. - -| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | -| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 25.90 | 12.12 | 20.54 | 47.01 | 29.23 | 32.36 | -| lexical | 63.18 | 22.13 | 54.78 | 81.23 | 55.06 | 79.14 | -| from_str | 190.06 | 26.10 | 102.44 | 239.87 | 119.04 | 211.73 | -| fast_float (C++) | 21.29 | 10.47 | 18.31 | 42.33 | 24.56 | 29.76 | -| abseil (C++) | 44.54 | 34.13 | 47.38 | 52.64 | 43.77 | 53.03 | -| netlib (C) | 69.43 | 23.31 | 79.98 | 72.17 | 35.81 | 86.91 | -| strtod (C) | 123.37 | 65.68 | 101.58 | 118.36 | 118.61 | 123.72 | +into a 64-bit float (using the median score). 
+ +### Intel i7-14700K + +Intel i7-14700K 3.40GHz, Linux (WSL2), Rust 1.81. + +| | `canada` | `mesh` | `uniform` | `bi` | `iei` | `rec32` | +| ---------------------- | -------- | -------- | --------- | ----- | ------ | ------- | +| fast-float2 | 9.98 | 5.56 | 10.08 | 56.19 | 14.52 | 15.09 | +| fast-float | 9.77 | 5.04 | 9.05 | 57.52 | 14.40 | 14.23 | +| lexical | 10.62 | 4.93 | 9.92 | 26.40 | 12.43 | 14.40 | +| from_str | 11.59 | 5.92 | 11.23 | 35.92 | 14.75 | 16.76 | +| fast_float (C++) | 12.58 | 6.35 | 11.86 | 31.55 | 12.22 | 11.97 | +| abseil (C++) | 25.32 | 15.70 | 25.88 | 43.42 | 23.54 | 26.75 | +| netlib (C) | 35.10 | 10.22 | 37.72 | 68.63 | 23.07 | 38.23 | +| strtod (C) | 52.63 | 26.47 | 46.51 | 88.11 | 33.37 | 53.36 | +| doubleconversion (C++) | 32.50 | 14.69 | 47.80 | 70.01 | 205.72 | 45.66 | + +Note that the random number generation seems to differ between C/C++ and Rust, since the Rust implementations are slightly faster for pre-determined datasets like `canada` and `mesh`, but equivalent random number generators are slightly slower. Any performance penalty with `fast-float2` occurred due to fixing the UB in [check_len](https://github.com/aldanor/fast-float-rust/issues/28). The massive performance differences between `fast-float` (Rust) and `fast_float` (C++) are expected due to faster fallback algorithms ([#96](https://github.com/fastfloat/fast_float/pull/96) and [#104](https://github.com/fastfloat/fast_float/pull/104)) used in these cases. #### Parsers -- `fast-float` - this very crate -- `lexical` – `lexical_core`, v0.7 (non-lossy; same performance as lossy) +- `fast-float2` - this very crate +- `fast-float` - the pre-ported variant +- `lexical` – `lexical_core`, v1.0.5 - `from_str` – Rust standard library, `FromStr` trait - `fast_float (C++)` – original C++ implementation of 'fast-float' method - `abseil (C++)` – Abseil C++ Common Libraries @@ -168,9 +141,9 @@ AMD Rome, Linux, Rust 1.49. 
- `canada` – numbers in `canada.txt` file - `mesh` – numbers in `mesh.txt` file - `uniform` – uniform random numbers from 0 to 1 -- `iidi` – random numbers of format `%d%d.%d` -- `iei` – random numbers of format `%de%d` -- `rec32` – reciprocals of random 32-bit integers +- `bi` – large, integer-only floats +- `iei` – random numbers of format `%de%d` +- `rec32` – reciprocals of random 32-bit integers #### Notes diff --git a/extras/simple-bench/Cargo.toml b/extras/simple-bench/Cargo.toml index 0f82574..db3aec4 100644 --- a/extras/simple-bench/Cargo.toml +++ b/extras/simple-bench/Cargo.toml @@ -11,7 +11,7 @@ publish = false fast-float2 = { path = "../.." } structopt = "0.3" anyhow = "1.0" -lexical = "7.0.2" -lexical-core = "1.0.2" +lexical = "7.0.4" +lexical-core = "1.0.5" fastrand = "2.1.1" fast-float = "0.2" diff --git a/extras/simple-bench/README.md b/extras/simple-bench/README.md index fa6a3b8..0d9e95b 100644 --- a/extras/simple-bench/README.md +++ b/extras/simple-bench/README.md @@ -4,7 +4,7 @@ This crate provides a utility for benchmarking the `fast-float2` crate against To run a file-based test: ```sh -cargo run --release -- file ext/canada.txt +cargo run --release -- file ext/data/canada.txt ``` There's two files used in benchmarking of the original fast_float C++ library