diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d7eb0e..f03cf60 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,8 +1,7 @@ name: CI on: - push: - pull_request: + [push, pull_request, workflow_dispatch] jobs: test: @@ -11,7 +10,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [1.37.0, stable, nightly] + rust: [1.63.0, stable, nightly] steps: - uses: actions/checkout@v2 with: @@ -23,6 +22,23 @@ jobs: - run: cargo test - run: cd extras/data-tests && cargo run --release + msrv: + name: Rust ${{matrix.rust}} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + rust: [1.37.0] + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{matrix.rust}} + - run: cargo check + - run: cargo build + cross: name: Rust ${{matrix.target}} runs-on: ubuntu-latest diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..4825e6a --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,25 @@ +name: Fuzz + +on: + [pull_request, workflow_dispatch] + +jobs: + fuzz: + name: Fuzz ${{matrix.target}} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + target: [fast_float, roundtrip_f64] + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + - run: cargo install cargo-fuzz + - run: cargo check + - run: | + cd fuzz + cargo +nightly fuzz run --release ${{matrix.target}} -- -max_total_time=300 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..0da680d --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: Lint + +on: + [push, pull_request, workflow_dispatch] + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@master + with: + 
toolchain: nightly + components: rustfmt, clippy + - run: cargo check + - run: cargo fmt -- --check + - run: RUSTFLAGS="--deny warnings" cargo build + - run: cargo clippy --all-features -- --deny warnings diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml new file mode 100644 index 0000000..112f661 --- /dev/null +++ b/.github/workflows/miri.yml @@ -0,0 +1,21 @@ +name: Miri + +on: + [pull_request, workflow_dispatch] + +jobs: + miri: + name: Miri + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: dtolnay/rust-toolchain@stable + with: + toolchain: nightly + - run: cargo check + - run: cargo build + - run: | + rustup component add --toolchain nightly miri + cargo miri test diff --git a/CHANGELOG.md b/CHANGELOG.md index c8d0e85..4f415ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## 0.2.2 + +- Fix `no_std` support. +- Remove most uses of unsafe. +- Remove non-local safety invariants to prevent unsoundness. + +## 0.2.1 + +- Fix undefined behavior in checking the buffer length. + ## 0.2.0 - Fixed an edge case where long decimals with trailing zeros were truncated. diff --git a/Cargo.toml b/Cargo.toml index 5da5c9d..14198ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] -name = "fast-float" -version = "0.2.0" -authors = ["Ivan Smirnov "] -repository = "https://github.com/aldanor/fast-float-rust" -documentation = "https://docs.rs/fast-float" +name = "fast-float2" +version = "0.2.3" +authors = ["Ivan Smirnov ", "Alex Huszagh "] +repository = "https://github.com/Alexhuszagh/fast-float-rust" +documentation = "https://docs.rs/fast-float2" description = "Fast floating-point number parser." 
keywords = ["parser", "parsing", "parse", "float", "no-std"] categories = ["parser-implementations", "parsing", "text-processing", "algorithms", "no-std"] @@ -12,17 +12,19 @@ license = "MIT OR Apache-2.0" autobenches = false edition = "2018" exclude = ["benches/*", "extras/*"] +# FIXME: rust-version is not supported until 1.56.0. +rust-version = "1.37" [features] default = ["std"] std = [] [dev-dependencies] -lexical-core = "0.7" -hexf-parse = "0.1" +lexical-core = "1.0.2" +hexf-parse = "0.2.1" ryu = "1.0" -fastrand = "1.4" -num-bigint = "0.3" +fastrand = "2.1.1" +num-bigint = "0.4.6" [workspace] members = [".", "extras/data-tests", "extras/simple-bench"] diff --git a/README.md b/README.md index fde2755..4160045 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ -fast-float -========== +# fast-float2 -[![Build](https://github.com/aldanor/fast-float-rust/workflows/CI/badge.svg)](https://github.com/aldanor/fast-float-rust/actions?query=branch%3Amaster) -[![Latest Version](https://img.shields.io/crates/v/fast-float.svg)](https://crates.io/crates/fast-float) -[![Documentation](https://docs.rs/fast-float/badge.svg)](https://docs.rs/fast-float) +[![Build](https://github.com/Alexhuszagh/fast-float-rust/workflows/CI/badge.svg)](https://github.com/Alexhuszagh/fast-float-rust/actions?query=branch%3Amaster) +[![Latest Version](https://img.shields.io/crates/v/fast-float2.svg)](https://crates.io/crates/fast-float2) +[![Documentation](https://docs.rs/fast-float2/badge.svg)](https://docs.rs/fast-float2) [![Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Rustc 1.37+](https://img.shields.io/badge/rustc-1.37+-lightgray.svg)](https://blog.rust-lang.org/2019/08/15/Rust-1.37.0.html) @@ -12,19 +11,21 @@ This crate provides a super-fast decimal number parser from strings into floats. 
```toml [dependencies] -fast-float = "0.2" +fast-float2 = "0.2.3" ``` There are no dependencies and the crate can be used in a no_std context by disabling the "std" feature. *Compiler support: rustc 1.37+.* +This crate is in maintenance mode for bug fixes (especially security patches): minimal feature enhancements will be accepted. This implementation has been adopted by the Rust standard library: if you do not need parsing directly from bytes and/or partial parsers, you should use [FromStr](https://doc.rust-lang.org/std/str/trait.FromStr.html) for [f32](https://doc.rust-lang.org/std/primitive.f32.html) or [f64](https://doc.rust-lang.org/std/primitive.f64.html) instead. + ## Usage -There's two top-level functions provided: -[`parse()`](https://docs.rs/fast-float/latest/fast_float/fn.parse.html) and +There are two top-level functions provided: +[`parse()`](https://docs.rs/fast-float/latest/fast_float/fn.parse.html) and [`parse_partial()`](https://docs.rs/fast-float/latest/fast_float/fn.parse_partial.html), both taking -either a string or a bytes slice and parsing the input into either `f32` or `f64`: +either a string or a bytes slice and parsing the input into either `f32` or `f64`: - `parse()` treats the whole string as a decimal number and returns an error if there are invalid characters or if the string is empty. @@ -39,12 +40,12 @@ Example: ```rust // Parse the entire string as a decimal number. let s = "1.23e-02"; -let x: f32 = fast_float::parse(s).unwrap(); +let x: f32 = fast_float2::parse(s).unwrap(); assert_eq!(x, 0.0123); // Parse as many characters as possible as a decimal number.
let s = "1.23e-02foo"; -let (x, n) = fast_float::parse_partial::<f32, _>(s).unwrap(); +let (x, n) = fast_float2::parse_partial::<f32, _>(s).unwrap(); assert_eq!(x, 0.0123); assert_eq!(n, 8); assert_eq!(&s[n..], "foo"); @@ -53,19 +54,22 @@ assert_eq!(&s[n..], "foo"); ## Details This crate is a direct port of Daniel Lemire's [`fast_float`](https://github.com/fastfloat/fast_float) -C++ library (valuable discussions with Daniel while porting it helped shape the crate and get it to +C++ library (valuable discussions with Daniel while porting it helped shape the crate and get it to the performance level it's at now), with some Rust-specific tweaks. Please see the original repository for many useful details regarding the algorithm and the implementation. -The parser is locale-independent. The resulting value is the closest floating-point values (using either -`f32` or `f64`), using the "round to even" convention for values that would otherwise fall right in-between -two values. That is, we provide exact parsing according to the IEEE standard. +The parser is locale-independent. The resulting value is the closest floating-point value (using either +`f32` or `f64`), using the "round to even" convention for values that would otherwise fall right in-between +two values. That is, we provide exact parsing according to the IEEE standard. Infinity and NaN values can be parsed, along with scientific notation. Both little-endian and big-endian platforms are equally supported, with extra optimizations enabled on little-endian architectures. +Since [fast-float-rust](https://github.com/aldanor/fast-float-rust) is unmaintained, this is a fork +containing the patches and security updates.
+ ## Testing There are a few ways this crate is tested: @@ -80,14 +84,11 @@ There are a few ways this crate is tested: ## Performance The presented parser seems to beat all of the existing C/C++/Rust float parsers known to us at the -moment by a large margin, in all of the datasets we tested it on so far – see detailed benchmarks +moment by a large margin, in all of the datasets we tested it on so far – see detailed benchmarks below (the only exception being the original fast_float C++ library, of course – performance of which is within noise bounds of this crate). On modern machines like Apple M1, parsing throughput can reach up to 1.5 GB/s. -In particular, it is faster than Rust standard library's `FromStr::from_str()` by a factor of 2-8x -(larger factor for longer float strings), and is typically 2-3x faster than the nearest competitors. - While various details regarding the algorithm can be found in the repository for the original C++ library, here are few brief notes: @@ -103,55 +104,32 @@ C++ library, here are few brief notes: ## Benchmarks -Below are tables of best timings in nanoseconds for parsing a single number -into a 64-bit float. - -#### Intel i7-4771 - -Intel i7-4771 3.5GHz, macOS, Rust 1.49. - -| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | -| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 21.58 | 10.70 | 19.36 | 40.50 | 26.07 | 29.13 | -| lexical | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 | -| from_str | 174.43 | 22.30 | 99.93 | 227.76 | 111.31 | 204.46 | -| fast_float (C++) | 22.78 | 10.99 | 20.05 | 41.12 | 27.51 | 30.85 | -| abseil (C++) | 42.66 | 32.88 | 46.01 | 50.83 | 46.33 | 49.95 | -| netlib (C) | 57.53 | 24.86 | 64.72 | 56.63 | 36.20 | 67.29 | -| strtod (C) | 286.10 | 31.15 | 258.73 | 295.73 | 205.72 | 315.95 | - -#### Apple M1 - -Apple M1, macOS, Rust 1.49. 
+Below are tables of best timings in nanoseconds for parsing a single number +into a 64-bit float (using the median score). -| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | -| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 14.84 | 5.98 | 11.24 | 33.24 | 21.30 | 17.86 | -| lexical | 47.09 | 16.51 | 43.46 | 56.06 | 36.68 | 55.48 | -| from_str | 136.00 | 13.84 | 74.64 | 179.87 | 77.91 | 154.53 | -| fast_float (C++) | 13.71 | 7.28 | 11.71 | 32.94 | 20.64 | 18.30 | -| abseil (C++) | 36.55 | 24.20 | 38.48 | 40.86 | 35.46 | 40.09 | -| netlib (C) | 47.19 | 14.12 | 48.85 | 52.28 | 33.70 | 48.79 | -| strtod (C) | 176.13 | 21.48 | 165.43 | 187.98 | 132.19 | 190.63 | +### Intel i7-14700K -#### AMD Rome +Intel i7-14700K 3.40GHz, Linux (WSL2), Rust 1.81. -AMD Rome, Linux, Rust 1.49. +| | `canada` | `mesh` | `uniform` | `bi` | `iei` | `rec32` | +| ---------------------- | -------- | -------- | --------- | ----- | ------ | ------- | +| fast-float2 | 9.98 | 5.56 | 10.08 | 56.19 | 14.52 | 15.09 | +| fast-float | 9.77 | 5.04 | 9.05 | 57.52 | 14.40 | 14.23 | +| lexical | 10.62 | 4.93 | 9.92 | 26.40 | 12.43 | 14.40 | +| from_str | 11.59 | 5.92 | 11.23 | 35.92 | 14.75 | 16.76 | +| fast_float (C++) | 12.58 | 6.35 | 11.86 | 31.55 | 12.22 | 11.97 | +| abseil (C++) | 25.32 | 15.70 | 25.88 | 43.42 | 23.54 | 26.75 | +| netlib (C) | 35.10 | 10.22 | 37.72 | 68.63 | 23.07 | 38.23 | +| strtod (C) | 52.63 | 26.47 | 46.51 | 88.11 | 33.37 | 53.36 | +| doubleconversion (C++) | 32.50 | 14.69 | 47.80 | 70.01 | 205.72 | 45.66 | -| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | -| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 25.90 | 12.12 | 20.54 | 47.01 | 29.23 | 32.36 | -| lexical | 63.18 | 22.13 | 54.78 | 81.23 | 55.06 | 79.14 | -| from_str | 190.06 | 26.10 | 102.44 | 239.87 | 119.04 | 211.73 | -| fast_float (C++) | 21.29 | 10.47 | 18.31 | 42.33 | 24.56 | 29.76 | -| abseil 
(C++) | 44.54 | 34.13 | 47.38 | 52.64 | 43.77 | 53.03 | -| netlib (C) | 69.43 | 23.31 | 79.98 | 72.17 | 35.81 | 86.91 | -| strtod (C) | 123.37 | 65.68 | 101.58 | 118.36 | 118.61 | 123.72 | +Note that the random number generation seems to differ between C/C++ and Rust, since the Rust implementations are slightly faster for pre-determined datasets like `canada` and `mesh`, but equivalent random number generators are slightly slower. Any performance penalty with `fast-float2` occurred due to fixing the UB in [check_len](https://github.com/aldanor/fast-float-rust/issues/28). The massive performance differences between `fast-float` (Rust) and `fast_float` (C++) are expected due to faster fallback algorithms ([#96](https://github.com/fastfloat/fast_float/pull/96) and [#104](https://github.com/fastfloat/fast_float/pull/104)) used in these cases. #### Parsers -- `fast-float` - this very crate -- `lexical` – `lexical_core`, v0.7 (non-lossy; same performance as lossy) +- `fast-float2` - this very crate +- `fast-float` - the pre-ported variant +- `lexical` – `lexical_core`, v1.0.5 - `from_str` – Rust standard library, `FromStr` trait - `fast_float (C++)` – original C++ implementation of 'fast-float' method - `abseil (C++)` – Abseil C++ Common Libraries @@ -163,18 +141,18 @@ AMD Rome, Linux, Rust 1.49. - `canada` – numbers in `canada.txt` file - `mesh` – numbers in `mesh.txt` file - `uniform` – uniform random numbers from 0 to 1 -- `iidi` – random numbers of format `%d%d.%d` -- `iei` – random numbers of format `%de%d` -- `rec32` – reciprocals of random 32-bit integers +- `bi` – large, integer-only floats +- `rec32` – reciprocals of random 32-bit integers #### Notes -- The two test files referred above can be found in +- The two test files referred above can be found in [this](https://github.com/lemire/simple_fastfloat_benchmark) repository.
- The Rust part of the table (along with a few other benchmarks) can be generated via the benchmark tool that can be found under `extras/simple-bench` of this repo. - The C/C++ part of the table (along with a few other benchmarks and parsers) can be - generated via a C++ utility that can be found in + generated via a C++ utility that can be found in [this](https://github.com/lemire/simple_fastfloat_benchmark) repository.
diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..ba992a3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,3 @@ +# Security Policy + +This crate is in maintenance mode, so only the latest version is supported and will be receiving bug fixes. If you have a security vulnerability, please reach out to me privately at [ahuszagh@gmail.com](mailto:ahuszagh@gmail.com). Other forms of communication may not reach me. diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000..8bd81b7 --- /dev/null +++ b/clippy.toml @@ -0,0 +1,20 @@ +avoid-breaking-exported-api = false +disallowed-macros = [ + # Can also use an inline table with a `path` key. + { path = "std::print", reason = "no IO allowed" }, + { path = "std::println", reason = "no IO allowed" }, + { path = "std::format", reason = "no string allocation allowed" }, + { path = "std::debug", reason = "debugging macros should not be present in any release" }, + # NOTE: unimplemented is fine because this can be for intentionally disabled methods + { path = "std::todo", reason = "should never have TODO macros in releases" }, +] +disallowed-methods = [ + { path = "std::io::stdout", reason = "no IO allowed" }, + { path = "std::io::stdin", reason = "no IO allowed" }, + { path = "std::io::stderr", reason = "no IO allowed" }, +] +disallowed-types = [ + { path = "std::io::File", reason = "no IO allowed" }, + { path = "std::io::BufReader", reason = "need our own abstractions for reading/writing" }, + { path = "std::io::BufWriter", reason = "need our own abstractions for reading/writing" }, +] diff --git a/extras/data-tests/Cargo.toml b/extras/data-tests/Cargo.toml index e04d88f..5cb96b5 100644 --- a/extras/data-tests/Cargo.toml +++ b/extras/data-tests/Cargo.toml @@ -8,4 +8,4 @@ license = "MIT OR Apache-2.0" publish = false [dependencies] -fast-float = { path = "../.." } +fast-float2 = { path = "../.." 
} diff --git a/extras/data-tests/src/main.rs b/extras/data-tests/src/main.rs index 7487b0c..5acf599 100644 --- a/extras/data-tests/src/main.rs +++ b/extras/data-tests/src/main.rs @@ -22,7 +22,7 @@ impl TestCase { } } - fn execute_one(&self, expected: F) { + fn execute_one(&self, expected: F) { let r = F::parse_float_partial(&self.string); if !r.is_ok() { dbg!(self); @@ -31,10 +31,7 @@ impl TestCase { let (value, len) = r.unwrap(); if len != self.string.len() || value != expected { if len != self.string.len() { - eprintln!( - "Expected empty string remainder, got: {:?}", - self.string.len() - len - ); + eprintln!("Expected empty string remainder, got: {:?}", self.string.len() - len); } if value != expected { eprintln!("Expected output {}, got {}", expected, value); @@ -51,10 +48,7 @@ impl TestCase { fn parse_test_file(filename: impl AsRef) -> impl Iterator { let file = File::open(filename).unwrap(); - BufReader::new(file) - .lines() - .map(Result::unwrap) - .map(TestCase::parse) + BufReader::new(file).lines().map(Result::unwrap).map(TestCase::parse) } fn run_test_cases(filename: impl AsRef) -> usize { diff --git a/extras/simple-bench/Cargo.toml b/extras/simple-bench/Cargo.toml index 5cd59c8..db3aec4 100644 --- a/extras/simple-bench/Cargo.toml +++ b/extras/simple-bench/Cargo.toml @@ -8,9 +8,10 @@ license = "MIT OR Apache-2.0" publish = false [dependencies] -fast-float = { path = "../.." } +fast-float2 = { path = "../.." 
} structopt = "0.3" anyhow = "1.0" -lexical = "5.2" -lexical-core = "0.7" -fastrand = "1.4" +lexical = "7.0.4" +lexical-core = "1.0.5" +fastrand = "2.1.1" +fast-float = "0.2" diff --git a/extras/simple-bench/README.md b/extras/simple-bench/README.md index 10fdbbc..0d9e95b 100644 --- a/extras/simple-bench/README.md +++ b/extras/simple-bench/README.md @@ -1,10 +1,10 @@ -This crate provides a utility for benchmarking the `fast-float` crate against +This crate provides a utility for benchmarking the `fast-float2` crate against `lexical_core` and standard library's `FromStr`. To run a file-based test: ```sh -cargo run --release -- file ext/canada.txt +cargo run --release -- file ext/data/canada.txt ``` There's two files used in benchmarking of the original fast_float C++ library @@ -18,8 +18,8 @@ To run a randomized test: cargo run --release -- random uniform ``` -For more details and options (choosing a different random generator, storing -randomized inputs to a file, changing the number of runs, or switching between +For more details and options (choosing a different random generator, storing +randomized inputs to a file, changing the number of runs, or switching between 32-bit and 64-bit floats), refer to help: ``` diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs index 9428fad..aaa2d49 100644 --- a/extras/simple-bench/src/main.rs +++ b/extras/simple-bench/src/main.rs @@ -6,20 +6,14 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::time::Instant; +use fast_float2::FastFloat; use fastrand::Rng; use lexical::FromLexical; -use structopt::StructOpt; - -use fast_float::FastFloat; - use random::RandomGen; +use structopt::StructOpt; #[derive(Debug, StructOpt)] -#[structopt( - name = "fast-float-simple-bench", - about = "fast-float benchmark utility", - no_version -)] +#[structopt(name = "fast-float-simple-bench", about = "fast-float benchmark utility", no_version)] struct Opt { /// Parse numbers as float32 (default is float64) 
#[structopt(short, long = "32")] @@ -108,6 +102,7 @@ fn run_bench T>( #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum Method { FastFloat, + FastFloat2, Lexical, FromStr, } @@ -123,13 +118,14 @@ fn type_str(float32: bool) -> &'static str { impl Method { pub fn name(&self) -> &'static str { match self { + Self::FastFloat2 => "fast-float2", Self::FastFloat => "fast-float", Self::Lexical => "lexical", Self::FromStr => "from_str", } } - fn run_as( + fn run_as( &self, input: &Input, repeat: usize, @@ -137,13 +133,14 @@ impl Method { ) -> BenchResult { let data = &input.data; let times = match self { + Self::FastFloat2 => run_bench(data, repeat, |s: &str| { + fast_float2::parse_partial::(s).unwrap_or_default().0 + }), Self::FastFloat => run_bench(data, repeat, |s: &str| { fast_float::parse_partial::(s).unwrap_or_default().0 }), Self::Lexical => run_bench(data, repeat, |s: &str| { - lexical_core::parse_partial::(s.as_bytes()) - .unwrap_or_default() - .0 + lexical_core::parse_partial::(s.as_bytes()).unwrap_or_default().0 }), Self::FromStr => run_bench(data, repeat, |s: &str| s.parse::().unwrap_or_default()), }; @@ -165,7 +162,7 @@ impl Method { } pub fn all() -> &'static [Self] { - &[Method::FastFloat, Method::Lexical, Method::FromStr] + &[Method::FastFloat2, Method::FastFloat, Method::Lexical, Method::FromStr] } } @@ -175,12 +172,8 @@ fn print_report(results: &[BenchResult], title: &str) { println!("| {:^width$} |", title, width = width); println!("|{:=>(); + let mut metrics = res.times.iter().map(|&t| transform(t, n, b)).collect::>(); metrics.sort_by(|a, b| a.partial_cmp(b).unwrap()); for &(_, idx) in columns { print!("{:>w$.2}", metrics[idx], w = w); @@ -235,23 +224,23 @@ struct Input { impl Input { pub fn from_file(filename: impl AsRef) -> Self { let filename = filename.as_ref(); - let data = fs::read_to_string(&filename) - .unwrap() - .trim() - .lines() - .map(String::from) - .collect(); + let data = + 
fs::read_to_string(&filename).unwrap().trim().lines().map(String::from).collect(); let name = filename.file_name().unwrap().to_str().unwrap().into(); - Self { data, name } + Self { + data, + name, + } } pub fn from_random(gen: RandomGen, count: usize, seed: u64) -> Self { let mut rng = Rng::with_seed(seed); - let data = iter::repeat_with(|| gen.gen(&mut rng)) - .take(count) - .collect(); + let data = iter::repeat_with(|| gen.gen(&mut rng)).take(count).collect(); let name = format!("{}", gen); - Self { data, name } + Self { + data, + name, + } } pub fn count(&self) -> usize { @@ -276,14 +265,16 @@ impl Input { fn main() { let opt: Opt = StructOpt::from_args(); - let methods = if !opt.only_fast_float && !matches!(&opt.command, &Cmd::All {..}) { + let methods = if !opt.only_fast_float && !matches!(&opt.command, &Cmd::All { .. }) { Method::all().into() } else { - vec![Method::FastFloat] + vec![Method::FastFloat2] }; let inputs = match opt.command { - Cmd::File { filename } => vec![Input::from_file(filename)], + Cmd::File { + filename, + } => vec![Input::from_file(filename)], Cmd::Random { gen, count, @@ -295,8 +286,11 @@ fn main() { fs::write(filename, input.data.join("\n")).unwrap(); } vec![input] - } - Cmd::All { count, seed } => { + }, + Cmd::All { + count, + seed, + } => { let mut inputs = vec![]; let data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("ext/data"); inputs.push(Input::from_file(data_dir.join("mesh.txt"))); @@ -305,7 +299,7 @@ fn main() { inputs.push(Input::from_random(gen, count, seed)) } inputs - } + }, }; let mut results = vec![]; diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 426cf5e..c019851 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -12,8 +12,8 @@ cargo-fuzz = true members = ["."] [dependencies] -fast-float = { path = ".." } -libfuzzer-sys = "0.3" +fast-float2 = { path = ".." 
} +libfuzzer-sys = "0.4.7" ryu = "1.0" [[bin]] diff --git a/fuzz/fuzz_targets/fast_float.rs b/fuzz/fuzz_targets/fast_float.rs index 733c5f8..581e5d7 100644 --- a/fuzz/fuzz_targets/fast_float.rs +++ b/fuzz/fuzz_targets/fast_float.rs @@ -11,6 +11,6 @@ fn black_box(dummy: T) -> T { } fuzz_target!(|data: &[u8]| { - let _ = black_box(::fast_float::parse::(data)); - let _ = black_box(::fast_float::parse::(data)); + let _ = black_box(::fast_float2::parse::(data)); + let _ = black_box(::fast_float2::parse::(data)); }); diff --git a/fuzz/fuzz_targets/roundtrip_f64.rs b/fuzz/fuzz_targets/roundtrip_f64.rs index 4c92e9f..bb3506c 100644 --- a/fuzz/fuzz_targets/roundtrip_f64.rs +++ b/fuzz/fuzz_targets/roundtrip_f64.rs @@ -5,7 +5,7 @@ use libfuzzer_sys::fuzz_target; // is small enough that we can test it exhaustively fn check_roundtrip(float: f64, string: impl AsRef) { - let result = ::fast_float::parse::(string.as_ref()).unwrap(); + let result = ::fast_float2::parse::(string.as_ref()).unwrap(); if float.is_nan() { assert!(result.is_nan()); } else { diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..164014c --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,19 @@ +# Requires nightly to do proper formatting. 
+use_small_heuristics = "Off" +use_field_init_shorthand = true +trailing_semicolon = true +newline_style = "Unix" +match_block_trailing_comma = true +empty_item_single_line = false +enum_discrim_align_threshold = 40 +fn_params_layout = "Tall" +fn_single_line = false +format_macro_matchers = true +format_macro_bodies = true +imports_indent = "Block" +imports_layout = "HorizontalVertical" +indent_style = "Block" +match_arm_blocks = true +overflow_delimited_expr = true +group_imports = "StdExternalCrate" +wrap_comments = true diff --git a/src/binary.rs b/src/binary.rs index 1d6eadd..88d24ec 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -17,7 +17,7 @@ pub fn compute_float(q: i64, mut w: u64) -> AdjustedMantissa { w <<= lz; let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3); if lo == 0xFFFF_FFFF_FFFF_FFFF { - let inside_safe_exponent = (q >= -27) && (q <= 55); + let inside_safe_exponent = (-27..=55).contains(&q); if !inside_safe_exponent { return am_error; } @@ -33,7 +33,10 @@ pub fn compute_float(q: i64, mut w: u64) -> AdjustedMantissa { mantissa += mantissa & 1; mantissa >>= 1; power2 = (mantissa >= (1_u64 << F::MANTISSA_EXPLICIT_BITS)) as i32; - return AdjustedMantissa { mantissa, power2 }; + return AdjustedMantissa { + mantissa, + power2, + }; } if lo <= 1 && q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64 @@ -53,7 +56,10 @@ pub fn compute_float(q: i64, mut w: u64) -> AdjustedMantissa { if power2 >= F::INFINITE_POWER { return am_inf; } - AdjustedMantissa { mantissa, power2 } + AdjustedMantissa { + mantissa, + power2, + } } #[inline] @@ -67,9 +73,10 @@ fn full_multiplication(a: u64, b: u64) -> (u64, u64) { (r as u64, (r >> 64) as u64) } -// This will compute or rather approximate w * 5**q and return a pair of 64-bit words -// approximating the result, with the "high" part corresponding to the most significant -// bits and the low part corresponding to the least significant bits. 
+// This will compute or rather approximate w * 5**q and return a pair of 64-bit +// words approximating the result, with the "high" part corresponding to the +// most significant bits and the low part corresponding to the least significant +// bits. #[inline] fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) { debug_assert!(q >= SMALLEST_POWER_OF_FIVE as i64); @@ -82,7 +89,12 @@ fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) { 0xFFFF_FFFF_FFFF_FFFF_u64 }; let index = (q - SMALLEST_POWER_OF_FIVE as i64) as usize; - let (lo5, hi5) = unsafe { *POWER_OF_FIVE_128.get_unchecked(index) }; + // NOTE: this cannot be elided by the compiler, but the proof the index + // must be within the bounds is non-trivial, especially because this + // comes from a parsed result. Since this is unlikely to have any major + // performance implications, as is determined empirically, we keep the + // bounds check despite the performance hit. + let (lo5, hi5) = POWER_OF_FIVE_128[index]; let (mut first_lo, mut first_hi) = full_multiplication(w, lo5); if first_hi & mask == mask { let (_, second_hi) = full_multiplication(w, hi5); diff --git a/src/common.rs b/src/common.rs index e46ded0..77e7d82 100644 --- a/src/common.rs +++ b/src/common.rs @@ -14,19 +14,47 @@ impl<'a> AsciiStr<'a> { Self { ptr: s.as_ptr(), end: unsafe { s.as_ptr().add(s.len()) }, - _marker: PhantomData::default(), + _marker: PhantomData, } } + pub fn len(&self) -> isize { + self.end as isize - self.ptr as isize + } + + /// # Safety + /// + /// Safe if `n <= self.len()` #[inline] - pub fn step_by(&mut self, n: usize) -> &mut Self { + pub unsafe fn step_by(&mut self, n: usize) -> &mut Self { + debug_assert!( + // FIXME: remove when we drop support for < 1.43.0 + n < isize::max_value() as usize && n as isize <= self.len(), + "buffer overflow: stepping by greater than our buffer length."
+ ); + // SAFETY: Safe if `n <= self.len()` unsafe { self.ptr = self.ptr.add(n) }; self } + /// # Safety + /// + /// Safe if `!self.is_empty()` #[inline] - pub fn step(&mut self) -> &mut Self { - self.step_by(1) + pub unsafe fn step(&mut self) -> &mut Self { + debug_assert!(!self.is_empty(), "buffer overflow: buffer is empty."); + // SAFETY: Safe if the buffer is not empty, that is, `self.len() >= 1` + unsafe { self.step_by(1) } + } + + #[inline] + pub fn step_if(&mut self, c: u8) -> bool { + let stepped = self.first_is(c); + if stepped { + // SAFETY: safe since we have at least 1 character in the buffer + unsafe { self.step() }; + } + stepped } #[inline] @@ -34,100 +62,117 @@ impl<'a> AsciiStr<'a> { self.ptr == self.end } + /// # Safety + /// + /// Safe if `!self.is_empty()` #[inline] - pub fn first(&self) -> u8 { + pub unsafe fn first_unchecked(&self) -> u8 { + debug_assert!(!self.is_empty(), "attempting to get first value of empty buffer."); unsafe { *self.ptr } } #[inline] - pub fn first_is(&self, c: u8) -> bool { - self.first() == c + pub fn first(&self) -> Option { + if self.is_empty() { + None + } else { + // SAFETY: safe since `!self.is_empty()` + Some(unsafe { self.first_unchecked() }) + } } #[inline] - pub fn first_either(&self, c1: u8, c2: u8) -> bool { - let c = self.first(); - c == c1 || c == c2 + pub fn first_is(&self, c: u8) -> bool { + self.first() == Some(c) } #[inline] - pub fn check_first(&self, c: u8) -> bool { - !self.is_empty() && self.first() == c + pub fn first_is2(&self, c1: u8, c2: u8) -> bool { + self.first().map_or(false, |c| c == c1 || c == c2) } #[inline] - pub fn check_first_either(&self, c1: u8, c2: u8) -> bool { - !self.is_empty() && (self.first() == c1 || self.first() == c2) + pub fn first_is_digit(&self) -> bool { + self.first().map_or(false, |c| c.is_ascii_digit()) } #[inline] - pub fn check_first_digit(&self) -> bool { - !self.is_empty() && self.first().is_ascii_digit() + pub fn first_digit(&self) -> Option { + 
self.first().and_then(|x| { + if x.is_ascii_digit() { + Some(x - b'0') + } else { + None + } + }) } #[inline] - pub fn parse_digits(&mut self, mut func: impl FnMut(u8)) { - while !self.is_empty() && self.first().is_ascii_digit() { - func(self.first() - b'0'); - self.step(); - } + pub fn try_read_digit(&mut self) -> Option { + let digit = self.first_digit()?; + // SAFETY: Safe since `first_digit` means the buffer is not empty + unsafe { self.step() }; + Some(digit) } #[inline] - pub fn check_len(&self, n: usize) -> bool { - let len = self.end as usize - self.ptr as usize; - n <= len + pub fn parse_digits(&mut self, mut func: impl FnMut(u8)) { + while let Some(digit) = self.try_read_digit() { + func(digit); + } } #[inline] pub fn try_read_u64(&self) -> Option { - if self.check_len(8) { - Some(self.read_u64()) + if self.len() >= 8 { + Some(unsafe { self.read_u64_unchecked() }) } else { None } } + /// # Safety + /// + /// Safe if `self.len() >= 8` #[inline] - pub fn read_u64(&self) -> u64 { - debug_assert!(self.check_len(8)); + #[allow(clippy::cast_ptr_alignment)] + pub unsafe fn read_u64_unchecked(&self) -> u64 { + debug_assert!(self.len() >= 8, "overflowing buffer: buffer is not 8 bytes long"); let src = self.ptr as *const u64; + // SAFETY: Safe if `self.len() >= 8` u64::from_le(unsafe { ptr::read_unaligned(src) }) } #[inline] pub fn offset_from(&self, other: &Self) -> isize { - isize::wrapping_sub(self.ptr as _, other.ptr as _) // assuming the same end + isize::wrapping_sub(self.ptr as isize, other.ptr as isize) // assuming the same end } } -// Most of these are inherently unsafe; we assume we know what we're calling and when. +// Most of these are inherently unsafe; we assume we know what we're calling and +// when. 
pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { - #[inline] - fn get_at(&self, i: usize) -> u8 { - unsafe { *self.as_ref().get_unchecked(i) } - } - - #[inline] - fn get_first(&self) -> u8 { - debug_assert!(!self.as_ref().is_empty()); - self.get_at(0) - } - #[inline] fn check_first(&self, c: u8) -> bool { - !self.as_ref().is_empty() && self.get_first() == c + self.as_ref().first() == Some(&c) } #[inline] fn check_first2(&self, c1: u8, c2: u8) -> bool { - !self.as_ref().is_empty() && (self.get_first() == c1 || self.get_first() == c2) + if let Some(&c) = self.as_ref().first() { + c == c1 || c == c2 + } else { + false + } } #[inline] fn eq_ignore_case(&self, u: &[u8]) -> bool { - debug_assert!(self.as_ref().len() >= u.len()); - let d = (0..u.len()).fold(0, |d, i| d | self.get_at(i) ^ u.get_at(i)); + let s = self.as_ref(); + if s.len() < u.len() { + return false; + } + let d = (0..u.len()).fold(0, |d, i| d | s[i] ^ u[i]); d == 0 || d == 32 } @@ -145,31 +190,33 @@ pub trait ByteSlice: AsRef<[u8]> + AsMut<[u8]> { s } + /// # Safety + /// + /// Safe if `self.len() >= 8`. #[inline] - fn skip_chars2(&self, c1: u8, c2: u8) -> &[u8] { - let mut s = self.as_ref(); - while !s.is_empty() && (s.get_first() == c1 || s.get_first() == c2) { - s = s.advance(1); - } - s - } - - #[inline] - fn read_u64(&self) -> u64 { + #[allow(clippy::cast_ptr_alignment)] + unsafe fn read_u64(&self) -> u64 { debug_assert!(self.as_ref().len() >= 8); let src = self.as_ref().as_ptr() as *const u64; + // SAFETY: safe if `self.len() >= 8`. u64::from_le(unsafe { ptr::read_unaligned(src) }) } + /// # Safety + /// + /// Safe if `self.len() >= 8`. #[inline] - fn write_u64(&mut self, value: u64) { + #[allow(clippy::cast_ptr_alignment)] + unsafe fn write_u64(&mut self, value: u64) { debug_assert!(self.as_ref().len() >= 8); let dst = self.as_mut().as_mut_ptr() as *mut u64; + // SAFETY: safe if `self.len() >= 8`. 
unsafe { ptr::write_unaligned(dst, u64::to_le(value)) }; } } -impl ByteSlice for [u8] {} +impl ByteSlice for [u8] { +} #[inline] pub fn is_8digits(v: u64) -> bool { @@ -180,8 +227,8 @@ pub fn is_8digits(v: u64) -> bool { #[inline] pub fn parse_digits(s: &mut &[u8], mut f: impl FnMut(u8)) { - while !s.is_empty() { - let c = s.get_first().wrapping_sub(b'0'); + while let Some(&ch) = s.first() { + let c = ch.wrapping_sub(b'0'); if c < 10 { f(c); *s = s.advance(1); @@ -215,14 +262,14 @@ mod tests { fn test_read_write_u64() { let bytes = b"01234567"; let string = AsciiStr::new(bytes); - let int = string.read_u64(); - assert_eq!(int, 0x3736353433323130); + let int = string.try_read_u64(); + assert_eq!(int, Some(0x3736353433323130)); - let int = bytes.read_u64(); + let int = unsafe { bytes.read_u64() }; assert_eq!(int, 0x3736353433323130); let mut slc = [0u8; 8]; - slc.write_u64(0x3736353433323130); + unsafe { slc.write_u64(0x3736353433323130) }; assert_eq!(&slc, bytes); } } diff --git a/src/decimal.rs b/src/decimal.rs index 2e7aaa0..74342f2 100644 --- a/src/decimal.rs +++ b/src/decimal.rs @@ -33,7 +33,8 @@ impl PartialEq for Decimal { } } -impl Eq for Decimal {} +impl Eq for Decimal { +} impl Default for Decimal { fn default() -> Self { @@ -86,7 +87,7 @@ impl Decimal { if dp < self.num_digits { round_up = self.digits[dp] >= 5; if self.digits[dp] == 5 && dp + 1 == self.num_digits { - round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0)) + round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0)); } } if round_up { @@ -189,9 +190,11 @@ impl Decimal { #[inline] pub fn parse_decimal(mut s: &[u8]) -> Decimal { // can't fail since it follows a call to parse_number + assert!(!s.is_empty(), "the buffer cannot be empty since it follows a call to parse_number"); let mut d = Decimal::default(); let start = s; - let c = s.get_first(); + + let c = s[0]; d.negative = c == b'-'; if c == b'-' || c == b'+' { s = s.advance(1); @@ -205,11 +208,13 @@ pub 
fn parse_decimal(mut s: &[u8]) -> Decimal { s = s.skip_chars(b'0'); } while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS { - let v = s.read_u64(); + // SAFETY: Safe since `s.len() >= 8` + let v = unsafe { s.read_u64() }; if !is_8digits(v) { break; } - d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030); + // SAFETY: Safe since `num_digits + 8 < Decimal::MAX_DIGITS` + unsafe { d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030) }; d.num_digits += 8; s = s.advance(8); } @@ -249,7 +254,11 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { exp_num = 10 * exp_num + digit as i32; } }); - d.decimal_point += if neg_exp { -exp_num } else { exp_num }; + d.decimal_point += if neg_exp { + -exp_num + } else { + exp_num + }; } for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW { d.digits[i] = 0; @@ -258,6 +267,7 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { } #[inline] +#[allow(clippy::redundant_else)] fn number_of_digits_decimal_left_shift(d: &Decimal, mut shift: usize) -> usize { const TABLE: [u16; 65] = [ 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024, @@ -317,7 +327,7 @@ fn number_of_digits_decimal_left_shift(d: &Decimal, mut shift: usize) -> usize { shift &= 63; let x_a = TABLE[shift]; let x_b = TABLE[shift + 1]; - let num_new_digits = (x_a >> 11) as _; + let num_new_digits = (x_a >> 11) as usize; let pow5_a = (0x7FF & x_a) as usize; let pow5_b = (0x7FF & x_b) as usize; let pow5 = &TABLE_POW5[pow5_a..]; diff --git a/src/float.rs b/src/float.rs index b4e9c68..dee116e 100644 --- a/src/float.rs +++ b/src/float.rs @@ -45,7 +45,8 @@ pub trait Float: fn pow10_fast_path(exponent: usize) -> Self; } -impl private::Sealed for f32 {} +impl private::Sealed for f32 { +} impl Float for f32 { const INFINITY: Self = core::f32::INFINITY; @@ -67,25 +68,25 @@ impl Float for f32 { #[inline] fn from_u64(v: u64) -> Self { - v as _ + v as f32 } #[inline] fn from_u64_bits(v: u64) -> Self { - f32::from_bits((v & 
0xFFFFFFFF) as u32) + f32::from_bits((v & 0xFFFF_FFFF) as u32) } #[inline] fn pow10_fast_path(exponent: usize) -> Self { #[allow(clippy::use_self)] - const TABLE: [f32; 16] = [ - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0., - ]; + const TABLE: [f32; 16] = + [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.]; TABLE[exponent & 15] } } -impl private::Sealed for f64 {} +impl private::Sealed for f64 { +} impl Float for f64 { const INFINITY: Self = core::f64::INFINITY; @@ -107,7 +108,7 @@ impl Float for f64 { #[inline] fn from_u64(v: u64) -> Self { - v as _ + v as f64 } #[inline] diff --git a/src/lib.rs b/src/lib.rs index aef86be..d6793ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,37 +1,52 @@ -//! This crate provides a super-fast decimal number parser from strings into floats. +//! This crate provides a super-fast decimal number parser from strings into +//! floats. //! //! ## Usage //! //! There's two top-level functions provided: [`parse`](crate::parse()) and //! [`parse_partial`](crate::parse_partial()), both taking -//! either a string or a bytes slice and parsing the input into either `f32` or `f64`: +//! either a string or a bytes slice and parsing the input into either `f32` or +//! `f64`: //! -//! - [`parse`](crate::parse()) treats the whole string as a decimal number and returns an -//! error if there are invalid characters or if the string is empty. -//! - [`parse_partial`](crate::parse_partial()) tries to find the longest substring at the -//! beginning of the given input string that can be parsed as a decimal number and, -//! in the case of success, returns the parsed value along the number of characters processed; -//! an error is returned if the string doesn't start with a decimal number or if it is empty. -//! This function is most useful as a building block when constructing more complex parsers, -//! or when parsing streams of data. +//! 
- [`parse`](crate::parse()) treats the whole string as a decimal number and +//! returns an error if there are invalid characters or if the string is +//! empty. +//! - [`parse_partial`](crate::parse_partial()) tries to find the longest +//! substring at the beginning of the given input string that can be parsed as +//! a decimal number and, in the case of success, returns the parsed value +//! along the number of characters processed; an error is returned if the +//! string doesn't start with a decimal number or if it is empty. This +//! function is most useful as a building block when constructing more complex +//! parsers, or when parsing streams of data. //! //! ## Examples //! //! ```rust //! // Parse the entire string as a decimal number. //! let s = "1.23e-02"; -//! let x: f32 = fast_float::parse(s).unwrap(); +//! let x: f32 = fast_float2::parse(s).unwrap(); //! assert_eq!(x, 0.0123); //! //! // Parse as many characters as possible as a decimal number. //! let s = "1.23e-02foo"; -//! let (x, n) = fast_float::parse_partial::(s).unwrap(); +//! let (x, n) = fast_float2::parse_partial::(s).unwrap(); //! assert_eq!(x, 0.0123); //! assert_eq!(n, 8); //! assert_eq!(&s[n..], "foo"); //! 
``` +#![cfg_attr(not(feature = "std"), no_std)] +#![allow(unused_unsafe)] +#![warn(unsafe_op_in_unsafe_fn)] #![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)] +#![deny( + clippy::doc_markdown, + clippy::unnecessary_safety_comment, + clippy::semicolon_if_nothing_returned, + clippy::unwrap_used, + clippy::as_underscore, + clippy::doc_markdown +)] #![allow( clippy::cast_possible_truncation, clippy::cast_possible_wrap, @@ -41,7 +56,8 @@ clippy::missing_const_for_fn, clippy::use_self, clippy::module_name_repetitions, - clippy::cargo_common_metadata + clippy::cargo_common_metadata, + clippy::struct_field_names )] use core::fmt::{self, Display}; @@ -67,7 +83,7 @@ impl Display for Error { #[cfg(feature = "std")] impl std::error::Error for Error { - fn description(&self) -> &str { + fn description(&self) -> &'static str { "error while parsing a float" } } @@ -94,21 +110,24 @@ pub trait FastFloat: float::Float { /// Parse a decimal number from string into float (partial). /// - /// This method parses as many characters as possible and returns the resulting number along - /// with the number of digits processed (in case of success, this number is always positive). + /// This method parses as many characters as possible and returns the + /// resulting number along with the number of digits processed (in case + /// of success, this number is always positive). /// /// # Errors /// - /// Will return an error either if the string doesn't start with a valid decimal number - /// – that is, if no zero digits were processed. + /// Will return an error either if the string doesn't start with a valid + /// decimal number – that is, if no zero digits were processed. #[inline] fn parse_float_partial>(s: S) -> Result<(Self, usize)> { parse::parse_float(s.as_ref()).ok_or(Error) } } -impl FastFloat for f32 {} -impl FastFloat for f64 {} +impl FastFloat for f32 { +} +impl FastFloat for f64 { +} /// Parse a decimal number from string into float (full). 
/// @@ -123,13 +142,14 @@ pub fn parse>(s: S) -> Result { /// Parse a decimal number from string into float (partial). /// -/// This function parses as many characters as possible and returns the resulting number along -/// with the number of digits processed (in case of success, this number is always positive). +/// This function parses as many characters as possible and returns the +/// resulting number along with the number of digits processed (in case of +/// success, this number is always positive). /// /// # Errors /// -/// Will return an error either if the string doesn't start with a valid decimal number -/// – that is, if no zero digits were processed. +/// Will return an error either if the string doesn't start with a valid decimal +/// number – that is, if no zero digits were processed. #[inline] pub fn parse_partial>(s: S) -> Result<(T, usize)> { T::parse_float_partial(s) diff --git a/src/number.rs b/src/number.rs index b3d95f5..a13c5be 100644 --- a/src/number.rs +++ b/src/number.rs @@ -3,6 +3,7 @@ use crate::float::Float; const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000; +#[allow(clippy::unreadable_literal)] pub const INT_POW10: [u64; 16] = [ 1, 10, @@ -46,9 +47,9 @@ impl Number { // normal fast path let value = F::from_u64(self.mantissa); if self.exponent < 0 { - value / F::pow10_fast_path((-self.exponent) as _) + value / F::pow10_fast_path((-self.exponent) as usize) } else { - value * F::pow10_fast_path(self.exponent as _) + value * F::pow10_fast_path(self.exponent as usize) } } else { // disguised fast path @@ -57,7 +58,7 @@ impl Number { if mantissa > F::MAX_MANTISSA_FAST_PATH { return None; } - F::from_u64(mantissa) * F::pow10_fast_path(F::MAX_EXPONENT_FAST_PATH as _) + F::from_u64(mantissa) * F::pow10_fast_path(F::MAX_EXPONENT_FAST_PATH as usize) }; if self.negative { value = -value; @@ -84,16 +85,19 @@ fn parse_8digits(mut v: u64) -> u64 { #[inline] fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) { s.parse_digits(|digit| { - *x = 
x.wrapping_mul(10).wrapping_add(digit as _); // overflows to be handled later + // overflows to be handled later + *x = x.wrapping_mul(10).wrapping_add(digit as u64); }); } #[inline] fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) { - while *x < MIN_19DIGIT_INT && !s.is_empty() && s.first().is_ascii_digit() { - let digit = s.first() - b'0'; - *x = (*x * 10) + digit as u64; // no overflows here - s.step(); + while *x < MIN_19DIGIT_INT { + if let Some(digit) = s.try_read_digit() { + *x = (*x * 10) + digit as u64; // no overflows here + } else { + break; + } } } @@ -102,16 +106,14 @@ fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) { // may cause overflows, to be handled later if let Some(v) = s.try_read_u64() { if is_8digits(v) { - *x = x - .wrapping_mul(1_0000_0000) - .wrapping_add(parse_8digits(v)); - s.step_by(8); + *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v)); + // SAFETY: safe since there is at least 8 bytes from `try_read_u64`. + unsafe { s.step_by(8) }; if let Some(v) = s.try_read_u64() { if is_8digits(v) { - *x = x - .wrapping_mul(1_0000_0000) - .wrapping_add(parse_8digits(v)); - s.step_by(8); + *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v)); + // SAFETY: safe since there is at least 8 bytes from `try_read_u64`. 
+ unsafe { s.step_by(8) }; } } } @@ -120,16 +122,22 @@ fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) { #[inline] fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 { + if !s.first_is2(b'e', b'E') { + return 0; + } + // the first character is 'e'/'E' and scientific mode is enabled let start = *s; - s.step(); + // SAFETY: safe since there is at least 1 character which is `e` or `E` + unsafe { s.step() }; let mut exp_num = 0_i64; let mut neg_exp = false; - if !s.is_empty() && s.first_either(b'-', b'+') { + if s.first_is2(b'-', b'+') { neg_exp = s.first_is(b'-'); - s.step(); + // SAFETY: safe since there's at least 1 character in the buffer + unsafe { s.step() }; } - if s.check_first_digit() { + if s.first_is_digit() { s.parse_digits(|digit| { if exp_num < 0x10000 { exp_num = 10 * exp_num + digit as i64; // no overflows here @@ -148,22 +156,24 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 { #[inline] pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { - debug_assert!(!s.is_empty()); + if s.is_empty() { + return None; + } let mut s = AsciiStr::new(s); let start = s; // handle optional +/- sign let mut negative = false; - if s.first() == b'-' { + if s.step_if(b'-') { negative = true; - if s.step().is_empty() { + if s.is_empty() { return None; } - } else if s.first() == b'+' && s.step().is_empty() { + } else if s.step_if(b'+') && s.is_empty() { return None; } - debug_assert!(!s.is_empty()); + debug_assert!(!s.is_empty(), "should not have empty buffer after sign checks"); // parse initial digits before dot let mut mantissa = 0_u64; @@ -175,8 +185,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { let mut n_after_dot = 0; let mut exponent = 0_i64; let int_end = s; - if s.check_first(b'.') { - s.step(); + if s.step_if(b'.') { let before = s; try_parse_8digits(&mut s, &mut mantissa); try_parse_digits(&mut s, &mut mantissa); @@ -190,13 +199,10 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { } // handle scientific format - let mut 
exp_number = 0_i64; - if s.check_first_either(b'e', b'E') { - exp_number = parse_scientific(&mut s); - exponent += exp_number; - } + let exp_number = parse_scientific(&mut s); + exponent += exp_number; - let len = s.offset_from(&start) as _; + let len = s.offset_from(&start) as usize; // handle uncommon case with many digits if n_digits <= 19 { @@ -214,9 +220,13 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { n_digits -= 19; let mut many_digits = false; let mut p = digits_start; - while p.check_first_either(b'0', b'.') { - n_digits -= p.first().saturating_sub(b'0' - 1) as isize; // '0' = b'.' + 2 - p.step(); + while p.first_is2(b'0', b'.') { + // SAFETY: safe since there's at least 1 element that is `0` or `.`. + let byte = unsafe { p.first_unchecked() }; + // '0' = b'.' + 2 + n_digits -= byte.saturating_sub(b'0' - 1) as isize; + // SAFETY: safe since there's at least 1 element from the `first_is2` check. + unsafe { p.step() }; } if n_digits > 0 { // at this point we have more than 19 significant digits, let's try again @@ -227,7 +237,10 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { exponent = if mantissa >= MIN_19DIGIT_INT { int_end.offset_from(&s) // big int } else { - s.step(); // fractional component, skip the '.' + // SAFETY: safe since `s` is at the digits start, so we have + // at least 1 digit from `ndigits > 0`. + debug_assert!(s.first_is(b'.'), "first character for the fraction must be a decimal"); + unsafe { s.step() }; // fractional component, skip the '.' 
let before = s; try_parse_19digits(&mut s, &mut mantissa); -s.offset_from(&before) @@ -261,14 +274,14 @@ pub fn parse_inf_nan(s: &[u8]) -> Option<(F, usize)> { } else if s.eq_ignore_case(b"inf") { return Some((F::INFINITY, parse_inf_rest(s))); } else if s.len() >= 4 { - if s.get_first() == b'+' { + if s[0] == b'+' { let s = s.advance(1); if s.eq_ignore_case(b"nan") { return Some((F::NAN, 4)); } else if s.eq_ignore_case(b"inf") { return Some((F::INFINITY, 1 + parse_inf_rest(s))); } - } else if s.get_first() == b'-' { + } else if s[0] == b'-' { let s = s.advance(1); if s.eq_ignore_case(b"nan") { return Some((F::NEG_NAN, 4)); diff --git a/src/parse.rs b/src/parse.rs index 9c592d4..fe9b09e 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -5,10 +5,6 @@ use crate::simple::parse_long_mantissa; #[inline] pub fn parse_float(s: &[u8]) -> Option<(F, usize)> { - if s.is_empty() { - return None; - } - let (num, rest) = match parse_number(s) { Some(r) => r, None => return parse_inf_nan(s), diff --git a/src/simple.rs b/src/simple.rs index cec1183..f3a724a 100644 --- a/src/simple.rs +++ b/src/simple.rs @@ -6,9 +6,8 @@ use crate::float::Float; pub fn parse_long_mantissa(s: &[u8]) -> AdjustedMantissa { const MAX_SHIFT: usize = 60; const NUM_POWERS: usize = 19; - const POWERS: [u8; 19] = [ - 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, - ]; + const POWERS: [u8; 19] = + [0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59]; let get_shift = |n| { if n < NUM_POWERS { @@ -46,7 +45,7 @@ pub fn parse_long_mantissa(s: &[u8]) -> AdjustedMantissa { _ => 1, } } else { - get_shift((-d.decimal_point) as _) + get_shift((-d.decimal_point) as usize) }; d.left_shift(shift); if d.decimal_point > Decimal::DECIMAL_POINT_RANGE { @@ -81,5 +80,8 @@ pub fn parse_long_mantissa(s: &[u8]) -> AdjustedMantissa { power2 -= 1; } mantissa &= (1_u64 << F::MANTISSA_EXPLICIT_BITS) - 1; - AdjustedMantissa { mantissa, power2 } + AdjustedMantissa { + mantissa, + power2, + } } 
diff --git a/src/table.rs b/src/table.rs index ede4bae..49f4855 100644 --- a/src/table.rs +++ b/src/table.rs @@ -4,10 +4,10 @@ pub const N_POWERS_OF_FIVE: usize = (LARGEST_POWER_OF_FIVE - SMALLEST_POWER_OF_F #[cfg(test)] mod tests { - use super::*; - use num_bigint::BigUint; + use super::*; + fn compute_pow5_128(q: i32) -> (u64, u64) { let mut c = if q < 0 { let pow5 = BigUint::from(5_u8).pow((-q) as u32); @@ -15,7 +15,11 @@ mod tests { while (BigUint::from(1_u8) << z) < pow5 { z += 1; } - let b = if q < -27 { 2 * z + 128 } else { z + 127 }; + let b = if q < -27 { + 2 * z + 128 + } else { + z + 127 + }; (BigUint::from(1_u8) << b) / pow5 + BigUint::from(1_u8) } else { BigUint::from(5_u8).pow(q as u32) @@ -37,6 +41,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] fn test_pow5_table() { for q in SMALLEST_POWER_OF_FIVE..=LARGEST_POWER_OF_FIVE { let (hi, lo) = compute_pow5_128(q); diff --git a/tests/test_api.rs b/tests/test_api.rs index d0e1615..ff04b74 100644 --- a/tests/test_api.rs +++ b/tests/test_api.rs @@ -1,4 +1,4 @@ -use fast_float::{parse, parse_partial, FastFloat}; +use fast_float2::{parse, parse_partial, FastFloat}; macro_rules! check_ok { ($s:expr, $x:expr) => { diff --git a/tests/test_basic.rs b/tests/test_basic.rs index 235990f..0086c97 100644 --- a/tests/test_basic.rs +++ b/tests/test_basic.rs @@ -14,17 +14,17 @@ macro_rules! 
check { ($ty:ident, $s:expr) => {{ check!($ty, stringify!($s), $s) }}; - ($ty:ident, $s:expr, inf) => {{ + ($ty:ident, $s:expr,inf) => {{ check!($ty, $s, core::$ty::INFINITY) }}; - ($ty:ident, $s:expr, neg_inf) => {{ + ($ty:ident, $s:expr,neg_inf) => {{ check!($ty, $s, core::$ty::NEG_INFINITY) }}; ($ty:ident, $s:expr, $e:expr) => {{ let string = String::from($s); let s = string.as_bytes(); let expected: $ty = $e; - let result = fast_float::parse::<$ty, _>(s).unwrap(); + let result = fast_float2::parse::<$ty, _>(s).unwrap(); assert_eq!(result, expected); let lex = lexical_core::parse::<$ty>(s).unwrap(); assert_eq!(result, lex); @@ -212,24 +212,12 @@ fn test_f64_general() { check_f64!("9007199254740993.0", hexf64("0x1.p+53")); check_f64!(append_zeros("9007199254740993.0", 1000), hexf64("0x1.p+53")); check_f64!("10000000000000000000", hexf64("0x1.158e460913dp+63")); - check_f64!( - "10000000000000000000000000000001000000000000", - hexf64("0x1.cb2d6f618c879p+142") - ); - check_f64!( - "10000000000000000000000000000000000000000001", - hexf64("0x1.cb2d6f618c879p+142") - ); + check_f64!("10000000000000000000000000000001000000000000", hexf64("0x1.cb2d6f618c879p+142")); + check_f64!("10000000000000000000000000000000000000000001", hexf64("0x1.cb2d6f618c879p+142")); check_f64!(1.1920928955078125e-07); check_f64!("-0", -0.0); - check_f64!( - "1.0000000000000006661338147750939242541790008544921875", - 1.0000000000000007 - ); - check_f64!( - "1090544144181609348835077142190", - hexf64("0x1.b8779f2474dfbp+99") - ); + check_f64!("1.0000000000000006661338147750939242541790008544921875", 1.0000000000000007); + check_f64!("1090544144181609348835077142190", hexf64("0x1.b8779f2474dfbp+99")); check_f64!(2.2250738585072013e-308); check_f64!(-92666518056446206563E3); check_f64!(-92666518056446206563E3); @@ -255,10 +243,7 @@ fn test_f64_general() { check_f64!(-2.1470977154320536489471030463761883783915110400000000000000000000e+45); 
check_f64!(-4.4900312744003159009338275160799498340862630046359789166919680000e+61); check_f64!("+1", 1.0); - check_f64!( - "1.797693134862315700000000000000001e308", - 1.7976931348623157e308 - ); + check_f64!("1.797693134862315700000000000000001e308", 1.7976931348623157e308); check_f64!("3e-324", hexf64("0x0.0000000000001p-1022")); check_f64!("1.00000006e+09", hexf64("0x1.dcd651ep+29")); check_f64!("4.9406564584124653e-324", hexf64("0x0.0000000000001p-1022")); @@ -303,18 +288,9 @@ fn test_f32_basic() { 7699724722770042717456817626953125"; check_f32!(f1, hexf32("0x1.2ced3p+0")); check_f32!(format!("{}e-38", f1), hexf32("0x1.fffff8p-127")); - check_f32!( - format!("{}e-38", append_zeros(f1, 655)), - hexf32("0x1.fffff8p-127") - ); - check_f32!( - format!("{}e-38", append_zeros(f1, 656)), - hexf32("0x1.fffff8p-127") - ); - check_f32!( - format!("{}e-38", append_zeros(f1, 1000)), - hexf32("0x1.fffff8p-127") - ); + check_f32!(format!("{}e-38", append_zeros(f1, 655)), hexf32("0x1.fffff8p-127")); + check_f32!(format!("{}e-38", append_zeros(f1, 656)), hexf32("0x1.fffff8p-127")); + check_f32!(format!("{}e-38", append_zeros(f1, 1000)), hexf32("0x1.fffff8p-127")); check_f32!(1.00000006e+09); check_f32!(1.4012984643e-45); check_f32!(1.1754942107e-38); @@ -411,7 +387,7 @@ fn test_f64_pow10() { for i in -308..=308 { let s = format!("1e{}", i); let v = f64::from_str(&s).unwrap(); - assert_eq!(fast_float::parse::(s).unwrap(), v); + assert_eq!(fast_float2::parse::(s).unwrap(), v); } } @@ -420,6 +396,6 @@ fn test_f32_pow10() { for i in -38..=38 { let s = format!("1e{}", i); let v = f32::from_str(&s).unwrap(); - assert_eq!(fast_float::parse::(s).unwrap(), v); + assert_eq!(fast_float2::parse::(s).unwrap(), v); } } diff --git a/tests/test_exhaustive.rs b/tests/test_exhaustive.rs index 308b9b4..3b77b39 100644 --- a/tests/test_exhaustive.rs +++ b/tests/test_exhaustive.rs @@ -5,7 +5,7 @@ fn test_f32_exhaustive_ryu() { for i in 0..0xFFFF_FFFF_u32 { let a: f32 = unsafe { 
core::mem::transmute(i) }; let s = buf.format(a); - let b: f32 = fast_float::parse(s).unwrap(); + let b: f32 = fast_float2::parse(s).unwrap(); assert!(a == b || (a.is_nan() && b.is_nan())); } } diff --git a/tests/test_random.rs b/tests/test_random.rs index 1dadd74..ce47401 100644 --- a/tests/test_random.rs +++ b/tests/test_random.rs @@ -3,13 +3,13 @@ fn test_f64_random_from_u64() { const N_ITER: u64 = 1 << 32; - let rng = fastrand::Rng::with_seed(0); + let mut rng = fastrand::Rng::with_seed(0); let mut buf = ryu::Buffer::new(); for _ in 0..N_ITER { let i: u64 = rng.u64(0..0xFFFF_FFFF_FFFF_FFFF); let a: f64 = unsafe { core::mem::transmute(i) }; let s = buf.format(a); - let b: f64 = fast_float::parse(s).unwrap(); + let b: f64 = fast_float2::parse(s).unwrap(); assert!(a == b || (a.is_nan() && b.is_nan())); } }