blob: 2019f71e69b8c635e6d91b410c2403b57c81da32 [file] [log] [blame]
//! Arbitrary-precision decimal class for fallback algorithms.
//!
//! This is only used if the fast-path (native floats) and
//! the Eisel-Lemire algorithm are unable to unambiguously
//! determine the float.
//!
//! The technique used is "Simple Decimal Conversion", developed
//! by Nigel Tao and Ken Thompson. A detailed description of the
//! algorithm can be found in "ParseNumberF64 by Simple Decimal Conversion",
//! available online: <https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html>.
use crate::num::dec2flt::common::{is_8digits, parse_digits, ByteSlice, ByteSliceMut};
#[derive(Clone)]
pub struct Decimal {
/// The number of significant digits in the decimal.
pub num_digits: usize,
/// The offset of the decimal point in the significant digits.
pub decimal_point: i32,
/// If the number of significant digits stored in the decimal is truncated.
pub truncated: bool,
/// Buffer of the raw digits, in the range [0, 9].
pub digits: [u8; Self::MAX_DIGITS],
}
impl Default for Decimal {
fn default() -> Self {
Self { num_digits: 0, decimal_point: 0, truncated: false, digits: [0; Self::MAX_DIGITS] }
}
}
impl Decimal {
/// The maximum number of digits required to unambiguously round a float.
///
/// For a double-precision IEEE 754 float, this required 767 digits,
/// so we store the max digits + 1.
///
/// We can exactly represent a float in radix `b` from radix 2 if
/// `b` is divisible by 2. This function calculates the exact number of
/// digits required to exactly represent that float.
///
/// According to the "Handbook of Floating Point Arithmetic",
/// for IEEE754, with emin being the min exponent, p2 being the
/// precision, and b being the radix, the number of digits follows as:
///
/// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
///
/// For f32, this follows as:
/// emin = -126
/// p2 = 24
///
/// For f64, this follows as:
/// emin = -1022
/// p2 = 53
///
/// In Python:
/// `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))`
pub const MAX_DIGITS: usize = 768;
/// The max digits that can be exactly represented in a 64-bit integer.
pub const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19;
pub const DECIMAL_POINT_RANGE: i32 = 2047;
/// Append a digit to the buffer.
pub fn try_add_digit(&mut self, digit: u8) {
if self.num_digits < Self::MAX_DIGITS {
self.digits[self.num_digits] = digit;
}
self.num_digits += 1;
}
/// Trim trailing zeros from the buffer.
pub fn trim(&mut self) {
// All of the following calls to `Decimal::trim` can't panic because:
//
// 1. `parse_decimal` sets `num_digits` to a max of `Decimal::MAX_DIGITS`.
// 2. `right_shift` sets `num_digits` to `write_index`, which is bounded by `num_digits`.
// 3. `left_shift` `num_digits` to a max of `Decimal::MAX_DIGITS`.
//
// Trim is only called in `right_shift` and `left_shift`.
debug_assert!(self.num_digits <= Self::MAX_DIGITS);
while self.num_digits != 0 && self.digits[self.num_digits - 1] == 0 {
self.num_digits -= 1;
}
}
pub fn round(&self) -> u64 {
if self.num_digits == 0 || self.decimal_point < 0 {
return 0;
} else if self.decimal_point > 18 {
return 0xFFFF_FFFF_FFFF_FFFF_u64;
}
let dp = self.decimal_point as usize;
let mut n = 0_u64;
for i in 0..dp {
n *= 10;
if i < self.num_digits {
n += self.digits[i] as u64;
}
}
let mut round_up = false;
if dp < self.num_digits {
round_up = self.digits[dp] >= 5;
if self.digits[dp] == 5 && dp + 1 == self.num_digits {
round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0))
}
}
if round_up {
n += 1;
}
n
}
/// Computes decimal * 2^shift.
pub fn left_shift(&mut self, shift: usize) {
if self.num_digits == 0 {
return;
}
let num_new_digits = number_of_digits_decimal_left_shift(self, shift);
let mut read_index = self.num_digits;
let mut write_index = self.num_digits + num_new_digits;
let mut n = 0_u64;
while read_index != 0 {
read_index -= 1;
write_index -= 1;
n += (self.digits[read_index] as u64) << shift;
let quotient = n / 10;
let remainder = n - (10 * quotient);
if write_index < Self::MAX_DIGITS {
self.digits[write_index] = remainder as u8;
} else if remainder > 0 {
self.truncated = true;
}
n = quotient;
}
while n > 0 {
write_index -= 1;
let quotient = n / 10;
let remainder = n - (10 * quotient);
if write_index < Self::MAX_DIGITS {
self.digits[write_index] = remainder as u8;
} else if remainder > 0 {
self.truncated = true;
}
n = quotient;
}
self.num_digits += num_new_digits;
if self.num_digits > Self::MAX_DIGITS {
self.num_digits = Self::MAX_DIGITS;
}
self.decimal_point += num_new_digits as i32;
self.trim();
}
/// Computes decimal * 2^-shift.
pub fn right_shift(&mut self, shift: usize) {
let mut read_index = 0;
let mut write_index = 0;
let mut n = 0_u64;
while (n >> shift) == 0 {
if read_index < self.num_digits {
n = (10 * n) + self.digits[read_index] as u64;
read_index += 1;
} else if n == 0 {
return;
} else {
while (n >> shift) == 0 {
n *= 10;
read_index += 1;
}
break;
}
}
self.decimal_point -= read_index as i32 - 1;
if self.decimal_point < -Self::DECIMAL_POINT_RANGE {
// `self = Self::Default()`, but without the overhead of clearing `digits`.
self.num_digits = 0;
self.decimal_point = 0;
self.truncated = false;
return;
}
let mask = (1_u64 << shift) - 1;
while read_index < self.num_digits {
let new_digit = (n >> shift) as u8;
n = (10 * (n & mask)) + self.digits[read_index] as u64;
read_index += 1;
self.digits[write_index] = new_digit;
write_index += 1;
}
while n > 0 {
let new_digit = (n >> shift) as u8;
n = 10 * (n & mask);
if write_index < Self::MAX_DIGITS {
self.digits[write_index] = new_digit;
write_index += 1;
} else if new_digit > 0 {
self.truncated = true;
}
}
self.num_digits = write_index;
self.trim();
}
}
/// Parse a big integer representation of the float as a decimal.
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
let mut d = Decimal::default();
let start = s;
s = s.skip_chars(b'0');
parse_digits(&mut s, |digit| d.try_add_digit(digit));
if s.first_is(b'.') {
s = s.advance(1);
let first = s;
// Skip leading zeros.
if d.num_digits == 0 {
s = s.skip_chars(b'0');
}
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
// SAFETY: s is at least 8 bytes.
let v = unsafe { s.read_u64_unchecked() };
if !is_8digits(v) {
break;
}
// SAFETY: d.num_digits + 8 is less than d.digits.len()
unsafe {
d.digits[d.num_digits..].write_u64_unchecked(v - 0x3030_3030_3030_3030);
}
d.num_digits += 8;
s = s.advance(8);
}
parse_digits(&mut s, |digit| d.try_add_digit(digit));
d.decimal_point = s.len() as i32 - first.len() as i32;
}
if d.num_digits != 0 {
// Ignore the trailing zeros if there are any
let mut n_trailing_zeros = 0;
for &c in start[..(start.len() - s.len())].iter().rev() {
if c == b'0' {
n_trailing_zeros += 1;
} else if c != b'.' {
break;
}
}
d.decimal_point += n_trailing_zeros as i32;
d.num_digits -= n_trailing_zeros;
d.decimal_point += d.num_digits as i32;
if d.num_digits > Decimal::MAX_DIGITS {
d.truncated = true;
d.num_digits = Decimal::MAX_DIGITS;
}
}
if s.first_is2(b'e', b'E') {
s = s.advance(1);
let mut neg_exp = false;
if s.first_is(b'-') {
neg_exp = true;
s = s.advance(1);
} else if s.first_is(b'+') {
s = s.advance(1);
}
let mut exp_num = 0_i32;
parse_digits(&mut s, |digit| {
if exp_num < 0x10000 {
exp_num = 10 * exp_num + digit as i32;
}
});
d.decimal_point += if neg_exp { -exp_num } else { exp_num };
}
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
d.digits[i] = 0;
}
d
}
fn number_of_digits_decimal_left_shift(d: &Decimal, mut shift: usize) -> usize {
#[rustfmt::skip]
const TABLE: [u16; 65] = [
0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024,
0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, 0x3073, 0x3080, 0x388E, 0x389C,
0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169,
0x5180, 0x5998, 0x59B0, 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B,
0x72AA, 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, 0x8C02,
0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, 0x051C, 0x051C,
];
#[rustfmt::skip]
const TABLE_POW5: [u8; 0x051C] = [
5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9, 0, 6, 2, 5, 1,
9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5,
1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2,
5, 1, 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6,
9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1,
6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9,
1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, 0, 4, 6,
4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1,
4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2,
3, 8, 2, 8, 1, 2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6,
2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5, 7, 4, 6, 1, 5,
4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2,
5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5,
3, 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4,
6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6,
1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0,
6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, 1, 2, 5, 3,
6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8,
9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4,
7, 0, 1, 7, 7, 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3, 7, 3, 6, 7, 5,
4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7,
7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0,
9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0,
8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1,
0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, 5, 7, 8, 1, 2,
5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8,
9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0,
6, 6, 8, 9, 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3,
8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8, 5, 0, 0, 6, 2,
6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4,
9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1,
1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8,
2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5,
8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1,
3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, 3, 8, 7, 7, 7, 8,
7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0,
6, 2, 5, 6, 9, 3, 8, 8, 9, 3, 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2,
5, 5, 6, 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2,
3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3,
8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2,
2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
];
shift &= 63;
let x_a = TABLE[shift];
let x_b = TABLE[shift + 1];
let num_new_digits = (x_a >> 11) as _;
let pow5_a = (0x7FF & x_a) as usize;
let pow5_b = (0x7FF & x_b) as usize;
let pow5 = &TABLE_POW5[pow5_a..];
for (i, &p5) in pow5.iter().enumerate().take(pow5_b - pow5_a) {
if i >= d.num_digits {
return num_new_digits - 1;
} else if d.digits[i] == p5 {
continue;
} else if d.digits[i] < p5 {
return num_new_digits - 1;
} else {
return num_new_digits;
}
}
num_new_digits
}