Int128.h#
Fully qualified name: carb/math/Int128.h
File members: carb/math/Int128.h
// Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#pragma once
#include "../cpp/Bit.h"
#include "../cpp/StringView.h"
#include "MulDiv.h"
#include <iostream>
#include <string> // for std::string and std::hash
#include <string_view>
#ifdef DOXYGEN_BUILD
# define CARBLOCAL_USE_BUILTIN_INT128 1
#elif CARB_TOOLCHAIN_CLANG
// Although Clang has built-in uint128 support, on Windows it generates a call to `__udivti3`, which it does not
// provide. See: https://github.com/llvm/llvm-project/issues/25679
// For this reason we use our own uint128_t implementation for Clang on Windows.
# define CARBLOCAL_USE_BUILTIN_INT128 !CARB_PLATFORM_WINDOWS
#elif CARB_COMPILER_MSC
// No builtin to use
# define CARBLOCAL_USE_BUILTIN_INT128 0
// MS intrinsics
extern "C"
{
unsigned char _BitScanReverse64(unsigned long* _Index, uint64_t _Mask);
# pragma intrinsic(_BitScanReverse64)
# if CARB_X86_64
unsigned char _subborrow_u64(unsigned char, unsigned __int64, unsigned __int64, unsigned __int64*);
# pragma intrinsic(_subborrow_u64)
# endif
}
#elif CARB_COMPILER_GNUC
# define CARBLOCAL_USE_BUILTIN_INT128 1
#else
CARB_UNSUPPORTED_COMPILER();
#endif
namespace carb
{
namespace math
{
#if !CARBLOCAL_USE_BUILTIN_INT128
namespace detail
{
inline bool bsr(unsigned& index, uint64_t val) noexcept
{
# if CARB_COMPILER_GNUC || CARB_TOOLCHAIN_CLANG
if (val)
{
index = 64u - 1u - unsigned(__builtin_clzll(val));
return true;
}
return false;
# elif CARB_COMPILER_MSC
unsigned long ulIndex;
bool ret = _BitScanReverse64(&ulIndex, val);
index = unsigned(ulIndex);
return ret;
# else
CARB_UNSUPPORTED_COMPILER();
# endif
}
inline uint8_t sbb(uint8_t cf, uint64_t minuend, uint64_t subtrahend, uint64_t& difference) noexcept
{
# if CARB_COMPILER_GNUC || CARB_TOOLCHAIN_CLANG
auto c1 = __builtin_sub_overflow(minuend, subtrahend, &difference);
auto c2 = __builtin_sub_overflow(difference, cf, &difference);
return uint8_t(c1 | c2);
# elif CARB_COMPILER_MSC
# if CARB_X86_64
return _subborrow_u64(cf, minuend, subtrahend, &difference);
# elif CARB_AARCH64
// Don't have any intrinsics that support carry flag here, so do manual carry check
auto temp = minuend - subtrahend;
auto c1 = minuend < temp;
difference = temp - cf;
c1 |= (temp < difference);
return uint8_t(c1);
# else
CARB_UNSUPPORTED_ARCHITECTURE();
# endif
# else
CARB_UNSUPPORTED_COMPILER();
# endif
}
class int128base
{
public:
explicit operator bool() const noexcept
{
return (m_low | m_high) != 0;
}
constexpr int128base(const int128base&) = default;
int128base& operator=(const int128base&) = default;
protected:
constexpr int128base() noexcept : m_low(0), m_high(0)
{
}
constexpr int128base(uint64_t high, uint64_t low) noexcept : m_low(low), m_high(high)
{
}
// Algorithm derived from assembly at https://skanthak.hier-im-netz.de/integer.html
static int128base udivmodti4(int128base dividend, int128base divisor, int128base* remainder) noexcept
{
auto&& less = [](const int128base& lhs, const int128base& rhs) noexcept {
if (lhs.m_high < rhs.m_high)
return true;
if (rhs.m_high < lhs.m_high)
return false;
return lhs.m_low < rhs.m_low;
};
if (less(dividend, divisor)) // dividend < divisor?
{
// trivial
if (remainder)
*remainder = dividend;
return {};
}
unsigned index;
int128base quotient;
if (!bsr(index, divisor.m_high)) // high qword of divisor == 0?
{
if (dividend.m_high < divisor.m_low)
{
// Normal division
// dividend < divisor * 2**64; quotient < 2**64
quotient = int128base(0, udiv128(dividend.m_high, dividend.m_low, divisor.m_low,
remainder ? (remainder->m_high = 0, &remainder->m_low) : nullptr));
}
else
{
// Long division
uint64_t rem;
quotient.m_high = udiv128(0, dividend.m_high, divisor.m_low, &rem);
quotient.m_low = udiv128(rem, dividend.m_low, divisor.m_low,
remainder ? (remainder->m_high = 0, &remainder->m_low) : nullptr);
}
}
else
{
// Extended division
index ^= 0x3F; // index = number of leading '0' bits in (high qword of) divisor
if (!index) // divisor >= 2**127?
{
// special case: dividend >= divisor >= 2**127
// quotient: 1, remainder: dividend - divisor
quotient = int128base(0, 1);
if (remainder)
{
auto borrow = sbb(0, dividend.m_low, divisor.m_low, remainder->m_low);
sbb(borrow, dividend.m_high, divisor.m_high, remainder->m_high);
}
return quotient;
}
quotient.m_high = quotient.m_low = 0;
// shift the divisor by the number of leading '0' bits
uint64_t divisorH = (divisor.m_high << index) | (divisor.m_low >> (64 - index));
int128base temp;
temp.m_high = dividend.m_high;
if (temp.m_high >= divisorH)
{
// high qword of dividend >= divisor'
// subtract divisorH from temp.high to prevent possible quotient overflow and set most significant bit
// of quotient"
temp.m_high -= divisorH;
quotient.m_low = 1;
}
// temp.low = temp.high:dividend.m_low / divisorH
// temp.high = temp.high:dividend.m_low % divisorH
temp.m_low = udiv128(temp.m_high, dividend.m_low, divisorH, &temp.m_high);
// Shift quotient by number of leading zeros in divisor and mix in bits from temp.low
quotient.m_low = (quotient.m_low << index) | (temp.m_low >> (64 - index));
divisorH = uint64_t(int64_t(divisor.m_high) * int64_t(quotient.m_low));
// temp = divisor.m_low * quot
temp.m_low = umul128(divisor.m_low, quotient.m_low, temp.m_high);
// temp.high += divisorH
if (adc(0, temp.m_high, divisorH, temp.m_high)) // had carry?
{
--quotient.m_low;
// temp.low -= divisor.m_low; temp.high -= divisor.m_low; former borrows from latter
auto borrow = sbb(0, temp.m_low, divisor.m_low, temp.m_low);
sbb(borrow, temp.m_high, divisor.m_low, temp.m_high);
}
// Fix up quotient and calculate remainder
int128base rem;
// rem = dividend.m_high:dividend.m_low - temp
auto borrow = sbb(0, dividend.m_low, temp.m_low, rem.m_low);
borrow = sbb(borrow, dividend.m_high, temp.m_high, rem.m_high);
if (borrow)
{
// remainder is off by divisor and quot is off by 1
--quotient.m_low;
auto carry = adc(0, rem.m_low, divisor.m_low, rem.m_low);
adc(carry, rem.m_high, divisor.m_high, rem.m_high);
}
// Remainder
if (remainder)
*remainder = rem;
}
return quotient;
}
static int128base divmodti4(int128base dividend, int128base divisor, int128base* remainder) noexcept
{
int64_t const dvd_neg = dividend.m_ihigh >> 63;
// dividend = abs(dividend)
dividend.m_ihigh ^= dvd_neg;
dividend.m_ilow ^= dvd_neg;
auto borrow = sbb(0, dividend.m_low, uint64_t(dvd_neg), dividend.m_low);
sbb(borrow, dividend.m_high, uint64_t(dvd_neg), dividend.m_high);
int64_t const dvr_neg = divisor.m_ihigh >> 63;
// divisor = abs(divisor)
divisor.m_ihigh ^= dvr_neg;
divisor.m_ilow ^= dvr_neg;
borrow = sbb(0, divisor.m_low, uint64_t(dvr_neg), divisor.m_low);
sbb(borrow, divisor.m_high, uint64_t(dvr_neg), divisor.m_high);
int128base quotient = udivmodti4(dividend, divisor, remainder);
// Fix up quotient negation (negate if dvd_neg ^ dvr_neg)
int64_t const quot_neg = dvd_neg ^ dvr_neg;
quotient.m_ihigh ^= quot_neg;
quotient.m_ilow ^= quot_neg;
borrow = sbb(0, quotient.m_low, uint64_t(quot_neg), quotient.m_low);
sbb(borrow, quotient.m_high, uint64_t(quot_neg), quotient.m_high);
// Fix up remainder negation (negate if dvd_neg < 0)
if (dvd_neg && remainder)
{
remainder->m_ihigh = -remainder->m_ihigh;
borrow = !!remainder->m_ilow;
remainder->m_ilow = -remainder->m_ilow;
sbb(borrow, remainder->m_high, 0, remainder->m_high);
}
return quotient;
}
static int128base udivti3(int128base dividend, int128base divisor) noexcept
{
return udivmodti4(dividend, divisor, nullptr);
}
static int128base umodti3(int128base dividend, int128base divisor) noexcept
{
int128base remainder;
udivmodti4(dividend, divisor, &remainder);
return remainder;
}
static int128base divti3(int128base dividend, int128base divisor) noexcept
{
int64_t const dvd_neg = dividend.m_ihigh >> 63; // dividend < 0 ? -1 : 0
// dividend = abs(dividend)
dividend.m_ihigh ^= dvd_neg;
dividend.m_ilow ^= dvd_neg;
auto borrow = sbb(0, dividend.m_low, uint64_t(dvd_neg), dividend.m_low);
sbb(borrow, dividend.m_high, uint64_t(dvd_neg), dividend.m_high);
int64_t const dvr_neg = divisor.m_ihigh >> 63; // divisor < 0 ? -1 : 0
// divisor = abs(divisor)
divisor.m_ihigh ^= dvr_neg;
divisor.m_ilow ^= dvr_neg;
borrow = sbb(0, divisor.m_low, uint64_t(dvr_neg), divisor.m_low);
sbb(borrow, divisor.m_high, uint64_t(dvr_neg), divisor.m_high);
int128base quotient = udivmodti4(dividend, divisor, nullptr);
// Fix up negation (negate if dvd_neg ^ dvr_neg)
int64_t const quot_neg = dvd_neg ^ dvr_neg;
quotient.m_ihigh ^= quot_neg;
quotient.m_ilow ^= quot_neg;
borrow = sbb(0, quotient.m_low, uint64_t(quot_neg), quotient.m_low);
sbb(borrow, quotient.m_high, uint64_t(quot_neg), quotient.m_high);
return quotient;
}
static int128base modti3(int128base dividend, int128base divisor) noexcept
{
int64_t const dvr_neg = divisor.m_ihigh >> 63; // divisor < 0 ? -1 : 0
// divisor = abs(divisor)
divisor.m_ihigh ^= dvr_neg;
divisor.m_ilow ^= dvr_neg;
auto borrow = sbb(0, divisor.m_low, uint64_t(dvr_neg), divisor.m_low);
sbb(borrow, divisor.m_high, uint64_t(dvr_neg), divisor.m_high);
int64_t const dvd_neg = dividend.m_ihigh >> 63; // dividend < 0 ? -1 : 0
// dividend = abs(dividend)
dividend.m_ihigh ^= dvd_neg;
dividend.m_ilow ^= dvd_neg;
borrow = sbb(0, dividend.m_low, uint64_t(dvd_neg), dividend.m_low);
sbb(borrow, dividend.m_high, uint64_t(dvd_neg), dividend.m_high);
int128base remainder;
udivmodti4(dividend, divisor, &remainder);
// Fix up negation (negate if dividend is negative)
remainder.m_ihigh ^= dvd_neg;
remainder.m_ilow ^= dvd_neg;
borrow = sbb(0, remainder.m_low, uint64_t(dvd_neg), remainder.m_low);
sbb(borrow, remainder.m_high, uint64_t(dvd_neg), remainder.m_high);
return remainder;
}
CARB_IGNOREWARNING_MSC_WITH_PUSH(4201) // nonstandard extension used: nameless struct/union
union
{
struct
{
uint64_t m_low;
uint64_t m_high;
};
struct
{
int64_t m_ilow;
int64_t m_ihigh;
};
static_assert(cpp::endian::native == cpp::endian::little, "Assumes little endian");
};
CARB_IGNOREWARNING_MSC_POP
};
} // namespace detail
// macros for boilerplate
# define CARBLOCAL_BINARY_MATH_OP(type, op) \
template <class T, std::enable_if_t<std::is_integral<T>::value, bool> = false> \
type& operator op##=(T rhs) noexcept \
{ \
*this op## = type(rhs); \
return *this; \
} \
friend type operator op(type lhs, const type& rhs) noexcept \
{ \
lhs op## = rhs; \
return lhs; \
} \
template <class T, std::enable_if_t<std::is_integral<T>::value, bool> = false> \
friend type operator op(type lhs, T rhs) noexcept \
{ \
lhs op## = type(rhs); \
return lhs; \
} \
template <class T, std::enable_if_t<std::is_integral<T>::value, bool> = false> \
friend type operator op(T lhs, const type& rhs) noexcept \
{ \
type temp(lhs); \
temp op## = rhs; \
return temp; \
}
# define CARBLOCAL_OP_PROMOTE(type, op) \
template <class T, std::enable_if_t<std::is_integral<T>::value, bool> = false> \
friend bool operator op(const type& lhs, T rhs) noexcept \
{ \
return lhs op type(rhs); \
} \
template <class T, std::enable_if_t<std::is_integral<T>::value, bool> = false> \
friend bool operator op(T lhs, const type& rhs) noexcept \
{ \
return type(lhs) op rhs; \
}
# define CARBLOCAL_OP_DERIVE(type, op, ...) \
friend bool operator op(const type& lhs, const type& rhs) noexcept \
{ \
return __VA_ARGS__; \
} \
CARBLOCAL_OP_PROMOTE(type, op)
class uint128_t;
class int128_t : public detail::int128base
{
using base = detail::int128base;
public:
constexpr int128_t() noexcept = default;
template <class T, std::enable_if_t<std::conjunction<std::is_integral<T>, std::is_signed<T>>::value, bool> = true>
constexpr explicit int128_t(T lower) noexcept : base(lower < 0 ? uint64_t(-1) : uint64_t(0), uint64_t(lower))
{
}
template <class T, std::enable_if_t<std::conjunction<std::is_integral<T>, std::is_unsigned<T>>::value, bool> = true>
constexpr explicit int128_t(T lower) noexcept : base(0, uint64_t(lower))
{
}
constexpr explicit int128_t(const uint128_t& other) noexcept;
constexpr int128_t(const int128_t&) = default;
int128_t& operator=(const int128_t&) = default;
friend int128_t make_int128_t(uint64_t high, uint64_t low) noexcept;
friend int128_t divide_and_modulus(const int128_t& dividend, const int128_t& divisor, int128_t& modulus);
friend int64_t cast_to_int64_t(const int128_t& val) noexcept;
int128_t& operator++() noexcept
{
if (++m_low == 0)
++m_high;
return *this;
}
int128_t operator++(int) noexcept
{
int128_t prev = *this;
this->operator++();
return prev;
}
int128_t& operator--() noexcept
{
if (m_low-- == 0)
--m_high;
return *this;
}
int128_t operator--(int) noexcept
{
int128_t prev = *this;
this->operator--();
return prev;
}
int128_t& operator+=(const int128_t& other) noexcept
{
auto carry = detail::adc(0, m_low, other.m_low, m_low);
detail::adc(carry, m_high, other.m_high, m_high);
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, +)
int128_t& operator-=(const int128_t& other) noexcept
{
auto borrow = detail::sbb(0, m_low, other.m_low, m_low);
detail::sbb(borrow, m_high, other.m_high, m_high);
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, -)
int128_t& operator*=(int128_t other) noexcept
{
// 128-bit multiply is:
// otherH otherL
// X thisH thisL
// ---------------------
// thisL*otherH thisL*otherL
// + thisH*otherL 0
m_ihigh *= other.m_ilow;
other.m_ihigh *= m_ilow;
m_low = detail::umul128(m_low, other.m_low, other.m_low);
m_ihigh += other.m_ihigh;
m_ihigh += other.m_ilow;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, *)
int128_t& operator/=(const int128_t& other) noexcept
{
base::operator=(divti3(*this, other));
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, /)
int128_t& operator%=(const int128_t& other) noexcept
{
base::operator=(modti3(*this, other));
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, %)
int128_t& operator&=(const int128_t& other) noexcept
{
m_low &= other.m_ilow;
m_high &= other.m_ihigh;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, &)
int128_t& operator|=(const int128_t& other) noexcept
{
m_low |= other.m_ilow;
m_high |= other.m_ihigh;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, |)
int128_t& operator^=(const int128_t& other) noexcept
{
m_low ^= other.m_ilow;
m_high ^= other.m_ihigh;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(int128_t, ^)
friend int128_t operator+(const int128_t& val) noexcept
{
return val;
}
friend int128_t operator-(const int128_t& val) noexcept
{
int128_t ret = ~val;
++ret;
return ret;
}
friend int128_t operator~(const int128_t& val) noexcept
{
return int128_t(~val.m_high, ~val.m_low);
}
friend bool operator<(const int128_t& lhs, const int128_t& rhs) noexcept
{
// The asm for this should be simple, but requires access to the overflow flag (Linux x86-64 example):
// cmp rdi, qword ptr [rdx] ; effectively (rdi - *rdx) to set flags and ignore the result.
// sbb rsi, qword ptr [rdx+8] ; subtract with borrow, sets OF and SF among others
// setl al ; "set less": sets al to 1 if OF != SF
// ret
auto const diff = lhs - rhs;
// Since we don't have access to the ALU flags, we emulate them ourself:
// From https://teaching.idallen.com/dat2343/11w/notes/040_overflow.txt
// Signed less-than is true if OverflowFlag != SignFlag
// OverflowFlag is true if (sign(lhs.high) ^ sign(rhs.high)) && (sign(lhs.high) ^ sign(diff.high))
// that is, if sign of lhs and rhs differ and the sign of the difference doesn't match lhs
// Truth table (signs of [l]hs, [r]hs, [d]iff shown as 0 if positive, 1 if negative; SF is [d]iff)
// l - r = d/SF | (l^r) (l^d) OF d/SF = Less
// ------------------+-------------------------------
// 0 - 0 = 0 | 0 & 0 = 0 ^ 0 = 0
// 0 - 0 = 1 | 0 & 1 = 0 ^ 1 = 1
// 0 - 1 = 0 | 1 & 0 = 0 ^ 0 = 0
// 0 - 1 = 1 | 1 & 1 = 1 ^ 1 = 0
// 1 - 0 = 0 | 1 & 1 = 1 ^ 0 = 1
// 1 - 0 = 1 | 1 & 0 = 0 ^ 1 = 1
// 1 - 1 = 0 | 0 & 1 = 0 ^ 0 = 0
// 1 - 1 = 1 | 0 & 0 = 0 ^ 1 = 1
int64_t const kOverflowFlag = ((lhs.m_ihigh ^ rhs.m_ihigh) & (lhs.m_ihigh ^ diff.m_ihigh)) >> 63; // sign extend
int64_t const kSignFlag = diff.m_ihigh >> 63; // sign extend
return !!(kOverflowFlag ^ kSignFlag);
}
CARBLOCAL_OP_PROMOTE(int128_t, <)
friend bool operator==(const int128_t& lhs, const int128_t& rhs) noexcept
{
return lhs.m_ilow == rhs.m_ilow && lhs.m_ihigh == rhs.m_ihigh;
}
CARBLOCAL_OP_PROMOTE(int128_t, ==)
CARBLOCAL_OP_DERIVE(int128_t, !=, !(rhs == lhs))
CARBLOCAL_OP_DERIVE(int128_t, >, (rhs < lhs))
CARBLOCAL_OP_DERIVE(int128_t, <=, !(rhs < lhs))
CARBLOCAL_OP_DERIVE(int128_t, >=, !(lhs < rhs))
int128_t& operator<<=(unsigned shift) noexcept
{
shift &= 127; // Intel-ism
if (shift >= 64)
{
m_high = m_low << (shift - 64);
m_low = 0;
}
else if (shift != 0)
{
m_high = (m_high << shift) | (m_low >> (64 - shift));
m_low <<= shift;
}
return *this;
}
friend int128_t operator<<(int128_t lhs, unsigned shift) noexcept
{
lhs <<= shift;
return lhs;
}
int128_t& operator>>=(unsigned shift) noexcept
{
shift &= 127; // Intel-ism
if (shift >= 64)
{
m_ilow = m_ihigh >> (shift - 64);
m_ihigh >>= 63; // sign extend
}
else if (shift != 0)
{
m_low = (m_low >> shift) | (m_high << (64 - shift));
m_ihigh >>= shift;
}
return *this;
}
friend int128_t operator>>(int128_t lhs, unsigned shift) noexcept
{
lhs >>= shift;
return lhs;
}
friend std::ostream& operator<<(std::ostream& os, const int128_t& val);
friend std::string to_string(const int128_t& val);
private:
constexpr int128_t(base b) noexcept : base(b)
{
}
constexpr int128_t(uint64_t high, uint64_t low) noexcept : base(high, low)
{
}
};
class uint128_t : public detail::int128base
{
using base = detail::int128base;
public:
constexpr uint128_t() noexcept = default;
template <class T, std::enable_if_t<std::conjunction<std::is_integral<T>, std::is_signed<T>>::value, bool> = true>
constexpr explicit uint128_t(T lower) noexcept : base(lower < 0 ? uint64_t(-1) : uint64_t(0), uint64_t(lower))
{
}
template <class T, std::enable_if_t<std::conjunction<std::is_integral<T>, std::is_unsigned<T>>::value, bool> = true>
constexpr explicit uint128_t(T lower) noexcept : base(0, uint64_t(lower))
{
}
constexpr explicit uint128_t(const int128_t& other) noexcept;
constexpr uint128_t(const uint128_t&) = default;
uint128_t& operator=(const uint128_t&) = default;
friend constexpr uint128_t make_uint128_t(uint64_t high, uint64_t low) noexcept;
friend uint128_t divide_and_modulus(const uint128_t& dividend, const uint128_t& divisor, uint128_t& modulus);
friend uint64_t cast_to_uint64_t(const uint128_t& val) noexcept;
uint128_t& operator++() noexcept
{
if (++m_low == 0)
++m_high;
return *this;
}
uint128_t operator++(int) noexcept
{
uint128_t prev = *this;
this->operator++();
return prev;
}
uint128_t& operator--() noexcept
{
if (m_low-- == 0)
--m_high;
return *this;
}
uint128_t operator--(int) noexcept
{
uint128_t prev = *this;
this->operator--();
return prev;
}
uint128_t& operator+=(const uint128_t& other) noexcept
{
auto carry = detail::adc(0, m_low, other.m_low, m_low);
detail::adc(carry, m_high, other.m_high, m_high);
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, +)
uint128_t& operator-=(const uint128_t& other) noexcept
{
auto borrow = detail::sbb(0, m_low, other.m_low, m_low);
detail::sbb(borrow, m_high, other.m_high, m_high);
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, -)
uint128_t& operator*=(uint128_t other) noexcept
{
// 128-bit multiply is:
// otherH otherL
// X thisH thisL
// ---------------------
// thisL*otherH thisL*otherL
// + thisH*otherL 0
m_high *= other.m_low;
other.m_high *= m_low;
m_low = detail::umul128(m_low, other.m_low, other.m_low);
m_high += other.m_high;
m_high += other.m_low;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, *)
uint128_t& operator/=(const uint128_t& other) noexcept
{
base::operator=(udivti3(*this, other));
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, /)
uint128_t& operator%=(const uint128_t& other) noexcept
{
base::operator=(umodti3(*this, other));
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, %)
uint128_t& operator&=(const uint128_t& other) noexcept
{
m_low &= other.m_low;
m_high &= other.m_high;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, &)
uint128_t& operator|=(const uint128_t& other) noexcept
{
m_low |= other.m_low;
m_high |= other.m_high;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, |)
uint128_t& operator^=(const uint128_t& other) noexcept
{
m_low ^= other.m_low;
m_high ^= other.m_high;
return *this;
}
CARBLOCAL_BINARY_MATH_OP(uint128_t, ^)
friend uint128_t operator+(const uint128_t& val) noexcept
{
return val;
}
friend uint128_t operator~(const uint128_t& val) noexcept
{
return uint128_t(~val.m_high, ~val.m_low);
}
friend bool operator<(const uint128_t& lhs, const uint128_t& rhs) noexcept
{
if (lhs.m_high < rhs.m_high)
return true;
if (rhs.m_high < lhs.m_high)
return false;
return lhs.m_low < rhs.m_low;
}
CARBLOCAL_OP_PROMOTE(uint128_t, <)
friend bool operator==(const uint128_t& lhs, const uint128_t& rhs) noexcept
{
return lhs.m_low == rhs.m_low && lhs.m_high == rhs.m_high;
}
CARBLOCAL_OP_PROMOTE(uint128_t, ==)
CARBLOCAL_OP_DERIVE(uint128_t, !=, !(rhs == lhs))
CARBLOCAL_OP_DERIVE(uint128_t, >, (rhs < lhs))
CARBLOCAL_OP_DERIVE(uint128_t, <=, !(rhs < lhs))
CARBLOCAL_OP_DERIVE(uint128_t, >=, !(lhs < rhs))
uint128_t& operator<<=(unsigned shift) noexcept
{
shift &= 127; // Intel-ism
if (shift >= 64)
{
m_high = m_low << (shift - 64);
m_low = 0;
}
else if (shift != 0)
{
m_high = (m_high << shift) | (m_low >> (64 - shift));
m_low <<= shift;
}
return *this;
}
friend uint128_t operator<<(uint128_t lhs, unsigned shift) noexcept
{
lhs <<= shift;
return lhs;
}
uint128_t& operator>>=(unsigned shift) noexcept
{
shift &= 127; // Intel-ism
if (shift >= 64)
{
m_low = m_high >> (shift - 64);
m_high = 0;
}
else if (shift != 0)
{
m_low = (m_low >> shift) | (m_high << (64 - shift));
m_high >>= shift;
}
return *this;
}
friend uint128_t operator>>(uint128_t lhs, unsigned shift) noexcept
{
lhs >>= shift;
return lhs;
}
friend std::ostream& operator<<(std::ostream& os, const uint128_t& val);
friend std::string to_string(const uint128_t& val);
private:
constexpr uint128_t(base b) : base(b)
{
}
constexpr uint128_t(uint64_t high, uint64_t low) noexcept : base(high, low)
{
}
};
inline constexpr int128_t::int128_t(const uint128_t& other) noexcept : base(other)
{
}
inline constexpr uint128_t::uint128_t(const int128_t& other) noexcept : base(other)
{
}
inline int128_t make_int128_t(uint64_t high, uint64_t low) noexcept
{
return int128_t(high, low);
}
inline int64_t cast_to_int64_t(const int128_t& val) noexcept
{
return int64_t(val.m_low);
}
inline int128_t divide_and_modulus(const int128_t& dividend, const int128_t& divisor, int128_t& modulus)
{
return int128_t(int128_t::divmodti4(dividend, divisor, &modulus));
}
inline constexpr uint128_t make_uint128_t(uint64_t high, uint64_t low) noexcept
{
return uint128_t(high, low);
}
inline uint64_t cast_to_uint64_t(const uint128_t& val) noexcept
{
return val.m_low;
}
inline uint128_t divide_and_modulus(const uint128_t& dividend, const uint128_t& divisor, uint128_t& modulus)
{
return uint128_t(uint128_t::udivmodti4(dividend, divisor, &modulus));
}
# undef CARBLOCAL_BINARY_MATH_OP
# undef CARBLOCAL_OP_PROMOTE
# undef CARBLOCAL_OP_DERIVE
#else /* CARBLOCAL_USE_BUILTIN_INT128 */
using int128_t = __int128;
using uint128_t = unsigned __int128;
inline int128_t make_int128_t(uint64_t high, uint64_t low) noexcept
{
return (int128_t(high) << 64) + low;
}
inline int128_t divide_and_modulus(const int128_t& dividend, const int128_t& divisor, int128_t& modulus) noexcept
{
modulus = dividend % divisor;
return dividend / divisor;
}
inline int64_t cast_to_int64_t(const int128_t& val) noexcept
{
return int64_t(val);
}
inline constexpr uint128_t make_uint128_t(uint64_t high, uint64_t low) noexcept
{
return (uint128_t(high) << 64) + low;
}
inline uint128_t divide_and_modulus(const uint128_t& dividend, const uint128_t& divisor, uint128_t& modulus) noexcept
{
modulus = dividend % divisor;
return dividend / divisor;
}
inline uint64_t cast_to_uint64_t(const uint128_t& val) noexcept
{
return uint64_t(val);
}
#endif /* CARBLOCAL_USE_BUILTIN_INT128 */
namespace detail
{
struct formatter128
{
public:
formatter128() noexcept = default;
cpp::string_view format(uint128_t val) noexcept
{
using digit_pair = char[2];
constexpr static digit_pair kDigits[] = {
{ '0', '0' }, { '0', '1' }, { '0', '2' }, { '0', '3' }, { '0', '4' }, { '0', '5' }, { '0', '6' },
{ '0', '7' }, { '0', '8' }, { '0', '9' }, { '1', '0' }, { '1', '1' }, { '1', '2' }, { '1', '3' },
{ '1', '4' }, { '1', '5' }, { '1', '6' }, { '1', '7' }, { '1', '8' }, { '1', '9' }, { '2', '0' },
{ '2', '1' }, { '2', '2' }, { '2', '3' }, { '2', '4' }, { '2', '5' }, { '2', '6' }, { '2', '7' },
{ '2', '8' }, { '2', '9' }, { '3', '0' }, { '3', '1' }, { '3', '2' }, { '3', '3' }, { '3', '4' },
{ '3', '5' }, { '3', '6' }, { '3', '7' }, { '3', '8' }, { '3', '9' }, { '4', '0' }, { '4', '1' },
{ '4', '2' }, { '4', '3' }, { '4', '4' }, { '4', '5' }, { '4', '6' }, { '4', '7' }, { '4', '8' },
{ '4', '9' }, { '5', '0' }, { '5', '1' }, { '5', '2' }, { '5', '3' }, { '5', '4' }, { '5', '5' },
{ '5', '6' }, { '5', '7' }, { '5', '8' }, { '5', '9' }, { '6', '0' }, { '6', '1' }, { '6', '2' },
{ '6', '3' }, { '6', '4' }, { '6', '5' }, { '6', '6' }, { '6', '7' }, { '6', '8' }, { '6', '9' },
{ '7', '0' }, { '7', '1' }, { '7', '2' }, { '7', '3' }, { '7', '4' }, { '7', '5' }, { '7', '6' },
{ '7', '7' }, { '7', '8' }, { '7', '9' }, { '8', '0' }, { '8', '1' }, { '8', '2' }, { '8', '3' },
{ '8', '4' }, { '8', '5' }, { '8', '6' }, { '8', '7' }, { '8', '8' }, { '8', '9' }, { '9', '0' },
{ '9', '1' }, { '9', '2' }, { '9', '3' }, { '9', '4' }, { '9', '5' }, { '9', '6' }, { '9', '7' },
{ '9', '8' }, { '9', '9' }
};
char* out = &buffer[CARB_COUNTOF(buffer) - 1];
*out = '\0'; // NUL terminate
char* const end = out;
while (val >= 100)
{
// Since our division is very slow (integer division is slow anyways), use a lookup table and do digits two
// at a time. See Alexandrescu's talk "Three Optimization Tips for C++", tip from {fmt} library.
uint128_t mod;
val = divide_and_modulus(val, uint128_t(100), mod);
auto& lookup = kDigits[cast_to_uint64_t(mod)];
out -= 2;
CARB_ASSERT(out >= buffer);
memcpy(out, lookup, sizeof(lookup));
}
if (val < 10)
{
*--out = char('0' + (unsigned char)cast_to_uint64_t(val));
}
else
{
out -= 2;
auto& lookup = kDigits[cast_to_uint64_t(val)];
out[0] = lookup[0];
out[1] = lookup[1];
}
return cpp::string_view{ out, size_t(end - out) };
}
cpp::string_view format(int128_t val_) noexcept
{
bool const negative = val_ < 0;
uint128_t val = negative ? uint128_t(0) - uint128_t(val_) : uint128_t(val_);
auto sv = format(val);
if (!negative)
return sv;
auto p = const_cast<char*>(sv.data());
*--p = '-';
return cpp::string_view(p, sv.length() + 1);
}
cpp::string_view format_hex(int128_t val) noexcept
{
return format_hex(uint128_t(val));
}
cpp::string_view format_hex(uint128_t val) noexcept
{
constexpr static char kDigitsHex[] = "0123456789ABCDEF";
char* out = &buffer[CARB_COUNTOF(buffer) - 1];
*out = '\0'; // NUL terminate
char* const end = out;
while (val >= 0x10)
{
*--out = kDigitsHex[cast_to_uint64_t(val) & 0xf];
val >>= 4;
}
*--out = kDigitsHex[cast_to_uint64_t(val) & 0xf];
return cpp::string_view{ out, size_t(end - out) };
}
private:
char buffer[41]; // 39 characters + optional '-' + NUL
};
} // namespace detail
inline std::string to_string(const int128_t& val)
{
detail::formatter128 formatter;
auto sv = formatter.format(val);
return std::string(sv);
}
inline std::string to_string(const uint128_t& val)
{
detail::formatter128 formatter;
auto sv = formatter.format(val);
return std::string(sv);
}
} // namespace math
} // namespace carb
#ifndef DOXYGEN_BUILD
# ifndef __GLIBCXX_TYPE_INT_N_0 // gnuc extension
template <>
struct std::hash<carb::math::int128_t>
{
size_t operator()(const carb::math::int128_t& val) const noexcept
{
return carb::hashBuffer(&val, sizeof(val));
}
};
template <>
struct std::hash<carb::math::uint128_t>
{
size_t operator()(const carb::math::uint128_t& val) const noexcept
{
return carb::hashBuffer(&val, sizeof(val));
}
};
# endif
#endif
// operator<< support
#if !defined(DOXYGEN_BUILD) && !CARB_TOOLCHAIN_CLANG && (CARB_COMPILER_GNUC && __GNUC__ >= 10)
// GCC 10+ really wants this in the std namespace which seems weird.
namespace std
{
#else
namespace carb
{
namespace math
{
#endif
inline ::std::ostream& operator<<(::std::ostream& os, const ::carb::math::int128_t& val)
{
::carb::math::detail::formatter128 formatter;
auto sv = !(os.flags() & ::std::ios_base::hex) ? formatter.format(val) : formatter.format_hex(val);
os << ::std::string_view(sv.data(), sv.length());
return os;
}
inline ::std::ostream& operator<<(::std::ostream& os, const ::carb::math::uint128_t& val)
{
::carb::math::detail::formatter128 formatter;
auto sv = !(os.flags() & ::std::ios_base::hex) ? formatter.format(val) : formatter.format_hex(val);
os << ::std::string_view(sv.data(), sv.length());
return os;
}
#if !defined(DOXYGEN_BUILD) && !CARB_TOOLCHAIN_CLANG && (CARB_COMPILER_GNUC && __GNUC__ >= 10)
}
#else
}
}
#endif
#undef CARBLOCAL_USE_BUILTIN_INT128