2
0
mirror of https://github.com/boostorg/spirit.git synced 2026-01-19 04:42:11 +00:00

Modernize basic_chset and char range related components

This commit is contained in:
Nana Sakisaka
2025-09-08 08:50:20 +09:00
parent 572da40062
commit 5fb8252115
5 changed files with 436 additions and 110 deletions

View File

@@ -1,125 +1,113 @@
/*=============================================================================
Copyright (c) 2001-2011 Joel de Guzman
Copyright (c) 2001-2009 Daniel Nuffer
http://spirit.sourceforge.net/
Copyright (c) 2025 Nana Sakisaka
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#ifndef BOOST_SPIRIT_BASIC_CHSET_APRIL_17_2008
#define BOOST_SPIRIT_BASIC_CHSET_APRIL_17_2008
#ifndef BOOST_SPIRIT_X3_CHAR_DETAIL_BASIC_CHSET_HPP
#define BOOST_SPIRIT_X3_CHAR_DETAIL_BASIC_CHSET_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/x3/char/detail/char_range_run.hpp>
///////////////////////////////////////////////////////////////////////////////
#include <bitset>
#include <climits>
#include <boost/spirit/home/support/char_set/range_run.hpp>
#include <climits>
namespace boost { namespace spirit { namespace support { namespace detail
namespace boost::spirit::x3::detail
{
///////////////////////////////////////////////////////////////////////////
//
// basic_chset: basic character set implementation using range_run
//
///////////////////////////////////////////////////////////////////////////
template <typename Char>
// basic character set implementation using range_run
template <typename CharT>
struct basic_chset
{
basic_chset() {}
basic_chset(basic_chset const& arg_)
: rr(arg_.rr) {}
bool
test(Char v) const
[[nodiscard]] constexpr bool
test(CharT v) const noexcept
{
return rr.test(v);
}
void
set(Char from, Char to)
constexpr void
set(CharT from, CharT to) noexcept
{
rr.set(range<Char>(from, to));
rr.set(char_range<CharT>(from, to));
}
void
set(Char c)
constexpr void
set(CharT c) noexcept
{
rr.set(range<Char>(c, c));
rr.set(char_range<CharT>(c, c));
}
void
clear(Char from, Char to)
constexpr void
clear(CharT from, CharT to) noexcept
{
rr.clear(range<Char>(from, to));
rr.clear(char_range<CharT>(from, to));
}
void
clear(Char c)
constexpr void
clear(CharT c) noexcept
{
rr.clear(range<Char>(c, c));
rr.clear(char_range<CharT>(c, c));
}
void
clear()
constexpr void
clear() noexcept
{
rr.clear();
}
void
inverse()
constexpr void
inverse() noexcept
{
basic_chset inv;
inv.set(
(std::numeric_limits<Char>::min)(),
(std::numeric_limits<Char>::max)()
(std::numeric_limits<CharT>::min)(),
(std::numeric_limits<CharT>::max)()
);
inv -= *this;
swap(inv);
}
void
swap(basic_chset& x)
constexpr void
swap(basic_chset& x) noexcept
{
rr.swap(x.rr);
}
basic_chset&
operator|=(basic_chset const& x)
constexpr basic_chset&
operator|=(basic_chset const& x) noexcept
{
typedef typename range_run<Char>::const_iterator const_iterator;
typedef typename range_run<CharT>::const_iterator const_iterator;
for (const_iterator iter = x.rr.begin(); iter != x.rr.end(); ++iter)
rr.set(*iter);
return *this;
}
basic_chset&
operator&=(basic_chset const& x)
constexpr basic_chset&
operator&=(basic_chset const& x) noexcept
{
basic_chset inv;
inv.set(
(std::numeric_limits<Char>::min)(),
(std::numeric_limits<Char>::max)()
(std::numeric_limits<CharT>::min)(),
(std::numeric_limits<CharT>::max)()
);
inv -= x;
*this -= inv;
return *this;
}
basic_chset&
operator-=(basic_chset const& x)
constexpr basic_chset&
operator-=(basic_chset const& x) noexcept
{
typedef typename range_run<Char>::const_iterator const_iterator;
typedef typename range_run<CharT>::const_iterator const_iterator;
for (const_iterator iter = x.rr.begin(); iter != x.rr.end(); ++iter)
rr.clear(*iter);
return *this;
}
basic_chset&
operator^=(basic_chset const& x)
constexpr basic_chset&
operator^=(basic_chset const& x) noexcept
{
basic_chset bma = x;
bma -= *this;
@@ -128,7 +116,8 @@ namespace boost { namespace spirit { namespace support { namespace detail
return *this;
}
private: range_run<Char> rr;
private:
char_range_run<CharT> rr;
};
#if (CHAR_BIT == 8)
@@ -138,92 +127,89 @@ namespace boost { namespace spirit { namespace support { namespace detail
// basic_chset: specializations for 8 bit chars using std::bitset
//
///////////////////////////////////////////////////////////////////////////
template <typename Char>
template <typename CharT>
struct basic_chset_8bit
{
basic_chset_8bit() {}
basic_chset_8bit(basic_chset_8bit const& arg_)
: bset(arg_.bset) {}
bool
test(Char v) const
[[nodiscard]] constexpr bool
test(CharT v) const noexcept
{
return bset.test((unsigned char)v);
}
void
set(Char from, Char to)
constexpr void
set(CharT from, CharT to) noexcept
{
for (int i = from; i <= to; ++i)
bset.set((unsigned char)i);
}
void
set(Char c)
constexpr void
set(CharT c) noexcept
{
bset.set((unsigned char)c);
}
void
clear(Char from, Char to)
constexpr void
clear(CharT from, CharT to) noexcept
{
for (int i = from; i <= to; ++i)
bset.reset((unsigned char)i);
}
void
clear(Char c)
constexpr void
clear(CharT c) noexcept
{
bset.reset((unsigned char)c);
}
void
clear()
constexpr void
clear() noexcept
{
bset.reset();
}
void
inverse()
constexpr void
inverse() noexcept
{
bset.flip();
}
void
swap(basic_chset_8bit& x)
constexpr void
swap(basic_chset_8bit& x) noexcept
{
std::swap(bset, x.bset);
}
basic_chset_8bit&
operator|=(basic_chset_8bit const& x)
constexpr basic_chset_8bit&
operator|=(basic_chset_8bit const& x) noexcept
{
bset |= x.bset;
return *this;
}
basic_chset_8bit&
operator&=(basic_chset_8bit const& x)
constexpr basic_chset_8bit&
operator&=(basic_chset_8bit const& x) noexcept
{
bset &= x.bset;
return *this;
}
basic_chset_8bit&
operator-=(basic_chset_8bit const& x)
constexpr basic_chset_8bit&
operator-=(basic_chset_8bit const& x) noexcept
{
bset &= ~x.bset;
return *this;
}
basic_chset_8bit&
operator^=(basic_chset_8bit const& x)
constexpr basic_chset_8bit&
operator^=(basic_chset_8bit const& x) noexcept
{
bset ^= x.bset;
return *this;
}
private: std::bitset<256> bset;
private:
std::bitset<256> bset;
};
/////////////////////////////////
@@ -243,7 +229,7 @@ namespace boost { namespace spirit { namespace support { namespace detail
#endif // #if (CHAR_BIT == 8)
}}}}
} // boost::spirit::x3::detail
#endif

View File

@@ -1,18 +1,18 @@
/*=============================================================================
Copyright (c) 2001-2014 Joel de Guzman
Copyright (c) 2001-2011 Hartmut Kaiser
Copyright (c) 2025 Nana Sakisaka
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#if !defined(BOOST_SPIRIT_X3_CAST_CHAR_NOVEMBER_10_2006_0907AM)
#ifndef BOOST_SPIRIT_X3_CAST_CHAR_NOVEMBER_10_2006_0907AM
#define BOOST_SPIRIT_X3_CAST_CHAR_NOVEMBER_10_2006_0907AM
#include <boost/type_traits/is_signed.hpp>
#include <boost/type_traits/make_unsigned.hpp>
#include <boost/type_traits/make_signed.hpp>
#include <type_traits>
#include <concepts>
namespace boost { namespace spirit { namespace x3 { namespace detail
namespace boost::spirit::x3::detail
{
// Here's the thing... typical encodings (except ASCII) deal with unsigned
// integers > 127 (ASCII uses only 127). Yet, most char and wchar_t are signed.
@@ -24,25 +24,19 @@ namespace boost { namespace spirit { namespace x3 { namespace detail
// optimizer will optimize the if-else branches}
template <typename TargetChar, typename SourceChar>
TargetChar cast_char(SourceChar ch)
[[nodiscard]] constexpr TargetChar cast_char(SourceChar ch) noexcept
{
#if defined(_MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4127) // conditional expression is constant
#endif
if (is_signed<TargetChar>::value != is_signed<SourceChar>::value)
if constexpr (std::is_signed_v<TargetChar> != std::is_signed_v<SourceChar>)
{
if (is_signed<SourceChar>::value)
if constexpr (std::is_signed_v<SourceChar>)
{
// source is signed, target is unsigned
typedef typename make_unsigned<SourceChar>::type USourceChar;
return TargetChar(USourceChar(ch));
// source is signed, target is unsigned
return TargetChar(static_cast<std::make_unsigned_t<SourceChar>>(ch));
}
else
{
// source is unsigned, target is signed
typedef typename make_signed<SourceChar>::type SSourceChar;
return TargetChar(SSourceChar(ch));
// source is unsigned, target is signed
return TargetChar(static_cast<std::make_signed_t<SourceChar>>(ch));
}
}
else
@@ -50,12 +44,17 @@ namespace boost { namespace spirit { namespace x3 { namespace detail
// source and target has same signedness
return TargetChar(ch); // just cast
}
#if defined(_MSC_VER)
# pragma warning(pop)
#endif
}
}}}}
// Satisfied when `cast_char<TargetChar>` is callable with an lvalue of type
// `SourceChar` and the result is convertible to `TargetChar`.
// Note the parameter order here (source, target) is the reverse of the
// template parameter order of `cast_char` itself (target, source).
template <typename SourceChar, typename TargetChar>
concept cast_char_viable =
    requires(SourceChar source)
    {
        { cast_char<TargetChar>(source) } -> std::convertible_to<TargetChar>;
    };
// Same requirement as a viable `cast_char<TargetChar>(SourceChar)` call, with
// the additional constraint that the call expression is known not to throw.
// Parameter order is (source, target), the reverse of `cast_char`.
template <typename SourceChar, typename TargetChar>
concept cast_char_noexcept =
    requires(SourceChar source)
    {
        { cast_char<TargetChar>(source) } noexcept -> std::convertible_to<TargetChar>;
    };
} // boost::spirit::x3::detail
#endif

View File

@@ -0,0 +1,40 @@
/*=============================================================================
Copyright (c) 2001-2011 Joel de Guzman
Copyright (c) 2025 Nana Sakisaka
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef BOOST_SPIRIT_X3_CHAR_DETAIL_CHAR_RANGE_HPP
#define BOOST_SPIRIT_X3_CHAR_DETAIL_CHAR_RANGE_HPP
#include <type_traits>
#include <utility>
namespace boost::spirit::x3::detail
{
// A closed range [first, last]
template <typename CharT>
struct char_range
{
static_assert(std::is_default_constructible_v<CharT>);
static_assert(std::is_copy_constructible_v<CharT> && std::is_copy_assignable_v<CharT>);
using value_type = CharT;
constexpr char_range() noexcept(std::is_nothrow_default_constructible_v<CharT>) = default;
template<class A, class B>
requires std::is_constructible_v<CharT, A> && std::is_constructible_v<CharT, B>
constexpr char_range(A&& first, B&& last)
noexcept(std::is_nothrow_constructible_v<CharT, A> && std::is_nothrow_constructible_v<CharT, B>)
: first(std::forward<A>(first))
, last(std::forward<B>(last))
{}
CharT first{};
CharT last{};
};
} // boost::spirit::x3::detail
#endif

View File

@@ -0,0 +1,108 @@
/*=============================================================================
Copyright (c) 2001-2011 Joel de Guzman
Copyright (c) 2025 Nana Sakisaka
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef BOOST_SPIRIT_X3_CHAR_DETAIL_CHAR_RANGE_FUNCTIONS_HPP
#define BOOST_SPIRIT_X3_CHAR_DETAIL_CHAR_RANGE_FUNCTIONS_HPP
#include <boost/spirit/home/x3/char/detail/char_range.hpp>
#include <limits>
#include <concepts>
namespace boost::spirit::x3::detail
{
// A range is valid when its lower bound does not exceed its upper bound;
// a single-value range (first == last) is valid.
template <typename CharT>
[[nodiscard]] constexpr bool
is_valid(char_range<CharT> const& range) noexcept
{
    auto const& lower = range.first;
    auto const& upper = range.last;
    return lower <= upper;
}
// Returns true when 'other' lies entirely within 'range' (containment,
// not mere intersection): range.first <= other.first and
// other.last <= range.last.
template <typename CharT>
[[nodiscard]] constexpr bool
includes(char_range<CharT> const& range, char_range<CharT> const& other) noexcept
{
    // 'other' begins before 'range' does: cannot be contained
    if (!(range.first <= other.first))
    {
        return false;
    }
    return range.last >= other.last;
}
// Returns true when the single value 'val' falls inside the closed
// interval [range.first, range.last].
template <typename CharT>
[[nodiscard]] constexpr bool
includes(char_range<CharT> const& range, CharT val) noexcept
{
    // 'val' is below the lower bound
    if (!(range.first <= val))
    {
        return false;
    }
    return range.last >= val;
}
// Returns true when 'range' overlaps 'other' or sits directly next to it,
// i.e. when the two can be represented as one contiguous range.
template <typename CharT>
[[nodiscard]] constexpr bool
can_merge(char_range<CharT> const& range, char_range<CharT> const& other) noexcept
{
    using limits = std::numeric_limits<CharT>;

    // Widen 'range' by one on each side, but never step past the
    // representable minimum/maximum of CharT.
    CharT widened_first = range.first;
    if (!(range.first == (limits::min)()))
    {
        widened_first = range.first-1;
    }

    CharT widened_last = range.last;
    if (!(range.last == (limits::max)()))
    {
        widened_last = range.last+1;
    }

    // The widened range touches 'other' exactly when neither lies
    // strictly beyond the other.
    return (widened_first <= other.last) && (widened_last >= other.first);
}
// Grows 'result' in place so that it also covers 'other'. Each bound of
// 'result' is only ever moved outwards; it is left untouched when it
// already reaches at least as far as the corresponding bound of 'other'.
template <typename CharT>
constexpr void
merge(char_range<CharT>& result, char_range<CharT> const& other) noexcept
{
    bool const extend_left = result.first > other.first;
    if (extend_left) result.first = other.first;

    bool const extend_right = result.last < other.last;
    if (extend_right) result.last = other.last;
}
// Transparent comparator that orders ranges and bare character values by
// the range's lower bound ('first'). It lets a sequence of char_range
// objects sorted by 'first' be binary-searched with either a range or a
// single character as the key.
struct char_range_compare
{
    using is_transparent = int;

    // range < value: compares the range's lower bound against the value
    template <typename CharT>
    [[nodiscard]] constexpr bool operator()(char_range<CharT> const& x, CharT const y) const noexcept
    {
        return x.first < y;
    }

    // value < range: compares the value against the range's lower bound
    template <typename CharT>
    [[nodiscard]] constexpr bool operator()(CharT const x, char_range<CharT> const& y) const noexcept
    {
        return x < y.first;
    }

    // range < range: compares the two lower bounds
    template <typename CharT>
    [[nodiscard]] constexpr bool operator()(char_range<CharT> const& x, char_range<CharT> const& y) const noexcept
    {
        return x.first < y.first;
    }

    // This overload is required to satisfy `std::indirect_strict_weak_order`,
    // which checks the comparator against every pairing of the key type and
    // the element type, including (value, value).
    // It must itself be a strict weak order consistent with the overloads
    // above, hence `<` (an equality test would be reflexive and would
    // violate the ordering's semantic requirements).
    template <std::integral CharT> // hack: minimal constraint to avoid obvious mistakes
    [[nodiscard]] constexpr bool operator()(CharT const x, CharT const y) const noexcept
    {
        return x < y;
    }
};
} // boost::spirit::x3::detail
#endif

View File

@@ -0,0 +1,193 @@
/*=============================================================================
Copyright (c) 2001-2011 Joel de Guzman
Copyright (c) 2025 Nana Sakisaka
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#ifndef BOOST_SPIRIT_X3_CHAR_DETAIL_CHAR_RANGE_RUN_HPP
#define BOOST_SPIRIT_X3_CHAR_DETAIL_CHAR_RANGE_RUN_HPP
#include <boost/spirit/home/x3/char/detail/char_range.hpp>
#include <boost/spirit/home/x3/char/detail/char_range_functions.hpp>
#include <boost/assert.hpp>
#include <algorithm>
#include <vector>
#include <iterator>
#include <ranges>
#include <utility>
namespace boost::spirit::x3::detail
{
// An implementation of a sparse bit (boolean) set. The set uses
// a sorted vector of disjoint ranges. This class implements the
// bare minimum essentials from which the full range of set
// operators can be implemented. The set is constructed from
// ranges. Internally, adjacent or overlapping ranges are
// coalesced.
//
// range_runs are very space-economical in situations where there
// are lots of ranges and a few individual disjoint values.
// Searching is O(log n) where n is the number of ranges.
// Set of CharT values stored as a vector of char_range objects.
// The binary searches below (std::ranges::upper_bound with
// char_range_compare) rely on 'run_' being ordered by each range's lower
// bound; set() and clear() insert/erase at the searched position to keep
// that ordering.
template <typename CharT>
class char_range_run
{
public:
using range_type = char_range<CharT>;
using storage_type = std::vector<range_type>; // TODO: use default_init_allocator as soon as constexpr placement new is available
// Attempts to merge 'range' into the element '*iter' of 'run'.
// Returns false (leaving 'run' untouched) when the two neither overlap
// nor are adjacent. Otherwise '*iter' is grown to cover 'range', any
// following elements that the grown '*iter' now covers or touches are
// absorbed and erased, and true is returned.
// 'iter' must be a dereferenceable iterator into 'run'.
[[nodiscard]] static constexpr bool
try_merge(storage_type& run, typename storage_type::iterator iter, range_type const& range)
noexcept(std::is_nothrow_move_assignable_v<CharT>)
{
// *iter intersects with or is adjacent to 'range'?
if (!detail::can_merge(*iter, range)) return false;
// merge range and *iter
detail::merge(*iter, range);
// collapse all subsequent ranges that can merge with *iter:
auto it = std::next(iter);
// 1. skip subsequent ranges completely included in *iter
while (it != run.end() && it->last <= iter->last)
{
++it;
}
// 2. collapse next range if adjacent or overlapping with *iter
//    (at this point it->last > iter->last, so taking it->last extends *iter)
if (it != run.end() && it->first-1 <= iter->last)
{
iter->last = it->last;
++it;
}
// erase all ranges that were collapsed
run.erase(std::next(iter), it);
return true;
}
// Returns true when 'val' is contained in one of the stored ranges.
[[nodiscard]] constexpr bool test(CharT val) const noexcept
{
if (run_.empty()) return false;
// search the ranges for one that potentially includes val:
// 'iter' is the first range whose lower bound is greater than val,
// so the only candidate is the range just before it
auto const iter = std::ranges::upper_bound(
run_, val,
char_range_compare{}
);
// return true if *(iter-1) includes val
return iter != run_.begin() && detail::includes(*std::prev(iter), val);
}
// Exchanges the stored ranges with those of 'other'.
constexpr void swap(char_range_run& other) noexcept
{
run_.swap(other.run_);
}
// Adds every value of 'range' to the set. 'range' must be valid
// (checked with BOOST_ASSERT). The range is merged into a neighbouring
// stored range when it overlaps or touches one; otherwise it is inserted
// as a new element at its sorted position.
constexpr void set(range_type const& range)
{
BOOST_ASSERT(detail::is_valid(range));
if (run_.empty())
{
// the vector is empty, insert 'range'
run_.emplace_back(range);
return;
}
// search the ranges for one that potentially includes 'range'
auto iter = std::ranges::upper_bound(run_, range, char_range_compare{});
if (iter != run_.begin())
{
// if *(iter-1) includes 'range', return early
if (detail::includes(*std::prev(iter), range))
{
return;
}
// if *(iter-1) can merge with 'range', merge them and return
if (this->try_merge(run_, std::prev(iter), range))
{
return;
}
}
// if *iter can merge with 'range', merge them
if (iter == run_.end() || !this->try_merge(run_, iter, range))
{
// no overlap, insert 'range'
run_.insert(iter, range);
}
}
// Removes every value of 'range' from the set. 'range' must be valid
// (checked with BOOST_ASSERT). Stored ranges fully covered by 'range'
// are erased, partially covered ones are clipped, and a stored range
// that strictly contains 'range' is split in two.
constexpr void clear(range_type const& range)
{
BOOST_ASSERT(detail::is_valid(range));
if (run_.empty()) return;
// search the ranges for one that potentially includes 'range'
auto iter = std::ranges::upper_bound(run_, range, char_range_compare{});
// 'range' starts with or after another range:
if (iter != run_.begin())
{
auto const left_iter = std::prev(iter);
// 'range' starts after '*left_iter':
if (left_iter->first < range.first)
{
// if 'range' is completely included inside '*left_iter':
// need to break it apart into two ranges (punch a hole),
if (left_iter->last > range.last)
{
// save the old upper bound before clipping: it becomes the
// upper bound of the new right-hand piece
auto const last_save = left_iter->last;
left_iter->last = range.first-1;
run_.insert(iter, range_type(range.last+1, last_save));
return;
}
// if 'range' contains 'left_iter->last':
// truncate '*left_iter' (clip its right)
else if (left_iter->last >= range.first)
{
left_iter->last = range.first-1;
}
}
// 'range' has the same left bound as '*left_iter': it
// must be removed or truncated by the code below
else
{
iter = left_iter;
}
}
// remove or truncate subsequent ranges that overlap with 'range':
auto it = iter;
// 1. skip subsequent ranges completely included in 'range'
while (it != run_.end() && it->last <= range.last)
{
++it;
}
// 2. clip left of next range if overlapping with 'range'
if (it != run_.end() && it->first <= range.last)
{
it->first = range.last+1;
}
// erase all ranges that 'range' contained
run_.erase(iter, it);
}
// Removes all ranges, leaving the set empty.
constexpr void clear() noexcept
{
run_.clear();
}
private:
// the stored ranges (see the class comment for the ordering the searches rely on)
storage_type run_;
};
} // boost::spirit::x3::detail
#endif