mirror of
https://github.com/boostorg/spirit.git
synced 2026-01-19 04:42:11 +00:00
Cease dependence on Boost.Regex
`boost/spirit/include/classic_regex.hpp` is an optional header
This commit is contained in:
@@ -33,7 +33,6 @@ target_link_libraries(boost_spirit
|
|||||||
Boost::preprocessor
|
Boost::preprocessor
|
||||||
Boost::proto
|
Boost::proto
|
||||||
Boost::range
|
Boost::range
|
||||||
Boost::regex
|
|
||||||
Boost::smart_ptr
|
Boost::smart_ptr
|
||||||
Boost::static_assert
|
Boost::static_assert
|
||||||
Boost::thread
|
Boost::thread
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
/*=============================================================================
|
/*=============================================================================
|
||||||
Copyright (c) 2001-2011 Joel de Guzman
|
Copyright (c) 2001-2011 Joel de Guzman
|
||||||
|
Copyright (c) 2023 Nikita Kniazev
|
||||||
|
|
||||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||||
@@ -11,10 +12,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <boost/config.hpp>
|
||||||
#include <boost/cstdint.hpp>
|
#include <boost/cstdint.hpp>
|
||||||
#include <boost/regex/pending/unicode_iterator.hpp>
|
|
||||||
#include <boost/type_traits/make_unsigned.hpp>
|
#include <boost/type_traits/make_unsigned.hpp>
|
||||||
#include <iterator>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace boost { namespace spirit
|
namespace boost { namespace spirit
|
||||||
@@ -24,48 +24,63 @@ namespace boost { namespace spirit
|
|||||||
typedef std::basic_string<ucs4_char> ucs4_string;
|
typedef std::basic_string<ucs4_char> ucs4_string;
|
||||||
typedef std::basic_string<utf8_char> utf8_string;
|
typedef std::basic_string<utf8_char> utf8_string;
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
inline void utf8_put_encode(utf8_string& out, ucs4_char x)
|
||||||
|
{
|
||||||
|
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90
|
||||||
|
if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)))
|
||||||
|
x = 0xFFFDul;
|
||||||
|
|
||||||
|
// Table 3-6. UTF-8 Bit Distribution
|
||||||
|
if (x < 0x80ul) {
|
||||||
|
out.push_back(static_cast<unsigned char>(x));
|
||||||
|
}
|
||||||
|
else if (x < 0x800ul) {
|
||||||
|
out.push_back(static_cast<unsigned char>(0xC0ul + (x >> 6)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
|
||||||
|
}
|
||||||
|
else if (x < 0x10000ul) {
|
||||||
|
out.push_back(static_cast<unsigned char>(0xE0ul + (x >> 12)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
out.push_back(static_cast<unsigned char>(0xF0ul + (x >> 18)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 12) & 0x3Ful)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Char>
|
template <typename Char>
|
||||||
inline utf8_string to_utf8(Char value)
|
inline utf8_string to_utf8(Char value)
|
||||||
{
|
{
|
||||||
// always store as UTF8
|
|
||||||
utf8_string result;
|
utf8_string result;
|
||||||
typedef std::back_insert_iterator<utf8_string> insert_iter;
|
|
||||||
insert_iter out_iter(result);
|
|
||||||
utf8_output_iterator<insert_iter> utf8_iter(out_iter);
|
|
||||||
typedef typename make_unsigned<Char>::type UChar;
|
typedef typename make_unsigned<Char>::type UChar;
|
||||||
*utf8_iter = (UChar)value;
|
detail::utf8_put_encode(result, static_cast<UChar>(value));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Char>
|
template <typename Char>
|
||||||
inline utf8_string to_utf8(Char const* str)
|
inline utf8_string to_utf8(Char const* str)
|
||||||
{
|
{
|
||||||
// always store as UTF8
|
|
||||||
utf8_string result;
|
utf8_string result;
|
||||||
typedef std::back_insert_iterator<utf8_string> insert_iter;
|
|
||||||
insert_iter out_iter(result);
|
|
||||||
utf8_output_iterator<insert_iter> utf8_iter(out_iter);
|
|
||||||
typedef typename make_unsigned<Char>::type UChar;
|
typedef typename make_unsigned<Char>::type UChar;
|
||||||
while (*str)
|
while (*str)
|
||||||
*utf8_iter++ = (UChar)*str++;
|
detail::utf8_put_encode(result, static_cast<UChar>(*str++));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Char, typename Traits, typename Allocator>
|
template <typename Char, typename Traits, typename Allocator>
|
||||||
inline utf8_string
|
inline utf8_string
|
||||||
to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
|
to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
|
||||||
{
|
{
|
||||||
// always store as UTF8
|
|
||||||
utf8_string result;
|
utf8_string result;
|
||||||
typedef std::back_insert_iterator<utf8_string> insert_iter;
|
|
||||||
insert_iter out_iter(result);
|
|
||||||
utf8_output_iterator<insert_iter> utf8_iter(out_iter);
|
|
||||||
typedef typename make_unsigned<Char>::type UChar;
|
typedef typename make_unsigned<Char>::type UChar;
|
||||||
for (Char const* ptr = str.data(),
|
for (Char const* ptr = str.data(),
|
||||||
* end = ptr + str.size(); ptr < end; ++ptr)
|
* end = ptr + str.size(); ptr < end; ++ptr)
|
||||||
{
|
detail::utf8_put_encode(result, static_cast<UChar>(*ptr));
|
||||||
*utf8_iter++ = (UChar)*ptr;
|
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,28 +89,37 @@ namespace boost { namespace spirit
|
|||||||
// Encodes a single wide character as a UTF-8 string.
// The value is passed to the encoder as-is (after conversion to unsigned);
// no surrogate pairing is attempted for a lone code unit.
inline utf8_string to_utf8(wchar_t value)
{
    typedef make_unsigned<wchar_t>::type uwchar_t;

    utf8_string encoded;
    detail::utf8_put_encode(encoded, static_cast<uwchar_t>(value));
    return encoded;
}
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
inline ucs4_char decode_utf16(wchar_t const*& s)
|
||||||
|
{
|
||||||
|
typedef make_unsigned<wchar_t>::type uwchar_t;
|
||||||
|
|
||||||
|
uwchar_t x(*s);
|
||||||
|
if (x < 0xD800ul || x > 0xDFFFul)
|
||||||
|
return x;
|
||||||
|
|
||||||
|
// expected high-surrogate
|
||||||
|
if (BOOST_UNLIKELY((x >> 10) != 0x36ul))
|
||||||
|
return 0xFFFDul;
|
||||||
|
|
||||||
|
uwchar_t y(*++s);
|
||||||
|
// expected low-surrogate
|
||||||
|
if (BOOST_UNLIKELY((y >> 10) != 0x37ul))
|
||||||
|
return 0xFFFDul;
|
||||||
|
|
||||||
|
return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Converts a zero-terminated wide string, read as UTF-16, to UTF-8.
// Decoding stops at the first code point that decodes to zero.
inline utf8_string to_utf8(wchar_t const* str)
{
    utf8_string encoded;
    for (;;) {
        // decode_utf16 receives `str` by reference and may advance it
        // while decoding; the increment below then moves to the next unit.
        ucs4_char const code_point = detail::decode_utf16(str);
        if (code_point == ucs4_char())
            break;
        detail::utf8_put_encode(encoded, code_point);
        ++str;
    }
    return encoded;
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
/*=============================================================================
|
/*=============================================================================
|
||||||
Copyright (c) 2001-2014 Joel de Guzman
|
Copyright (c) 2001-2014 Joel de Guzman
|
||||||
|
Copyright (c) 2023 Nikita Kniazev
|
||||||
|
|
||||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||||
@@ -7,43 +8,62 @@
|
|||||||
#if !defined(BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM)
|
#if !defined(BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM)
|
||||||
#define BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM
|
#define BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM
|
||||||
|
|
||||||
#include <boost/cstdint.hpp>
|
#include <boost/config.hpp>
|
||||||
#include <boost/regex/pending/unicode_iterator.hpp>
|
#include <type_traits>
|
||||||
#include <boost/type_traits/make_unsigned.hpp>
|
|
||||||
#include <iterator>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace boost { namespace spirit { namespace x3
|
namespace boost { namespace spirit { namespace x3
|
||||||
{
|
{
|
||||||
typedef ::boost::uint32_t ucs4_char;
|
typedef char32_t ucs4_char;
|
||||||
typedef char utf8_char;
|
typedef char utf8_char;
|
||||||
typedef std::basic_string<ucs4_char> ucs4_string;
|
typedef std::basic_string<ucs4_char> ucs4_string;
|
||||||
typedef std::basic_string<utf8_char> utf8_string;
|
typedef std::basic_string<utf8_char> utf8_string;
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
inline void utf8_put_encode(utf8_string& out, ucs4_char x)
|
||||||
|
{
|
||||||
|
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90
|
||||||
|
if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)))
|
||||||
|
x = 0xFFFDul;
|
||||||
|
|
||||||
|
// Table 3-6. UTF-8 Bit Distribution
|
||||||
|
if (x < 0x80ul) {
|
||||||
|
out.push_back(static_cast<unsigned char>(x));
|
||||||
|
}
|
||||||
|
else if (x < 0x800ul) {
|
||||||
|
out.push_back(static_cast<unsigned char>(0xC0ul + (x >> 6)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
|
||||||
|
}
|
||||||
|
else if (x < 0x10000ul) {
|
||||||
|
out.push_back(static_cast<unsigned char>(0xE0ul + (x >> 12)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
out.push_back(static_cast<unsigned char>(0xF0ul + (x >> 18)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 12) & 0x3Ful)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
|
||||||
|
out.push_back(static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Char>
|
template <typename Char>
|
||||||
inline utf8_string to_utf8(Char value)
|
inline utf8_string to_utf8(Char value)
|
||||||
{
|
{
|
||||||
// always store as UTF8
|
|
||||||
utf8_string result;
|
utf8_string result;
|
||||||
typedef std::back_insert_iterator<utf8_string> insert_iter;
|
typedef typename std::make_unsigned<Char>::type UChar;
|
||||||
insert_iter out_iter(result);
|
detail::utf8_put_encode(result, static_cast<UChar>(value));
|
||||||
utf8_output_iterator<insert_iter> utf8_iter(out_iter);
|
|
||||||
typedef typename make_unsigned<Char>::type UChar;
|
|
||||||
*utf8_iter = (UChar)value;
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Char>
|
template <typename Char>
|
||||||
inline utf8_string to_utf8(Char const* str)
|
inline utf8_string to_utf8(Char const* str)
|
||||||
{
|
{
|
||||||
// always store as UTF8
|
|
||||||
utf8_string result;
|
utf8_string result;
|
||||||
typedef std::back_insert_iterator<utf8_string> insert_iter;
|
typedef typename std::make_unsigned<Char>::type UChar;
|
||||||
insert_iter out_iter(result);
|
|
||||||
utf8_output_iterator<insert_iter> utf8_iter(out_iter);
|
|
||||||
typedef typename make_unsigned<Char>::type UChar;
|
|
||||||
while (*str)
|
while (*str)
|
||||||
*utf8_iter++ = (UChar)*str++;
|
detail::utf8_put_encode(result, static_cast<UChar>(*str++));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -51,16 +71,10 @@ namespace boost { namespace spirit { namespace x3
|
|||||||
inline utf8_string
|
inline utf8_string
|
||||||
to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
|
to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
|
||||||
{
|
{
|
||||||
// always store as UTF8
|
|
||||||
utf8_string result;
|
utf8_string result;
|
||||||
typedef std::back_insert_iterator<utf8_string> insert_iter;
|
typedef typename std::make_unsigned<Char>::type UChar;
|
||||||
insert_iter out_iter(result);
|
|
||||||
utf8_output_iterator<insert_iter> utf8_iter(out_iter);
|
|
||||||
typedef typename make_unsigned<Char>::type UChar;
|
|
||||||
for (Char ch : str)
|
for (Char ch : str)
|
||||||
{
|
detail::utf8_put_encode(result, static_cast<UChar>(ch));
|
||||||
*utf8_iter++ = (UChar)ch;
|
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -69,28 +83,37 @@ namespace boost { namespace spirit { namespace x3
|
|||||||
// Encodes a single wide character as a UTF-8 string.
// The value is passed to the encoder as-is (after conversion to unsigned);
// no surrogate pairing is attempted for a lone code unit.
inline utf8_string to_utf8(wchar_t value)
{
    using uwchar_t = std::make_unsigned<wchar_t>::type;

    utf8_string encoded;
    detail::utf8_put_encode(encoded, static_cast<uwchar_t>(value));
    return encoded;
}
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
inline ucs4_char decode_utf16(wchar_t const*& s)
|
||||||
|
{
|
||||||
|
typedef std::make_unsigned<wchar_t>::type uwchar_t;
|
||||||
|
|
||||||
|
uwchar_t x(*s);
|
||||||
|
if (x < 0xD800ul || x > 0xDFFFul)
|
||||||
|
return x;
|
||||||
|
|
||||||
|
// expected high-surrogate
|
||||||
|
if (BOOST_UNLIKELY((x >> 10) != 0b110110ul))
|
||||||
|
return 0xFFFDul;
|
||||||
|
|
||||||
|
uwchar_t y(*++s);
|
||||||
|
// expected low-surrogate
|
||||||
|
if (BOOST_UNLIKELY((y >> 10) != 0b110111ul))
|
||||||
|
return 0xFFFDul;
|
||||||
|
|
||||||
|
return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Converts a zero-terminated wide string, read as UTF-16, to UTF-8.
// Decoding stops at the first code point that decodes to zero.
inline utf8_string to_utf8(wchar_t const* str)
{
    utf8_string encoded;
    while (true) {
        // decode_utf16 receives `str` by reference and may advance it
        // while decoding; the increment below then moves to the next unit.
        ucs4_char const code_point = detail::decode_utf16(str);
        if (code_point == ucs4_char())
            break;
        detail::utf8_put_encode(encoded, code_point);
        ++str;
    }
    return encoded;
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user