mirror of
https://github.com/boostorg/quickbook.git
synced 2026-01-20 04:42:30 +00:00
Doesn't support things like custom entities, but should be good enough for the boostbook/docbook that quickbook generates.
222 lines
7.8 KiB
C++
222 lines
7.8 KiB
C++
/*=============================================================================
|
|
Copyright (c) 2002 2004 2006 Joel de Guzman
|
|
Copyright (c) 2004 Eric Niebler
|
|
http://spirit.sourceforge.net/
|
|
|
|
Use, modification and distribution is subject to the Boost Software
|
|
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
|
http://www.boost.org/LICENSE_1_0.txt)
|
|
=============================================================================*/
|
|
#include "utils.hpp"
|
|
|
|
#include <cctype>
|
|
#include <cstring>
|
|
#include <map>
|
|
#include <boost/spirit/include/classic_chset.hpp>
|
|
#include <boost/spirit/include/classic_core.hpp>
|
|
#include <boost/spirit/include/classic_numerics.hpp>
|
|
#include <boost/spirit/include/phoenix1_binders.hpp>
|
|
#include <boost/spirit/include/phoenix1_primitives.hpp>
|
|
|
|
namespace quickbook
|
|
{
|
|
namespace detail
|
|
{
|
|
namespace cl = boost::spirit::classic;
|
|
namespace ph = phoenix;
|
|
|
|
// Spirit Classic grammar that decodes XML character references and the five
// predefined entities (amp, apos, gt, lt, quot), appending the decoded text
// to `result`.
//
// Input that is not a recognised reference (including a lone '&') is copied
// through unchanged, one character at a time.
struct xml_decode_grammar : cl::grammar<xml_decode_grammar>
{
    // Destination string supplied by the caller; the semantic actions below
    // append to it.
    std::string& result;
    xml_decode_grammar(std::string& result_) : result(result_) {}

    // Appends a single character as-is.
    void append_char(char c) const { result += c; }

    // Appends the code point `c` encoded as UTF-8.
    //
    // Values that are not Unicode scalar values (the surrogate range
    // U+D800..U+DFFF and anything above U+10FFFF) are replaced by
    // U+FFFD REPLACEMENT CHARACTER so the output stays valid UTF-8.
    void append_escaped_char(unsigned int c) const
    {
        if (c < 0x80) {
            // Single byte. Appended as a char (not a C string) so that a
            // value of 0 is still appended.
            result += static_cast<char>(c);
        }
        else if (c < 0x800) {
            char e[] = {static_cast<char>(0xc0 + (c >> 6)),
                        static_cast<char>(0x80 + (c & 0x3f)), '\0'};
            result += e;
        }
        else if (c >= 0xd800 && c <= 0xdfff) {
            // Surrogates cannot be encoded in UTF-8.
            result += "\xEF\xBF\xBD";
        }
        else if (c < 0x10000) {
            char e[] = {static_cast<char>(0xe0 + (c >> 12)),
                        static_cast<char>(0x80 + ((c >> 6) & 0x3f)),
                        static_cast<char>(0x80 + (c & 0x3f)), '\0'};
            result += e;
        }
        else if (c < 0x110000) {
            char e[] = {static_cast<char>(0xf0 + (c >> 18)),
                        static_cast<char>(0x80 + ((c >> 12) & 0x3f)),
                        static_cast<char>(0x80 + ((c >> 6) & 0x3f)),
                        static_cast<char>(0x80 + (c & 0x3f)), '\0'};
            result += e;
        }
        else {
            // Beyond the Unicode range.
            result += "\xEF\xBF\xBD";
        }
    }

    template <typename Scanner> struct definition
    {
        definition(xml_decode_grammar const& self)
        {
            // clang-format off

            auto append_escaped_char = ph::bind(&xml_decode_grammar::append_escaped_char);
            auto append_char = ph::bind(&xml_decode_grammar::append_char);

            // '&' followed by a hexadecimal reference, a decimal reference
            // or one of the predefined entity names. The ';' after a
            // numeric reference is optional.
            auto encoded =
                cl::ch_p('&')
                >>  (   "#x"
                    >>  cl::hex_p           [append_escaped_char(self, ph::arg1)]
                    >>  !cl::ch_p(';')
                    |   '#'
                    >>  cl::uint_p          [append_escaped_char(self, ph::arg1)]
                    >>  !cl::ch_p(';')
                    |   cl::str_p("amp;")   [append_char(self, '&')]
                    |   cl::str_p("apos;")  [append_char(self, '\'')]
                    |   cl::str_p("gt;")    [append_char(self, '>')]
                    |   cl::str_p("lt;")    [append_char(self, '<')]
                    |   cl::str_p("quot;")  [append_char(self, '"')]
                    );

            // Fallback: any single character is copied verbatim.
            auto character = cl::anychar_p  [append_char(self, ph::arg1)];

            xml_encoded = *(encoded | character);

            // clang-format on
        }

        cl::rule<Scanner> const& start() { return xml_encoded; }
        cl::rule<Scanner> xml_encoded;
    };

  private:
    // Declared but not defined: the grammar holds a reference member and is
    // not meant to be assigned.
    xml_decode_grammar& operator=(xml_decode_grammar const&);
};
|
|
|
|
std::string decode_string(quickbook::string_view str)
|
|
{
|
|
std::string result;
|
|
xml_decode_grammar xml_decode(result);
|
|
boost::spirit::classic::parse(str.begin(), str.end(), xml_decode);
|
|
return result;
|
|
}
|
|
|
|
std::string encode_string(quickbook::string_view str)
|
|
{
|
|
std::string result;
|
|
result.reserve(str.size());
|
|
|
|
for (string_iterator it = str.begin(); it != str.end(); ++it) {
|
|
switch (*it) {
|
|
case '<':
|
|
result += "<";
|
|
break;
|
|
case '>':
|
|
result += ">";
|
|
break;
|
|
case '&':
|
|
result += "&";
|
|
break;
|
|
case '"':
|
|
result += """;
|
|
break;
|
|
default:
|
|
result += *it;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// Writes `ch` to `out`, replacing the XML special characters '<', '>', '&'
// and '"' with their predefined entities.
//
// Note ' is not included. See the curse of apos:
// http://fishbowl.pastiche.org/2003/07/01/the_curse_of_apos
void print_char(char ch, std::ostream& out)
{
    switch (ch) {
    case '<':
        out << "&lt;";
        break;
    case '>':
        out << "&gt;";
        break;
    case '&':
        out << "&amp;";
        break;
    case '"':
        out << "&quot;";
        break;
    default:
        out << ch;
        break;
    }
}
|
|
|
|
// Writes every character of `str` to `out` through print_char, so the XML
// special characters handled there are emitted in escaped form.
void print_string(quickbook::string_view str, std::ostream& out)
{
    for (char ch : str) {
        print_char(ch, out);
    }
}
|
|
|
|
std::string make_identifier(quickbook::string_view text)
|
|
{
|
|
std::string id(text.begin(), text.end());
|
|
for (std::string::iterator i = id.begin(); i != id.end(); ++i) {
|
|
if (!std::isalnum(static_cast<unsigned char>(*i))) {
|
|
*i = '_';
|
|
}
|
|
else {
|
|
*i = static_cast<char>(
|
|
std::tolower(static_cast<unsigned char>(*i)));
|
|
}
|
|
}
|
|
|
|
return id;
|
|
}
|
|
|
|
static std::string escape_uri_impl(
|
|
quickbook::string_view uri_param, char const* mark)
|
|
{
|
|
// Extra capital characters for validating percent escapes.
|
|
static char const hex[] = "0123456789abcdefABCDEF";
|
|
|
|
std::string uri;
|
|
uri.reserve(uri_param.size());
|
|
|
|
for (std::string::size_type n = 0; n < uri_param.size(); ++n) {
|
|
if (static_cast<unsigned char>(uri_param[n]) > 127 ||
|
|
(!std::isalnum(static_cast<unsigned char>(uri_param[n])) &&
|
|
!std::strchr(mark, uri_param[n])) ||
|
|
(uri_param[n] == '%' &&
|
|
!(n + 2 < uri_param.size() &&
|
|
std::strchr(hex, uri_param[n + 1]) &&
|
|
std::strchr(hex, uri_param[n + 2])))) {
|
|
char escape[] = {'%', hex[uri_param[n] / 16],
|
|
hex[uri_param[n] % 16], '\0'};
|
|
uri += escape;
|
|
}
|
|
else {
|
|
uri += uri_param[n];
|
|
}
|
|
}
|
|
|
|
return uri;
|
|
}
|
|
|
|
std::string escape_uri(quickbook::string_view uri_param)
|
|
{
|
|
std::string uri(uri_param.begin(), uri_param.end());
|
|
return escape_uri_impl(uri_param, "-_.!~*'()?\\/");
|
|
}
|
|
|
|
std::string partially_escape_uri(quickbook::string_view uri_param)
|
|
{
|
|
return escape_uri_impl(uri_param, "-_.!~*'()?\\/:&=#%+");
|
|
}
|
|
}
|
|
}
|