mirror of
https://github.com/boostorg/quickbook.git
synced 2026-02-02 09:02:15 +00:00
350 lines
10 KiB
C++
350 lines
10 KiB
C++
/*=============================================================================
    Copyright (c) 2013 Daniel James

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
|
|
|
|
#include "glob.hpp"
|
|
#include <cassert>
|
|
|
|
namespace quickbook
|
|
{
|
|
typedef string_iterator glob_iterator;
|
|
|
|
void check_glob_range(glob_iterator&, glob_iterator);
|
|
void check_glob_escape(glob_iterator&, glob_iterator);
|
|
|
|
bool match_section(
|
|
glob_iterator& pattern_begin,
|
|
glob_iterator pattern_end,
|
|
glob_iterator& filename_begin,
|
|
glob_iterator& filename_end);
|
|
bool match_range(
|
|
glob_iterator& pattern_begin, glob_iterator pattern_end, char x);
|
|
|
|
// Is pattern a glob or a plain file name?
|
|
// Throws glob_error if pattern is an invalid glob.
|
|
bool check_glob(quickbook::string_view pattern)
|
|
{
|
|
bool is_glob = false;
|
|
bool is_ascii = true;
|
|
|
|
glob_iterator begin = pattern.begin();
|
|
glob_iterator end = pattern.end();
|
|
|
|
while (begin != end) {
|
|
if (*begin < 32 || (*begin & 0x80)) is_ascii = false;
|
|
|
|
switch (*begin) {
|
|
case '\\':
|
|
check_glob_escape(begin, end);
|
|
break;
|
|
|
|
case '[':
|
|
check_glob_range(begin, end);
|
|
is_glob = true;
|
|
break;
|
|
|
|
case ']':
|
|
throw glob_error("uneven square brackets");
|
|
|
|
case '?':
|
|
is_glob = true;
|
|
++begin;
|
|
break;
|
|
|
|
case '*':
|
|
is_glob = true;
|
|
++begin;
|
|
|
|
if (begin != end && *begin == '*') {
|
|
throw glob_error("'**' not supported");
|
|
}
|
|
break;
|
|
|
|
default:
|
|
++begin;
|
|
}
|
|
}
|
|
|
|
if (is_glob && !is_ascii)
|
|
throw glob_error("invalid character, globs are ascii only");
|
|
|
|
return is_glob;
|
|
}
|
|
|
|
// Validate a square bracket expression.
//
// On entry 'begin' must point at the opening '['. On successful return it
// points just past the matching ']'.
//
// Throws glob_error if the expression is empty ("[]"), is not closed,
// contains a nested '[', contains an unescaped '/', or contains an invalid
// escape sequence.
void check_glob_range(glob_iterator& begin, glob_iterator end)
{
    assert(begin != end && *begin == '[');
    ++begin;

    // The '[' may be the last character of the pattern, so 'begin' must be
    // compared against 'end' before it is dereferenced. In that case the
    // loop below is skipped and the bracket is reported as unclosed.
    if (begin != end && *begin == ']') throw glob_error("empty range");

    while (begin != end) {
        switch (*begin) {
        case '\\':
            // Same rules as an escape outside of brackets; advances 'begin'
            // past both the backslash and the escaped character.
            check_glob_escape(begin, end);
            break;
        case '[':
            throw glob_error("nested square brackets");
        case ']':
            ++begin;
            return;
        case '/':
            throw glob_error("slash in square brackets");
        default:
            ++begin;
        }
    }

    // Ran out of pattern before finding the closing ']'.
    throw glob_error("uneven square brackets");
}
|
|
|
|
// Validate a backslash escape sequence.
//
// On entry 'begin' must point at the backslash. On successful return it
// points just past the escaped character.
//
// Throws glob_error if the backslash is the last character of the pattern,
// or if the escaped character is '\\' or '/'.
void check_glob_escape(glob_iterator& begin, glob_iterator end)
{
    assert(begin != end && *begin == '\\');

    // Step over the backslash itself.
    ++begin;
    if (begin == end) throw glob_error("trailing escape");

    char const escaped = *begin;
    if (escaped == '/' || escaped == '\\') {
        throw glob_error("contains escaped slash");
    }

    // Step over the escaped character.
    ++begin;
}
|
|
|
|
// Does filename match pattern?
|
|
// Might throw glob_error if pattern is an invalid glob,
|
|
// but should call check_glob first to validate the glob.
|
|
bool glob(
|
|
quickbook::string_view const& pattern,
|
|
quickbook::string_view const& filename)
|
|
{
|
|
// If there wasn't this special case then '*' would match an
|
|
// empty string.
|
|
if (filename.empty()) return pattern.empty();
|
|
|
|
glob_iterator pattern_it = pattern.begin();
|
|
glob_iterator pattern_end = pattern.end();
|
|
|
|
glob_iterator filename_it = filename.begin();
|
|
glob_iterator filename_end = filename.end();
|
|
|
|
if (!match_section(pattern_it, pattern_end, filename_it, filename_end))
|
|
return false;
|
|
|
|
while (pattern_it != pattern_end) {
|
|
assert(*pattern_it == '*');
|
|
++pattern_it;
|
|
|
|
if (pattern_it == pattern_end) return true;
|
|
|
|
if (*pattern_it == '*') {
|
|
throw glob_error("'**' not supported");
|
|
}
|
|
|
|
for (;;) {
|
|
if (filename_it == filename_end) return false;
|
|
if (match_section(
|
|
pattern_it, pattern_end, filename_it, filename_end))
|
|
break;
|
|
++filename_it;
|
|
}
|
|
}
|
|
|
|
return filename_it == filename_end;
|
|
}
|
|
|
|
// Match one section of a pattern (the characters up to the next '*' or the
// end of the pattern) against the start of the remaining file name.
//
// pattern_begin, pattern_end:   the part of the pattern still to be matched.
// filename_begin, filename_end: the part of the file name still to be
//                               matched.
//
// Returns true if the section matched, in which case pattern_begin is
// advanced to the next '*' (or to pattern_end) and filename_begin is
// advanced past the characters that were consumed. Returns false if it
// didn't match, leaving pattern_begin and filename_begin untouched. A
// section that reaches the end of the pattern only matches if it also
// consumes the whole of the remaining file name.
//
// Throws glob_error if the section of the pattern is invalid.
bool match_section(
    glob_iterator& pattern_begin,
    glob_iterator pattern_end,
    glob_iterator& filename_begin,
    glob_iterator& filename_end)
{
    // Work on copies so that the caller's iterators are only updated when
    // the whole section matches.
    glob_iterator pattern_it = pattern_begin;
    glob_iterator filename_it = filename_begin;

    while (pattern_it != pattern_end && *pattern_it != '*') {
        if (filename_it == filename_end) return false;

        switch (*pattern_it) {
        case '*':
            // Unreachable, '*' is excluded by the loop condition.
            assert(false);
            // Throw by value, like every other error in this file, so that
            // handlers catching glob_error by reference see it and nothing
            // is leaked.
            throw glob_error("Internal error");
        case '[':
            // match_range advances pattern_it past the closing ']'.
            if (!match_range(pattern_it, pattern_end, *filename_it))
                return false;
            ++filename_it;
            break;
        case ']':
            throw glob_error("uneven square brackets");
        case '?':
            ++pattern_it;
            ++filename_it;
            break;
        case '\\':
            ++pattern_it;
            if (pattern_it == pattern_end) {
                throw glob_error("trailing escape");
            }
            else if (*pattern_it == '\\' || *pattern_it == '/') {
                throw glob_error("contains escaped slash");
            }
            // The escaped character is compared literally below.
            BOOST_FALLTHROUGH;
        default:
            if (*pattern_it != *filename_it) return false;
            ++pattern_it;
            ++filename_it;
        }
    }

    // The last section of the pattern must match right up to the end of
    // the file name.
    if (pattern_it == pattern_end && filename_it != filename_end)
        return false;

    pattern_begin = pattern_it;
    filename_begin = filename_it;
    return true;
}
|
|
|
|
// Match the character x against a square bracket expression.
//
// On entry pattern_begin must point at the opening '['. It is advanced in
// place as the expression is read, and on return points just past the
// closing ']'.
//
// A leading '^' inverts the result. "a-z" matches any character between
// the two end points (inclusive), and a '-' directly before the closing ']'
// is matched literally.
//
// Returns true if x is accepted by the expression.
// Throws glob_error if the expression is invalid.
bool match_range(
    glob_iterator& pattern_begin, glob_iterator pattern_end, char x)
{
    assert(pattern_begin != pattern_end && *pattern_begin == '[');

    // Shorter name for the caller's iterator, every step is made directly
    // on it.
    glob_iterator& cursor = pattern_begin;

    ++cursor;
    if (cursor == pattern_end) throw glob_error("uneven square brackets");

    bool negate = false;

    if (*cursor == '^') {
        negate = true;
        ++cursor;
        if (cursor == pattern_end) {
            throw glob_error("uneven square brackets");
        }
    }
    else if (*cursor == ']') {
        throw glob_error("empty range");
    }

    bool hit = false;

    // Each pass reads either a single character or a "low-high" range.
    for (;;) {
        unsigned char low = *cursor;
        ++cursor;

        if (low == ']') break;
        if (low == '[') throw glob_error("nested square brackets");
        if (cursor == pattern_end) {
            throw glob_error("uneven square brackets");
        }

        if (low == '/') throw glob_error("slash in square brackets");
        if (low == '\\') {
            // Escaped character, use whatever follows the backslash.
            low = *cursor;
            if (low == '\\' || low == '/') {
                throw glob_error("contains escaped slash");
            }
            ++cursor;
            if (cursor == pattern_end) {
                throw glob_error("uneven square brackets");
            }
        }

        // Not followed by '-', so it's just a single character.
        if (*cursor != '-') {
            if (low == x) hit = true;
            continue;
        }

        ++cursor;
        if (cursor == pattern_end) {
            throw glob_error("uneven square brackets");
        }

        unsigned char high = *cursor;
        ++cursor;

        // "x-]": the '-' is a literal character at the end of the
        // expression rather than the start of a range.
        if (high == ']') {
            if (low == x || x == '-') hit = true;
            break;
        }
        if (cursor == pattern_end) {
            throw glob_error("uneven square brackets");
        }

        if (high == '/') throw glob_error("slash in square brackets");
        if (high == '\\') {
            high = *cursor;
            if (high == '\\' || high == '/') {
                throw glob_error("contains escaped slash");
            }
            ++cursor;
            if (cursor == pattern_end) {
                throw glob_error("uneven square brackets");
            }
        }

        if (low <= x && x <= high) hit = true;
    }

    return negate != hit;
}
|
|
|
|
// Find the first unescaped glob character ('[', ']', '?' or '*') in
// pattern, starting the search at index pos.
//
// A backslash escapes the character that follows it, so both are skipped.
//
// Returns the index of the character relative to the start of pattern, or
// quickbook::string_view::npos if there isn't one (including when pos is
// at or past the end of the pattern).
std::size_t find_glob_char(quickbook::string_view pattern, std::size_t pos)
{
    std::size_t const npos = quickbook::string_view::npos;

    // Weird style is because quickbook::string_view's find_first_of
    // doesn't take a position argument. Instead the start of the view is
    // trimmed, and 'removed' tracks how much has gone so that the result
    // can be converted back into an index into the original pattern.
    if (pos >= pattern.size()) return npos;
    pattern.remove_prefix(pos);
    std::size_t removed = pos;

    for (;;) {
        std::size_t const found = pattern.find_first_of("[]?*\\");
        if (found == npos) return npos;
        if (pattern[found] != '\\') return found + removed;

        // Skip the backslash and the character it escapes. If the
        // backslash is the last character there's nothing left to search,
        // and removing two characters would run past the end of the view.
        if (pattern.size() - found < 2) return npos;
        pattern.remove_prefix(found + 2);
        removed += found + 2;
    }
}
|
|
|
|
std::string glob_unescape(quickbook::string_view pattern)
|
|
{
|
|
std::string result;
|
|
|
|
for (;;) {
|
|
std::size_t pos = pattern.find("\\");
|
|
if (pos == quickbook::string_view::npos) {
|
|
result.append(pattern.data(), pattern.size());
|
|
break;
|
|
}
|
|
|
|
result.append(pattern.data(), pos);
|
|
++pos;
|
|
if (pos < pattern.size()) {
|
|
result += pattern[pos];
|
|
++pos;
|
|
}
|
|
pattern.remove_prefix(pos);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
}
|