2
0
mirror of https://github.com/boostorg/parser.git synced 2026-01-24 06:02:12 +00:00
Files
parser/test/split.cpp
Zach Laine 5adef16a4d Add split_view + split range adaptor. split produces a range of subranges,
each of which is a nonoveralpping match of the given parser.

Fixes #64.
2024-01-19 21:19:53 -06:00

231 lines
7.5 KiB
C++

/**
* Copyright (C) 2024 T. Zachary Laine
*
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#include <boost/parser/split.hpp>
#include <gtest/gtest.h>
namespace bp = boost::parser;
#if BOOST_PARSER_USE_CONCEPTS
namespace deduction {
std::string str;
auto const parser = bp::char_;
auto const skip = bp::ws;
auto deduced_1 = bp::split_view(str, parser, skip, bp::trace::on);
auto deduced_2 = bp::split_view(str, parser, skip);
auto deduced_3 = bp::split_view(str, parser, bp::trace::on);
auto deduced_4 = bp::split_view(str, parser);
}
#endif
TEST(search, split_)
{
{
auto r = bp::split("", bp::lit("XYZ"), bp::ws);
int count = 0;
for (auto subrange : r) {
(void)subrange;
++count;
}
EXPECT_EQ(count, 0);
}
{
char const str[] = "aaXYZb";
auto r = bp::split(str, bp::lit("XYZ"), bp::ws);
int count = 0;
int const offsets[] = {0, 2, 5, 6};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin() - str, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end() - str, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 2);
}
{
char const str[] = "aaXYZbaabaXYZ";
auto r = str | bp::split(bp::lit("XYZ"), bp::ws, bp::trace::off);
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin() - str, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end() - str, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 3);
}
{
char const str[] = "aaXYZbaabaXYZ";
auto r = str | bp::split(bp::lit("XYZ"), bp::trace::off);
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin() - str, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end() - str, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 3);
}
{
char const str[] = "aaXYZbaabaXYZ";
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin() - str, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end() - str, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 3);
}
{
char const str[] = "aaXYZbaabaXYZXYZ";
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13, 16, 16};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin() - str, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end() - str, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 4);
}
{
char const str[] = "XYZaaXYZbaabaXYZXYZ";
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 0, 3, 5, 8, 13, 16, 16, 19, 19};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin() - str, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end() - str, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 5);
}
{
char const str[] = "XYZXYZaaXYZbaabaXYZXYZ";
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 0, 3, 3, 6, 8, 11, 16, 19, 19, 22, 22};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin() - str, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end() - str, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 6);
}
}
TEST(search, split_unicode)
{
{
char const str_[] = "";
auto str = str_ | bp::as_utf8;
auto r = bp::split(str, bp::lit("XYZ"), bp::ws);
int count = 0;
for (auto subrange : r) {
(void)subrange;
++count;
}
EXPECT_EQ(count, 0);
}
{
char const str_[] = "aaXYZb";
auto str = str_ | bp::as_utf16;
auto r = bp::split(str, bp::lit("XYZ"), bp::ws);
int count = 0;
int const offsets[] = {0, 2, 5, 6};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin().base() - str_, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end().base() - str_, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 2);
}
{
char const str_[] = "aaXYZbaabaXYZ";
auto str = str_ | bp::as_utf32;
auto r = str | bp::split(bp::lit("XYZ"), bp::ws, bp::trace::off);
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin().base() - str_, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end().base() - str_, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 3);
}
{
char const str_[] = "aaXYZbaabaXYZ";
auto str = str_ | bp::as_utf8;
auto r = str | bp::split(bp::lit("XYZ"), bp::trace::off);
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin().base() - str_, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end().base() - str_, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 3);
}
{
char const str_[] = "aaXYZbaabaXYZ";
auto str = str_ | bp::as_utf16;
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin().base() - str_, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end().base() - str_, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 3);
}
{
char const str_[] = "aaXYZbaabaXYZXYZ";
auto str = str_ | bp::as_utf32;
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 2, 5, 10, 13, 13, 16, 16};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin().base() - str_, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end().base() - str_, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 4);
}
{
char const str_[] = "XYZaaXYZbaabaXYZXYZ";
auto str = str_ | bp::as_utf8;
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 0, 3, 5, 8, 13, 16, 16, 19, 19};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin().base() - str_, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end().base() - str_, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 5);
}
{
char const str_[] = "XYZXYZaaXYZbaabaXYZXYZ";
auto str = str_ | bp::as_utf16;
auto r = str | bp::split(bp::lit("XYZ"));
int count = 0;
int const offsets[] = {0, 0, 3, 3, 6, 8, 11, 16, 19, 19, 22, 22};
for (auto subrange : r) {
EXPECT_EQ(subrange.begin().base() - str_, offsets[count * 2 + 0]);
EXPECT_EQ(subrange.end().base() - str_, offsets[count * 2 + 1]);
++count;
}
EXPECT_EQ(count, 6);
}
}