From 00dadb420329c900c886a7ddb047f355b356e8ff Mon Sep 17 00:00:00 2001 From: Sascha Ochsenknecht Date: Fri, 4 Dec 2009 08:09:43 +0000 Subject: [PATCH] enhance split_unix() to allow unix style splitting of command line string [SVN r58133] --- include/boost/program_options/parsers.hpp | 10 ++- src/split.cpp | 65 ++++++++++---------- test/split_test.cpp | 74 +++++++++++++++++++++-- 3 files changed, 107 insertions(+), 42 deletions(-) diff --git a/include/boost/program_options/parsers.hpp b/include/boost/program_options/parsers.hpp index 3d5692a..460f05e 100644 --- a/include/boost/program_options/parsers.hpp +++ b/include/boost/program_options/parsers.hpp @@ -206,14 +206,18 @@ namespace boost { namespace program_options { can be passed to command_line_parser. The second parameter is used to specify a collection of possible seperator chars used for splitting. The seperator is defaulted to space " ". + Splitting is done in a unix style way, with respect to quotes '"' + and escape characters '\' */ BOOST_PROGRAM_OPTIONS_DECL std::vector - split(const std::string& cmdline, const std::string& sep = " "); - + split_unix(const std::string& cmdline, const std::string& seperator = " ", + const std::string& quote = "\"", const std::string& escape = "\\"); + #ifndef BOOST_NO_STD_WSTRING /** @overload */ BOOST_PROGRAM_OPTIONS_DECL std::vector - split(const std::wstring& cmdline, const std::wstring& sep = L" "); + split_unix(const std::wstring& cmdline, const std::wstring& seperator = L" ", + const std::wstring& quote = L"\"", const std::wstring& escape = L"\\"); #endif #ifdef _WIN32 diff --git a/src/split.cpp b/src/split.cpp index 033fdde..96da068 100644 --- a/src/split.cpp +++ b/src/split.cpp @@ -4,62 +4,59 @@ // or copy at http://www.boost.org/LICENSE_1_0.txt) #define BOOST_PROGRAM_OPTIONS_SOURCE + #include +#include + #include #include namespace boost { namespace program_options { namespace detail { - - template + template< class charT > std::vector > - split(const std::basic_string& cmdline, const std::basic_string& sep) - { - std::vector > result; - if (!cmdline.empty()) - { - std::basic_string sub(cmdline), val; - std::size_t pos; - - while (sub.size() > 0) - { - if ((pos = sub.find_first_of(sep)) != sub.npos) - { - val = sub.substr(0,pos); - sub = sub.substr(pos+1); - } - else - { - val = sub; - sub.erase(); - } - if (!val.empty()) - { - result.push_back(val); - } - } + split_unix( + const std::basic_string& cmdline, + const std::basic_string& seperator, + const std::basic_string& quote, + const std::basic_string& escape) + { + typedef boost::tokenizer< boost::escaped_list_separator, + typename std::basic_string::const_iterator, + std::basic_string > tokenizerT; + + tokenizerT tok(cmdline.begin(), cmdline.end(), + boost::escaped_list_separator< charT >(escape, seperator, quote)); + + std::vector< std::basic_string > result; + for (typename tokenizerT::iterator cur_token(tok.begin()), end_token(tok.end()); cur_token != end_token; ++cur_token) { + if (!cur_token->empty()) + result.push_back(*cur_token); } - return result; + return result; } -}}} +}}} // namespace namespace boost { namespace program_options { // Take a command line string and splits in into tokens, according // to the given collection of seperators chars. BOOST_PROGRAM_OPTIONS_DECL std::vector - split(const std::string& cmdline, const std::string& sep) + split_unix(const std::string& cmdline, const std::string& seperator, + const std::string& quote, const std::string& escape) { - return detail::split(cmdline, sep); + return detail::split_unix< char >(cmdline, seperator, quote, escape); } #ifndef BOOST_NO_STD_WSTRING BOOST_PROGRAM_OPTIONS_DECL std::vector - split(const std::wstring& cmdline, const std::wstring& sep) + split_unix(const std::wstring& cmdline, const std::wstring& seperator, + const std::wstring& quote, const std::wstring& escape) { - return detail::split(cmdline, sep); + return detail::split_unix< wchar_t >(cmdline, seperator, quote, escape); } #endif -}} +}} // namespace + diff --git a/test/split_test.cpp b/test/split_test.cpp index f51d780..46e868d 100644 --- a/test/split_test.cpp +++ b/test/split_test.cpp @@ -26,11 +26,12 @@ void check_value(const string& option, const string& value) void split_whitespace(const options_description& description) { - const char* cmdline = "prg --input input.txt \t --optimization 4 \t\n --opt option"; + const char* cmdline = "prg --input input.txt \r --optimization 4 \t --opt \n option"; - vector< string > tokens = split(cmdline, " \t\n"); + vector< string > tokens = split_unix(cmdline, " \t\n\r"); BOOST_REQUIRE(tokens.size() == 7); + check_value(tokens[0], "prg"); check_value(tokens[1], "--input"); check_value(tokens[2], "input.txt"); @@ -49,8 +50,8 @@ void split_equalsign(const options_description& description) const char* cmdline = "prg --input=input.txt --optimization=4 --opt=option"; - vector< string > tokens = split(cmdline, "= "); - + vector< string > tokens = split_unix(cmdline, "= "); + BOOST_REQUIRE(tokens.size() == 7); check_value(tokens[0], "prg"); check_value(tokens[1], "--input"); @@ -70,7 +71,7 @@ void split_semi(const options_description& description) const char* cmdline = "prg;--input input.txt;--optimization 4;--opt option"; - vector< string > tokens = split(cmdline, "; "); + vector< string > tokens = split_unix(cmdline, "; "); BOOST_REQUIRE(tokens.size() == 7); check_value(tokens[0], "prg"); @@ -86,6 +87,66 @@ void split_semi(const options_description& description) notify(vm); } +void split_quotes(const options_description& description) +{ + const char* cmdline = "prg --input \"input.txt input.txt\" --optimization 4 --opt \"option1 option2\""; + + vector< string > tokens = split_unix(cmdline, " "); + + BOOST_REQUIRE(tokens.size() == 7); + check_value(tokens[0], "prg"); + check_value(tokens[1], "--input"); + check_value(tokens[2], "input.txt input.txt"); + check_value(tokens[3], "--optimization"); + check_value(tokens[4], "4"); + check_value(tokens[5], "--opt"); + check_value(tokens[6], "option1 option2"); + + variables_map vm; + store(command_line_parser(tokens).options(description).run(), vm); + notify(vm); +} + +void split_escape(const options_description& description) +{ + const char* cmdline = "prg --input \\\"input.txt\\\" --optimization 4 --opt \\\"option1\\ option2\\\""; + + vector< string > tokens = split_unix(cmdline, " "); + + BOOST_REQUIRE(tokens.size() == 7); + check_value(tokens[0], "prg"); + check_value(tokens[1], "--input"); + check_value(tokens[2], "\"input.txt\""); + check_value(tokens[3], "--optimization"); + check_value(tokens[4], "4"); + check_value(tokens[5], "--opt"); + check_value(tokens[6], "\"option1 option2\""); + + variables_map vm; + store(command_line_parser(tokens).options(description).run(), vm); + notify(vm); +} + + +void split_single_quote(const options_description& description) +{ + const char* cmdline = "prg --input 'input.txt input.txt' --optimization 4 --opt 'option1 option2'"; + + vector< string > tokens = split_unix(cmdline, " ", "'"); + + BOOST_REQUIRE(tokens.size() == 7); + check_value(tokens[0], "prg"); + check_value(tokens[1], "--input"); + check_value(tokens[2], "input.txt input.txt"); + check_value(tokens[3], "--optimization"); + check_value(tokens[4], "4"); + check_value(tokens[5], "--opt"); + check_value(tokens[6], "option1 option2"); + + variables_map vm; + store(command_line_parser(tokens).options(description).run(), vm); + notify(vm); +} int main(int /*ac*/, char** /*av*/) { @@ -99,6 +160,9 @@ int main(int /*ac*/, char** /*av*/) split_whitespace(desc); split_equalsign(desc); split_semi(desc); + split_quotes(desc); + split_escape(desc); + split_single_quote(desc); return 0; }