diff --git a/include/boost/uuid/detail/basic_name_generator.hpp b/include/boost/uuid/detail/basic_name_generator.hpp index a01da02..eccaae5 100644 --- a/include/boost/uuid/detail/basic_name_generator.hpp +++ b/include/boost/uuid/detail/basic_name_generator.hpp @@ -29,31 +29,24 @@ private: uuid namespace_uuid_; +private: + + using digest_type = typename HashAlgo::digest_type; + public: using result_type = uuid; - using digest_type = typename HashAlgo::digest_type; explicit basic_name_generator( uuid const& namespace_uuid ) noexcept : namespace_uuid_( namespace_uuid ) {} - uuid operator()( char const* name ) const noexcept + template uuid operator()( Ch const* name ) const noexcept { HashAlgo hash; hash.process_bytes( namespace_uuid_.begin(), namespace_uuid_.size() ); - process_characters( hash, name, std::strlen( name ) ); - - return hash_to_uuid( hash ); - } - - uuid operator()( wchar_t const* name ) const noexcept - { - HashAlgo hash; - - hash.process_bytes( namespace_uuid_.begin(), namespace_uuid_.size() ); - process_characters( hash, name, std::wcslen( name ) ); + process_characters( hash, name, std::char_traits().length( name ) ); return hash_to_uuid( hash ); } @@ -80,32 +73,120 @@ public: } private: - // we convert all characters to uint32_t so that each - // character is 4 bytes regardless of sizeof(char) or - // sizeof(wchar_t). We want the name string on any - // platform / compiler to generate the same uuid - // except for char - template - void process_characters( HashAlgo& hash, Ch const* characters, std::size_t count ) const noexcept + + void process_characters( HashAlgo& hash, char const* p, std::size_t n ) const noexcept { - BOOST_UUID_STATIC_ASSERT( sizeof(std::uint32_t) >= sizeof(Ch) ); + hash.process_bytes( p, n ); + } - for( std::size_t i = 0; i < count; ++i) + // For portability, we convert all wide characters to uint32_t so that each + // character is 4 bytes regardless of sizeof(wchar_t). + + void process_characters( HashAlgo& hash, wchar_t const* p, std::size_t n ) const noexcept + { + BOOST_UUID_STATIC_ASSERT( sizeof( std::uint32_t ) >= sizeof( wchar_t ) ); + + for( std::size_t i = 0; i < n; ++i) { - std::size_t c = characters[ i ]; + std::uint32_t ch = p[ i ]; - hash.process_byte( static_cast( (c >> 0) & 0xFF ) ); - hash.process_byte( static_cast( (c >> 8) & 0xFF ) ); - hash.process_byte( static_cast( (c >> 16) & 0xFF ) ); - hash.process_byte( static_cast( (c >> 24) & 0xFF ) ); + unsigned char bytes[ 4 ] = + { + static_cast( ( ch >> 0 ) & 0xFF ), + static_cast( ( ch >> 8 ) & 0xFF ), + static_cast( ( ch >> 16 ) & 0xFF ), + static_cast( ( ch >> 24 ) & 0xFF ) + }; + + hash.process_bytes( bytes, 4 ); } } - void process_characters( HashAlgo& hash, char const* characters, std::size_t count ) const noexcept + void process_characters( HashAlgo& hash, char32_t const* p, std::size_t n ) const noexcept { - hash.process_bytes( characters, count ); + for( std::size_t i = 0; i < n; ++i) + { + process_utf32_codepoint( hash, p[ i ] ); + } } + void process_characters( HashAlgo& hash, char16_t const* p, std::size_t n ) const noexcept + { + for( std::size_t i = 0; i < n; ++i) + { + char16_t ch = p[ i ]; + + if( ch >= 0xD800 && ch <= 0xDBFF && i + 1 < n && p[ i+1 ] >= 0xDC00 && p[ i+1 ] <= 0xDFFF ) + { + char16_t ch2 = p[ ++i ]; + + std::uint32_t high = ch - 0xD800; + std::uint32_t low = ch2 - 0xDC00; + + process_utf32_codepoint( hash, ( high << 10 ) + low + 0x10000 ); + } + else + { + process_utf32_codepoint( hash, ch ); + } + } + } + + void process_utf32_codepoint( HashAlgo& hash, std::uint32_t cp ) const noexcept + { + if( ( cp >= 0xD800 && cp <= 0xDFFF ) || cp > 0x10FFFF ) + { + cp = 0xFFFD; // Unicode replacement character + } + + if( cp < 0x80 ) + { + hash.process_byte( static_cast( cp ) ); + } + else if( cp < 0x800 ) + { + unsigned char bytes[ 2 ] = + { + static_cast( 0xC0 | (cp >> 6) ), + static_cast( 0x80 | (cp & 0x3F) ) + }; + + hash.process_bytes( bytes, 2 ); + } + else if( cp < 0x10000 ) + { + unsigned char bytes[ 3 ] = + { + static_cast( 0xE0 | (cp >> 12) ), + static_cast( 0x80 | ((cp >> 6) & 0x3F) ), + static_cast( 0x80 | (cp & 0x3F) ) + }; + + hash.process_bytes( bytes, 3 ); + } + else + { + unsigned char bytes[ 4 ] = + { + static_cast( 0xF0 | ( cp >> 18 ) ), + static_cast( 0x80 | ((cp >> 12 ) & 0x3F ) ), + static_cast( 0x80 | ((cp >> 6 ) & 0x3F ) ), + static_cast( 0x80 | (cp & 0x3F) ) + }; + + hash.process_bytes( bytes, 4 ); + } + } + +#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L + + void process_characters( HashAlgo& hash, char8_t const* p, std::size_t n ) const noexcept + { + hash.process_bytes( p, n ); + } + +#endif + uuid hash_to_uuid( HashAlgo& hash ) const noexcept { digest_type digest; diff --git a/test/test_name_generator_md5.cpp b/test/test_name_generator_md5.cpp index 2e53919..0008b7c 100644 --- a/test/test_name_generator_md5.cpp +++ b/test/test_name_generator_md5.cpp @@ -7,67 +7,70 @@ #include #include #include +#include + +using namespace boost::uuids; + +void test( uuid const& nmsp, char const* name, char const* expected ) +{ + name_generator_md5 gen( nmsp ); + + uuid u0 = string_generator()( expected ); + + uuid u1 = gen( name ); + BOOST_TEST_EQ( u1, u0 ); + + uuid u2 = gen( std::string( name ) ); + BOOST_TEST_EQ( u2, u0 ); + + uuid u3 = gen( name, std::strlen( name ) ); + BOOST_TEST_EQ( u3, u0 ); +} + +template void test( uuid const& nmsp, Ch const* name, char const* expected ) +{ + name_generator_md5 gen( nmsp ); + + uuid u0 = string_generator()( expected ); + + uuid u1 = gen( name ); + BOOST_TEST_EQ( u1, u0 ); + + uuid u2 = gen( std::basic_string( name ) ); + BOOST_TEST_EQ( u2, u0 ); +} int main() { - using namespace boost::uuids; - // RFC 4122 Appendix B + Errata 1352 - { - name_generator_md5 gen( ns::dns() ); - - char const* name = "www.widgets.com"; - - uuid u0 = string_generator()( "3d813cbb-47fb-32ba-91df-831e1593ac29" ); - - uuid u1 = gen( name ); - BOOST_TEST_EQ( u1, u0 ); - - uuid u2 = gen( std::string( name ) ); - BOOST_TEST_EQ( u2, u0 ); - - uuid u3 = gen( name, std::strlen( name ) ); - BOOST_TEST_EQ( u3, u0 ); - } + test( ns::dns(), "www.widgets.com", "3d813cbb-47fb-32ba-91df-831e1593ac29" ); + test( ns::dns(), u"www.widgets.com", "3d813cbb-47fb-32ba-91df-831e1593ac29" ); + test( ns::dns(), U"www.widgets.com", "3d813cbb-47fb-32ba-91df-831e1593ac29" ); + test( ns::dns(), u8"www.widgets.com", "3d813cbb-47fb-32ba-91df-831e1593ac29" ); // RFC 4122bis Section A.2 - { - name_generator_md5 gen( ns::dns() ); - - char const* name = "www.example.com"; - - uuid u0 = string_generator()( "5df41881-3aed-3515-88a7-2f4a814cf09e" ); - - uuid u1 = gen( name ); - BOOST_TEST_EQ( u1, u0 ); - - uuid u2 = gen( std::string( name ) ); - BOOST_TEST_EQ( u2, u0 ); - - uuid u3 = gen( name, std::strlen( name ) ); - BOOST_TEST_EQ( u3, u0 ); - } + test( ns::dns(), "www.example.com", "5df41881-3aed-3515-88a7-2f4a814cf09e" ); + test( ns::dns(), u"www.example.com", "5df41881-3aed-3515-88a7-2f4a814cf09e" ); + test( ns::dns(), U"www.example.com", "5df41881-3aed-3515-88a7-2f4a814cf09e" ); + test( ns::dns(), u8"www.example.com", "5df41881-3aed-3515-88a7-2f4a814cf09e" ); // https://uuid.ramsey.dev/en/stable/rfc4122/version3.html - { - name_generator_md5 gen( ns::url() ); + test( ns::url(), "https://www.php.net", "3f703955-aaba-3e70-a3cb-baff6aa3b28f" ); + test( ns::url(), u"https://www.php.net", "3f703955-aaba-3e70-a3cb-baff6aa3b28f" ); + test( ns::url(), U"https://www.php.net", "3f703955-aaba-3e70-a3cb-baff6aa3b28f" ); + test( ns::url(), u8"https://www.php.net", "3f703955-aaba-3e70-a3cb-baff6aa3b28f" ); - char const* name = "https://www.php.net"; + // test case from test_name_generator.cpp - uuid u0 = string_generator()( "3f703955-aaba-3e70-a3cb-baff6aa3b28f" ); + test( ns::url(), "www.widgets.com", "06205cec-255b-300e-a8bc-a8605ab8244e" ); - uuid u1 = gen( name ); - BOOST_TEST_EQ( u1, u0 ); + // examples from documentation - uuid u2 = gen( std::string( name ) ); - BOOST_TEST_EQ( u2, u0 ); - - uuid u3 = gen( name, std::strlen( name ) ); - BOOST_TEST_EQ( u3, u0 ); - } + test( ns::dns(), "boost.org", "888eca9c-e655-31a2-a46b-a2a821f6b150" ); + test( ns::dns(), L"boost.org", "48149232-8cda-361b-b355-0bdb71d2cab3" ); // test wide strings @@ -85,5 +88,38 @@ int main() BOOST_TEST_EQ( u1, u2 ); } + // test unicode strings + + { + uuid nmsp = string_generator()( "70a4abc5-80ab-4176-8e11-bc5836b6fef9" ); + + name_generator_md5 gen( nmsp ); + + char32_t const name32[] = { 0x0024, 0x00A3, 0x0418, 0x0939, 0x20AC, 0xD55C, 0xDC12, 0xD834, 0x10348, 0x1096B3, 0xD956, 0 }; + char16_t const name16[] = { 0x0024, 0x00A3, 0x0418, 0x0939, 0x20AC, 0xD55C, 0xDC12, 0xD834, 0xD800, 0xDF48, 0xDBE5, 0xDEB3, 0xD956, 0 }; + + unsigned char name8[] = + { + /*U+0024*/ 0x24, + /*U+00A3*/ 0xC2, 0xA3, + /*U+0418*/ 0xD0, 0x98, + /*U+0939*/ 0xE0, 0xA4, 0xB9, + /*U+20AC*/ 0xE2, 0x82, 0xAC, + /*U+D55C*/ 0xED, 0x95, 0x9C, + /*U+DC12*/ 0xEF, 0xBF, 0xBD, // U+FFFD + /*U+D834*/ 0xEF, 0xBF, 0xBD, // U+FFFD + /*U+10348*/ 0xF0, 0x90, 0x8D, 0x88, + /*U+1096B3*/ 0xF4, 0x89, 0x9A, 0xB3, + /*U+D956*/ 0xEF, 0xBF, 0xBD, // U+FFFD + }; + + uuid u1 = gen( name32 ); + uuid u2 = gen( name16 ); + uuid u3 = gen( name8, sizeof( name8 ) ); + + BOOST_TEST_EQ( u1, u3 ); + BOOST_TEST_EQ( u2, u3 ); + } + return boost::report_errors(); } diff --git a/test/test_name_generator_sha1.cpp b/test/test_name_generator_sha1.cpp index a9baf1b..90e568d 100644 --- a/test/test_name_generator_sha1.cpp +++ b/test/test_name_generator_sha1.cpp @@ -8,67 +8,70 @@ #include #include +using namespace boost::uuids; + +void test( uuid const& nmsp, char const* name, char const* expected ) +{ + name_generator_sha1 gen( nmsp ); + + uuid u0 = string_generator()( expected ); + + uuid u1 = gen( name ); + BOOST_TEST_EQ( u1, u0 ); + + uuid u2 = gen( std::string( name ) ); + BOOST_TEST_EQ( u2, u0 ); + + uuid u3 = gen( name, std::strlen( name ) ); + BOOST_TEST_EQ( u3, u0 ); +} + +template void test( uuid const& nmsp, Ch const* name, char const* expected ) +{ + name_generator_sha1 gen( nmsp ); + + uuid u0 = string_generator()( expected ); + + uuid u1 = gen( name ); + BOOST_TEST_EQ( u1, u0 ); + + uuid u2 = gen( std::basic_string( name ) ); + BOOST_TEST_EQ( u2, u0 ); +} + int main() { - using namespace boost::uuids; - // https://stackoverflow.com/questions/5515880/test-vectors-for-uuid-version-5-converting-hash-into-guid-generation-algorith // https://de.wikipedia.org/wiki/Universally_Unique_Identifier#Namensbasierte_UUIDs_.28Version_3_und_5.29 - { - name_generator_sha1 gen( ns::dns() ); - - char const* name = "www.example.org"; - - uuid u0 = string_generator()( "74738ff5-5367-5958-9aee-98fffdcd1876" ); - - uuid u1 = gen( name ); - BOOST_TEST_EQ( u1, u0 ); - - uuid u2 = gen( std::string( name ) ); - BOOST_TEST_EQ( u2, u0 ); - - uuid u3 = gen( name, std::strlen( name ) ); - BOOST_TEST_EQ( u3, u0 ); - } + test( ns::dns(), "www.example.org", "74738ff5-5367-5958-9aee-98fffdcd1876" ); + test( ns::dns(), u"www.example.org", "74738ff5-5367-5958-9aee-98fffdcd1876" ); + test( ns::dns(), U"www.example.org", "74738ff5-5367-5958-9aee-98fffdcd1876" ); + test( ns::dns(), u8"www.example.org", "74738ff5-5367-5958-9aee-98fffdcd1876" ); // RFC 4122bis Section A.4 - { - name_generator_sha1 gen( ns::dns() ); - - char const* name = "www.example.com"; - - uuid u0 = string_generator()( "2ed6657d-e927-568b-95e1-2665a8aea6a2" ); - - uuid u1 = gen( name ); - BOOST_TEST_EQ( u1, u0 ); - - uuid u2 = gen( std::string( name ) ); - BOOST_TEST_EQ( u2, u0 ); - - uuid u3 = gen( name, std::strlen( name ) ); - BOOST_TEST_EQ( u3, u0 ); - } + test( ns::dns(), "www.example.com", "2ed6657d-e927-568b-95e1-2665a8aea6a2" ); + test( ns::dns(), u"www.example.com", "2ed6657d-e927-568b-95e1-2665a8aea6a2" ); + test( ns::dns(), U"www.example.com", "2ed6657d-e927-568b-95e1-2665a8aea6a2" ); + test( ns::dns(), u8"www.example.com", "2ed6657d-e927-568b-95e1-2665a8aea6a2" ); // https://uuid.ramsey.dev/en/stable/rfc4122/version5.html - { - name_generator_sha1 gen( ns::url() ); + test( ns::url(), "https://www.php.net", "a8f6ae40-d8a7-58f0-be05-a22f94eca9ec" ); + test( ns::url(), u"https://www.php.net", "a8f6ae40-d8a7-58f0-be05-a22f94eca9ec" ); + test( ns::url(), U"https://www.php.net", "a8f6ae40-d8a7-58f0-be05-a22f94eca9ec" ); + test( ns::url(), u8"https://www.php.net", "a8f6ae40-d8a7-58f0-be05-a22f94eca9ec" ); - char const* name = "https://www.php.net"; + // test cases from test_name_generator.cpp - uuid u0 = string_generator()( "a8f6ae40-d8a7-58f0-be05-a22f94eca9ec" ); + test( ns::dns(), "www.widgets.com", "21f7f8de-8051-5b89-8680-0195ef798b6a" ); + test( ns::dns(), L"www.widgets.com", "c315270b-a466-5872-aca4-9626cec0f4be" ); - uuid u1 = gen( name ); - BOOST_TEST_EQ( u1, u0 ); + // examples from documentation - uuid u2 = gen( std::string( name ) ); - BOOST_TEST_EQ( u2, u0 ); - - uuid u3 = gen( name, std::strlen( name ) ); - BOOST_TEST_EQ( u3, u0 ); - } + test( ns::dns(), "boost.org", "0043f363-bbb4-5369-840a-322df6ec1926" ); + test( ns::dns(), L"boost.org", "c31c5016-3493-5dc2-8484-5813d495cc18" ); // test wide strings @@ -86,5 +89,38 @@ int main() BOOST_TEST_EQ( u1, u2 ); } + // test unicode strings + + { + uuid nmsp = string_generator()( "70a4abc5-80ab-4176-8e11-bc5836b6fef9" ); + + name_generator_sha1 gen( nmsp ); + + char32_t const name32[] = { 0x0024, 0x00A3, 0x0418, 0x0939, 0x20AC, 0xD55C, 0xDC12, 0xD834, 0x10348, 0x1096B3, 0xD956, 0 }; + char16_t const name16[] = { 0x0024, 0x00A3, 0x0418, 0x0939, 0x20AC, 0xD55C, 0xDC12, 0xD834, 0xD800, 0xDF48, 0xDBE5, 0xDEB3, 0xD956, 0 }; + + unsigned char name8[] = + { + /*U+0024*/ 0x24, + /*U+00A3*/ 0xC2, 0xA3, + /*U+0418*/ 0xD0, 0x98, + /*U+0939*/ 0xE0, 0xA4, 0xB9, + /*U+20AC*/ 0xE2, 0x82, 0xAC, + /*U+D55C*/ 0xED, 0x95, 0x9C, + /*U+DC12*/ 0xEF, 0xBF, 0xBD, // U+FFFD + /*U+D834*/ 0xEF, 0xBF, 0xBD, // U+FFFD + /*U+10348*/ 0xF0, 0x90, 0x8D, 0x88, + /*U+1096B3*/ 0xF4, 0x89, 0x9A, 0xB3, + /*U+D956*/ 0xEF, 0xBF, 0xBD, // U+FFFD + }; + + uuid u1 = gen( name32 ); + uuid u2 = gen( name16 ); + uuid u3 = gen( name8, sizeof( name8 ) ); + + BOOST_TEST_EQ( u1, u3 ); + BOOST_TEST_EQ( u2, u3 ); + } + return boost::report_errors(); }