Merge pull request #60 from Flamefire/fix_codecvt

Fix codecvt and improve conversion tests
2026-02-22 03:22:32 +00:00 · 2020-01-09 13:10:00 +01:00
parent 0ecb87549b 14675cd822
commit fdad59cf61
5 changed files with 221 additions and 113 deletions
--- a/include/boost/nowide/utf8_codecvt.hpp
+++ b/include/boost/nowide/utf8_codecvt.hpp
@@ -282,7 +282,7 @@ namespace nowide {
            }
            from_next = from;
            to_next = to;
-            if(r == std::codecvt_base::ok && from != from_end)
+            if(r == std::codecvt_base::ok && (from != from_end || state != 0))
                r = std::codecvt_base::partial;
            detail::write_state(std_state, state);
            return r;
--- a/test/test_codecvt.cpp
+++ b/test/test_codecvt.cpp
@@ -20,18 +20,6 @@ static const char* utf8_name = "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB
 static const std::wstring wide_name_str = boost::nowide::widen(utf8_name);
 static const wchar_t* wide_name = wide_name_str.c_str();

-const char* res(std::codecvt_base::result r)
-{
-    switch(r)
-    {
-    case std::codecvt_base::ok: return "ok";
-    case std::codecvt_base::partial: return "partial";
-    case std::codecvt_base::error: return "error";
-    case std::codecvt_base::noconv: return "noconv";
-    default: return "error";
-    }
-}
-
 typedef std::codecvt<wchar_t, char, std::mbstate_t> cvt_type;

 void test_codecvt_in_n_m(const cvt_type& cvt, int n, int m)
@@ -60,8 +48,6 @@ void test_codecvt_in_n_m(const cvt_type& cvt, int n, int m)

        std::mbstate_t mb2 = mb;
        std::codecvt_base::result r = cvt.in(mb, from, end, from_next, to, to_end, to_next);
-        // std::cout << "In from_size=" << (end-from) << " from move=" <<  (from_next - from) << " to move= " << to_next - to << " state = "
-        // << res(r) << std::endl;

        int count = cvt.length(mb2, from, end, to_end - to);
 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
@@ -124,14 +110,17 @@ void test_codecvt_out_n_m(const cvt_type& cvt, int n, int m)
        }

        std::codecvt_base::result r = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
-        // std::cout << "In from_size=" << (end-from) << " from move=" <<  (from_next - from) << " to move= " << to_next - to << " state = "
-        // << res(r) << std::endl;
        if(r == cvt_type::partial)
        {
-            TEST(to_end - to_next < cvt.max_length());
-            to_end += n;
-            if(to_end > real_to_end)
-                to_end = real_to_end;
+            // If from_next == from_end, "partial" probably means "need more input";
+            // otherwise it means "need more output".
+            if(from_next != from_end)
+            {
+                TEST(to_end - to_next < cvt.max_length());
+                to_end += n;
+                if(to_end > real_to_end)
+                    to_end = real_to_end;
+            }
        } else
        {
            TEST(r == cvt_type::ok);
@@ -184,11 +173,11 @@ void test_codecvt_err()

    std::cout << "- UTF-8" << std::endl;
    {
-        wchar_t buf[4];
-        wchar_t* const to = buf;
-        wchar_t* const to_end = buf + 4;
-        const char* err_utf = "1\xFF\xFF\xd7\xa9";
        {
+            wchar_t buf[4];
+            wchar_t* const to = buf;
+            wchar_t* const to_end = buf + 4;
+            const char* err_utf = "1\xFF\xFF\xd7\xa9";
            std::mbstate_t mb = std::mbstate_t();
            const char* from = err_utf;
            const char* from_end = from + std::strlen(from);
@@ -199,6 +188,50 @@ void test_codecvt_err()
            TEST(to_next == to + 4);
            TEST(std::wstring(to, to_end) == boost::nowide::widen(err_utf));
        }
+        {
+            wchar_t buf[4];
+            wchar_t* const to = buf;
+            wchar_t* const to_end = buf + 4;
+            const char* err_utf = "1\xd7"; // 1 valid, 1 incomplete UTF-8 char
+            std::mbstate_t mb = std::mbstate_t();
+            const char* from = err_utf;
+            const char* from_end = from + std::strlen(from);
+            const char* from_next = from;
+            wchar_t* to_next = to;
+            TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::partial);
+            TEST(from_next == from + 1);
+            TEST(to_next == to + 1);
+            TEST(std::wstring(to, to_next) == std::wstring(L"1"));
+        }
+        {
+            char buf[4] = {};
+            char* const to = buf;
+            char* const to_end = buf + 4;
+            char* to_next = to;
+            const wchar_t* err_utf = L"\xD800"; // Lone leading (high) UTF-16 surrogate at the end of the input
+            std::mbstate_t mb = std::mbstate_t();
+            const wchar_t* from = err_utf;
+            const wchar_t* from_end = from + 1;
+            const wchar_t* from_next = from;
+            cvt_type::result res = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
+#ifdef BOOST_MSVC
+#pragma warning(disable : 4127) // Constant expression detected
+#endif
+            if(sizeof(wchar_t) == 2)
+            {
+                TEST(res == cvt_type::partial);
+                TEST(from_next == from_end);
+                TEST(to_next == to);
+                TEST(buf[0] == 0);
+            } else
+            {
+                TEST(res == cvt_type::ok);
+                TEST(from_next == from_end);
+                TEST(to_next == to + 3);
+                // A lone surrogate is not a valid code point here: expect the replacement character
+                TEST(std::string(to, to_next) == boost::nowide::narrow(wreplacement_str));
+            }
+        }
    }

    std::cout << "- UTF-16/32" << std::endl;
@@ -217,7 +250,7 @@ void test_codecvt_err()
            TEST(cvt.out(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok);
            TEST(from_next == from + 2);
            TEST(to_next == to + 4);
-            TEST(std::memcmp(to, "1\xEF\xBF\xBD", 4) == 0);
+            TEST(std::string(to, to_next) == "1" + boost::nowide::narrow(wreplacement_str));
        }
    }
 }
@@ -229,19 +262,24 @@ std::wstring codecvt_to_wide(const std::string& s)
    const cvt_type& cvt = std::use_facet<cvt_type>(l);

    std::mbstate_t mb = std::mbstate_t();
-    const char* from = s.c_str();
-    const char* from_end = from + s.size();
+    const char* const from = s.c_str();
+    const char* const from_end = from + s.size();
    const char* from_next = from;

-    std::vector<wchar_t> buf(s.size() + 1);
-    wchar_t* to = &buf[0];
-    wchar_t* to_end = to + buf.size();
+    std::vector<wchar_t> buf(s.size() + 2); // +1 for possible incomplete char, +1 for NUL terminator
+    wchar_t* const to = &buf[0];
+    wchar_t* const to_end = to + buf.size();
    wchar_t* to_next = to;

-    TEST(cvt.in(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok);
+    cvt_type::result res = cvt.in(mb, from, from_end, from_next, to, to_end, to_next);
+    if(res == cvt_type::partial)
+    {
+        TEST(to_next < to_end);
+        *(to_next++) = BOOST_NOWIDE_REPLACEMENT_CHARACTER;
+    } else
+        TEST(res == cvt_type::ok);

-    std::wstring res(to, to_next);
-    return res;
+    return std::wstring(to, to_next);
 }

 std::string codecvt_to_narrow(const std::wstring& s)
@@ -251,19 +289,24 @@ std::string codecvt_to_narrow(const std::wstring& s)
    const cvt_type& cvt = std::use_facet<cvt_type>(l);

    std::mbstate_t mb = std::mbstate_t();
-    const wchar_t* from = s.c_str();
-    const wchar_t* from_end = from + s.size();
+    const wchar_t* const from = s.c_str();
+    const wchar_t* const from_end = from + s.size();
    const wchar_t* from_next = from;

-    std::vector<char> buf(s.size() * 4 + 1);
-    char* to = &buf[0];
-    char* to_end = to + buf.size();
+    std::vector<char> buf((s.size() + 1) * 4 + 1); // +1 for possible incomplete char, +1 for NUL terminator
+    char* const to = &buf[0];
+    char* const to_end = to + buf.size();
    char* to_next = to;

-    TEST(cvt.out(mb, from, from_end, from_next, to, to_end, to_next) == cvt_type::ok);
+    cvt_type::result res = cvt.out(mb, from, from_end, from_next, to, to_end, to_next);
+    if(res == cvt_type::partial)
+    {
+        TEST(to_next < to_end);
+        return std::string(to, to_next) + boost::nowide::narrow(wreplacement_str);
+    } else
+        TEST(res == cvt_type::ok);

-    std::string res(to, to_next);
-    return res;
+    return std::string(to, to_next);
 }

 void test_codecvt_subst()
--- a/test/test_convert.cpp
+++ b/test/test_convert.cpp
@@ -15,6 +15,44 @@
 #pragma warning(disable : 4428) // universal-character-name encountered in source
 #endif

+std::wstring widen_buf_ptr(const std::string& s)
+{
+    wchar_t buf[50];
+    TEST(boost::nowide::widen(buf, 50, s.c_str()) == buf);
+    return buf;
+}
+
+std::string narrow_buf_ptr(const std::wstring& s)
+{
+    char buf[50];
+    TEST(boost::nowide::narrow(buf, 50, s.c_str()) == buf);
+    return buf;
+}
+
+std::wstring widen_buf_range(const std::string& s)
+{
+    wchar_t buf[50];
+    TEST(boost::nowide::widen(buf, 50, s.c_str(), s.c_str() + s.size()) == buf);
+    return buf;
+}
+
+std::string narrow_buf_range(const std::wstring& s)
+{
+    char buf[50];
+    TEST(boost::nowide::narrow(buf, 50, s.c_str(), s.c_str() + s.size()) == buf);
+    return buf;
+}
+
+std::wstring widen_raw_string(const std::string& s)
+{
+    return boost::nowide::widen(s.c_str());
+}
+
+std::string narrow_raw_string(const std::wstring& s)
+{
+    return boost::nowide::narrow(s.c_str());
+}
+
 int main()
 {
    try
@@ -23,14 +61,11 @@ int main()
        std::wstring whello = L"\u05e9\u05dc\u05d5\u05dd";
        std::wstring whello_3e = L"\u05e9\u05dc\u05d5\ufffd";
        std::wstring whello_3 = L"\u05e9\u05dc\u05d5";
-        // Example filenames used in tests
-        std::string example = "\xd7\xa9-\xd0\xbc-\xce\xbd.txt";
-        std::wstring wexample = L"\u05e9-\u043c-\u03bd.txt";

        std::cout << "- boost::nowide::widen" << std::endl;
        {
            const char* b = hello.c_str();
-            const char* e = b + 8;
+            const char* e = b + hello.size();
            wchar_t buf[6] = {0, 0, 0, 0, 0, 1};
            TEST(boost::nowide::widen(buf, 5, b, e) == buf);
            TEST(buf == whello);
@@ -42,26 +77,11 @@ int main()
            TEST(buf == whello_3);
            TEST(boost::nowide::widen(buf, 5, b, b) == buf && buf[0] == 0);
            TEST(boost::nowide::widen(buf, 5, b, b + 2) == buf && buf[1] == 0 && buf[0] == whello[0]);
-            b = "\xFF\xFF";
-            e = b + 2;
-            TEST(boost::nowide::widen(buf, 5, b, e) == buf);
-            TEST(buf == std::wstring(L"\ufffd\ufffd"));
-            b = "\xd7\xa9\xFF";
-            e = b + 3;
-            TEST(boost::nowide::widen(buf, 5, b, e) == buf);
-            TEST(buf == std::wstring(L"\u05e9\ufffd"));
-            TEST(boost::nowide::widen(buf, 5, b, b + 1) == buf);
-            TEST(buf == std::wstring(L"\ufffd"));
-            b = "\xFF\xd7\xa9";
-            e = b + 3;
-            TEST(boost::nowide::widen(buf, 5, b, e) == buf);
-            TEST(buf == std::wstring(L"\ufffd\u05e9"));
-            TEST(boost::nowide::widen(example) == wexample);
        }
        std::cout << "- boost::nowide::narrow" << std::endl;
        {
            const wchar_t* b = whello.c_str();
-            const wchar_t* e = b + 4;
+            const wchar_t* e = b + whello.size();
            char buf[10] = {0};
            buf[9] = 1;
            TEST(boost::nowide::narrow(buf, 9, b, e) == buf);
@@ -70,23 +90,15 @@ int main()
            TEST(boost::nowide::narrow(buf, 8, b, e) == 0);
            TEST(boost::nowide::narrow(buf, 7, b, e - 1) == buf);
            TEST(buf == hello.substr(0, 6));
-            wchar_t tmp[3] = {0xDC01, 0x05e9, 0};
-            b = tmp;
-            TEST(boost::nowide::narrow(buf, 10, b, b + 2) == buf);
-            TEST(buf == std::string("\xEF\xBF\xBD\xd7\xa9"));
-            wchar_t tmp2[3] = {0x05e9, 0xD800, 0};
-            b = tmp2;
-            TEST(boost::nowide::narrow(buf, 10, b, b + 2) == buf);
-            TEST(buf == std::string("\xd7\xa9\xEF\xBF\xBD"));
-            TEST(boost::nowide::narrow(wexample) == example);
        }
-        {
-            char buf[3];
-            wchar_t wbuf[3];
-            TEST(boost::nowide::narrow(buf, 3, L"xy") == std::string("xy"));
-            TEST(boost::nowide::widen(wbuf, 3, "xy") == std::wstring(L"xy"));
-        }
-        std::cout << "- Substitutions" << std::endl;
+
+        std::cout << "- (output_buffer, buffer_size, input_raw_string)" << std::endl;
+        run_all(widen_buf_ptr, narrow_buf_ptr);
+        std::cout << "- (output_buffer, buffer_size, input_raw_string, string_len)" << std::endl;
+        run_all(widen_buf_range, narrow_buf_range);
+        std::cout << "- (input_raw_string)" << std::endl;
+        run_all(widen_raw_string, narrow_raw_string);
+        std::cout << "- (const std::string&)" << std::endl;
        run_all(boost::nowide::widen, boost::nowide::narrow);
    } catch(const std::exception& e)
    {
--- a/test/test_sets.hpp
+++ b/test/test_sets.hpp
@@ -8,7 +8,7 @@
 #ifndef BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED
 #define BOOST_NOWIDE_TEST_SETS_HPP_INCLUDED

-#include <boost/config.hpp>
+#include <boost/nowide/config.hpp>
 #include <iostream>
 #include <string>

@@ -28,67 +28,102 @@ struct wide_to_utf8
 #pragma warning(disable : 4428) // universal-character-name encountered in source
 #endif

-utf8_to_wide n2w_tests[] = {{"\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt",
-                             L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt"},
-                            {"\xFF\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82", L"\uFFFD\u043F\u0440\u0438\u0432\u0435\u0442"},
-                            {"\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82\xFF", L"\u043F\u0440\u0438\u0432\u0435\u0442\uFFFD"},
-                            {"\xE3\x82\xFF\xE3\x81\x82", L"\ufffd\u3042"},
-                            {"\xE3\xFF\x84\xE3\x81\x82", L"\ufffd\ufffd\u3042"}};
+const std::wstring wreplacement_str(1, wchar_t(BOOST_NOWIDE_REPLACEMENT_CHARACTER));

-wide_to_utf8 w2n_tests_utf16[] = {
-  {
-    L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt",
-    "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt",
-  },
+// clang-format off
+const utf8_to_wide roundtrip_tests[] = {
+    {"", L""},
+    {"\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt",
+    L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt"},
+    {"\xd7\xa9-\xd0\xbc-\xce\xbd.txt",
+    L"\u05e9-\u043c-\u03bd.txt"},
+    {"\xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d",
+    L"\u05e9\u05dc\u05d5\u05dd"},
+};
+
+const utf8_to_wide invalid_utf8_tests[] = {
+    {"\xFF\xFF", L"\ufffd\ufffd"},
+    {"\xd7\xa9\xFF", L"\u05e9\ufffd"},
+    {"\xd7", L"\ufffd"},
+    {"\xFF\xd7\xa9", L"\ufffd\u05e9"},
+    {"\xFF\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82", L"\uFFFD\u043F\u0440\u0438\u0432\u0435\u0442"},
+    {"\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82\xFF", L"\u043F\u0440\u0438\u0432\u0435\u0442\uFFFD"},
+    {"\xE3\x82\xFF\xE3\x81\x82", L"\ufffd\u3042"},
+    {"\xE3\xFF\x84\xE3\x81\x82", L"\ufffd\ufffd\u3042"},
+};
+
+const wide_to_utf8 invalid_wide_tests[] = {
+  {L"\xDC01\x05e9", "\xEF\xBF\xBD\xd7\xa9"},
+  {L"\x05e9\xD800", "\xd7\xa9\xEF\xBF\xBD"},
+  {L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
+   "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
+  {L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
+   "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
+};
+
+
+const wide_to_utf8 invalid_utf16_tests[] = {
  {L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
   "\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
-  {L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
-   "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
  {L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
   "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
-  {L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
-   "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}};
+};

-wide_to_utf8 w2n_tests_utf32[] = {
-  {
-    L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt",
-    "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt",
-  },
+const wide_to_utf8 invalid_utf32_tests[] = {
  {L"\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
   "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
-  {L"\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
-   "\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
  {L"\u3084\u3042\xD800\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
   "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"},
-  {L"\u3084\u3042\xDC00\x20\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042",
-   "\xE3\x82\x84\xE3\x81\x82\xEF\xBF\xBD \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82"}};
+};
+
+// clang-format on

 #ifdef BOOST_MSVC
 #pragma warning(push)
 #pragma warning(disable : 4127) // Constant expression detected
 #endif

+template<typename T, size_t N>
+size_t array_size(const T (&)[N])
+{
+    return N;
+}
+
 void run_all(std::wstring (*to_wide)(const std::string&), std::string (*to_narrow)(const std::wstring&))
 {
-    for(size_t i = 0; i < sizeof(n2w_tests) / sizeof(n2w_tests[0]); i++)
+    for(size_t i = 0; i < array_size(roundtrip_tests); i++)
    {
-        std::cout << "  N2W  " << i << std::endl;
-        TEST(to_wide(n2w_tests[i].utf8) == n2w_tests[i].wide);
+        std::cout << "  Roundtrip  " << i << std::endl;
+        TEST(roundtrip_tests[i].utf8 == to_narrow(roundtrip_tests[i].wide));
+        TEST(to_wide(roundtrip_tests[i].utf8) == roundtrip_tests[i].wide);
    }
+
+    for(size_t i = 0; i < array_size(invalid_utf8_tests); i++)
+    {
+        std::cout << "  Invalid UTF8  " << i << std::endl;
+        TEST(to_wide(invalid_utf8_tests[i].utf8) == invalid_utf8_tests[i].wide);
+    }
+
+    for(size_t i = 0; i < array_size(invalid_wide_tests); i++)
+    {
+        std::cout << "  Invalid Wide  " << i << std::endl;
+        TEST(to_narrow(invalid_wide_tests[i].wide) == invalid_wide_tests[i].utf8);
+    }
+
    size_t total = 0;
    const wide_to_utf8* ptr = 0;
    if(sizeof(wchar_t) == 2)
    {
-        ptr = w2n_tests_utf16;
-        total = sizeof(w2n_tests_utf16) / sizeof(w2n_tests_utf16[0]);
+        ptr = invalid_utf16_tests;
+        total = array_size(invalid_utf16_tests);
    } else
    {
-        ptr = w2n_tests_utf32;
-        total = sizeof(w2n_tests_utf32) / sizeof(w2n_tests_utf32[0]);
+        ptr = invalid_utf32_tests;
+        total = array_size(invalid_utf32_tests);
    }
    for(size_t i = 0; i < total; i++)
    {
-        std::cout << "  W2N  " << i << std::endl;
+        std::cout << "  Invalid UTF16/32  " << i << std::endl;
        TEST(to_narrow(ptr[i].wide) == ptr[i].utf8);
    }
 }
--- a/test/test_stackstring.cpp
+++ b/test/test_stackstring.cpp
@@ -27,12 +27,24 @@ std::string stackstring_to_narrow(const std::wstring& s)
    return ss.get();
 }

+std::wstring heap_stackstring_to_wide(const std::string& s)
+{
+    const boost::nowide::basic_stackstring<wchar_t, char, 1> ss(s.c_str());
+    return ss.get();
+}
+
+std::string heap_stackstring_to_narrow(const std::wstring& s)
+{
+    const boost::nowide::basic_stackstring<char, wchar_t, 1> ss(s.c_str());
+    return ss.get();
+}
+
 int main()
 {
    try
    {
        std::string hello = "\xd7\xa9\xd7\x9c\xd7\x95\xd7\x9d";
-        std::wstring whello = L"\u05e9\u05dc\u05d5\u05dd";
+        std::wstring whello = boost::nowide::widen(hello);
        const wchar_t* wempty = L"";

        {
@@ -76,6 +88,7 @@ int main()
            TEST(s2.get() == std::string());
        }
        {
+            // Will be put on heap
            TEST(whello.size() >= 3);
            boost::nowide::basic_stackstring<wchar_t, char, 3> sw;
            TEST(sw.convert(hello.c_str()));
@@ -84,6 +97,7 @@ int main()
            TEST(sw.get() == whello);
        }
        {
+            // Will be put on stack
            TEST(whello.size() < 5);
            boost::nowide::basic_stackstring<wchar_t, char, 5> sw;
            TEST(sw.convert(hello.c_str()));
@@ -92,6 +106,7 @@ int main()
            TEST(sw.get() == whello);
        }
        {
+            // Will be put on heap
            TEST(hello.size() >= 5);
            boost::nowide::basic_stackstring<char, wchar_t, 5> sw;
            TEST(sw.convert(whello.c_str()));
@@ -100,6 +115,7 @@ int main()
            TEST(sw.get() == hello);
        }
        {
+            // Will be put on stack
            TEST(hello.size() < 10);
            boost::nowide::basic_stackstring<char, wchar_t, 10> sw;
            TEST(sw.convert(whello.c_str()));
@@ -168,8 +184,10 @@ int main()
            TEST(stack.get() == stackVal);
            TEST(heap.get() == heapVal);
        }
-        std::cout << "- Substitutions" << std::endl;
+        std::cout << "- Stackstring" << std::endl;
        run_all(stackstring_to_wide, stackstring_to_narrow);
+        std::cout << "- Heap Stackstring" << std::endl;
+        run_all(heap_stackstring_to_wide, heap_stackstring_to_narrow);
    } catch(const std::exception& e)
    {
        std::cerr << "Failed :" << e.what() << std::endl;