From 668a526c5adbe458fc0dd7ca13faf19d9ca60689 Mon Sep 17 00:00:00 2001 From: Peter Dimov Date: Wed, 4 Dec 2019 19:31:05 +0200 Subject: [PATCH] Change line endings to LF --- include/boost/nowide/utf8_codecvt.hpp | 996 +++++++++++++------------- 1 file changed, 498 insertions(+), 498 deletions(-) diff --git a/include/boost/nowide/utf8_codecvt.hpp b/include/boost/nowide/utf8_codecvt.hpp index c81af8c..b1df004 100644 --- a/include/boost/nowide/utf8_codecvt.hpp +++ b/include/boost/nowide/utf8_codecvt.hpp @@ -1,498 +1,498 @@ -// -// Copyright (c) 2015 Artyom Beilis (Tonkikh) -// -// Distributed under the Boost Software License, Version 1.0. (See -// accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -// -#ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP -#define BOOST_NOWIDE_UTF8_CODECVT_HPP - -#include -#include -#include -#include -#include - -namespace boost { -namespace nowide { - -// -// Make sure that mbstate can keep 16 bit of UTF-16 sequence -// -BOOST_STATIC_ASSERT(sizeof(std::mbstate_t)>=2); - -#if defined _MSC_VER && _MSC_VER < 1700 -// MSVC do_length is non-standard it counts wide characters instead of narrow and does not change mbstate -#define BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST -#endif - -template -class utf8_codecvt; - -template -class utf8_codecvt : public std::codecvt -{ -public: - utf8_codecvt(size_t refs = 0) : std::codecvt(refs) - { - } -protected: - - typedef CharType uchar; - - virtual std::codecvt_base::result do_unshift(std::mbstate_t &s,char *from,char * /*to*/,char *&next) const - { - boost::uint16_t &state = *reinterpret_cast(&s); -#ifdef DEBUG_CODECVT - std::cout << "Entering unshift " << std::hex << state << std::dec << std::endl; -#endif - if(state != 0) - return std::codecvt_base::error; - next=from; - return std::codecvt_base::ok; - } - virtual int do_encoding() const throw() - { - return 0; - } - virtual int do_max_length() const throw() - { - return 4; - } - virtual bool do_always_noconv() const throw() - { - return false; - } - - virtual int - do_length( std::mbstate_t - #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST - const - #endif - &std_state, - char const *from, - char const *from_end, - size_t max) const - { - #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST - char const *save_from = from; - boost::uint16_t &state = *reinterpret_cast(&std_state); - #else - size_t save_max = max; - boost::uint16_t state = *reinterpret_cast(&std_state); - #endif - while(max > 0 && from < from_end){ - char const *prev_from = from; - boost::uint32_t ch=boost::locale::utf::utf_traits::decode(from,from_end); - if(ch==boost::locale::utf::illegal) { - ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; - } - else if(ch==boost::locale::utf::incomplete) { - from = prev_from; - break; - } - max --; - if(ch > 0xFFFF) { - if(state == 0) { - from = prev_from; - state = 1; - } - else { - state = 0; - } - } - } - #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST - return from - save_from; - #else - return save_max - max; - #endif - } - - - virtual std::codecvt_base::result - do_in( std::mbstate_t &std_state, - char const *from, - char const *from_end, - char const *&from_next, - uchar *to, - uchar *to_end, - uchar *&to_next) const - { - std::codecvt_base::result r=std::codecvt_base::ok; - - // mbstate_t is POD type and should be initialized to 0 (i.a. state = stateT()) - // according to standard. We use it to keep a flag 0/1 for surrogate pair writing - // - // if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd - // and first pair is written, but no input consumed - boost::uint16_t &state = *reinterpret_cast(&std_state); - while(to < to_end && from < from_end) - { -#ifdef DEBUG_CODECVT - std::cout << "Entering IN--------------" << std::endl; - std::cout << "State " << std::hex << state <::decode(from,from_end); - - if(ch==boost::locale::utf::illegal) { - ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; - } - else if(ch==boost::locale::utf::incomplete) { - from = from_saved; - r=std::codecvt_base::partial; - break; - } - // Normal codepoints go direcly to stream - if(ch <= 0xFFFF) { - *to++=ch; - } - else { - // for other codepoints we do following - // - // 1. We can't consume our input as we may find ourselfs - // in state where all input consumed but not all output written,i.e. only - // 1st pair is written - // 2. We only write first pair and mark this in the state, we also revert back - // the from pointer in order to make sure this codepoint would be read - // once again and then we would consume our input together with writing - // second surrogate pair - ch-=0x10000; - boost::uint16_t vh = ch >> 10; - boost::uint16_t vl = ch & 0x3FF; - boost::uint16_t w1 = vh + 0xD800; - boost::uint16_t w2 = vl + 0xDC00; - if(state == 0) { - from = from_saved; - *to++ = w1; - state = 1; - } - else { - *to++ = w2; - state = 0; - } - } - } - from_next=from; - to_next=to; - if(r == std::codecvt_base::ok && (from!=from_end || state!=0)) - r = std::codecvt_base::partial; -#ifdef DEBUG_CODECVT - std::cout << "Returning "; - switch(r) { - case std::codecvt_base::ok: - std::cout << "ok" << std::endl; - break; - case std::codecvt_base::partial: - std::cout << "partial" << std::endl; - break; - case std::codecvt_base::error: - std::cout << "error" << std::endl; - break; - default: - std::cout << "other" << std::endl; - break; - } - std::cout << "State " << std::hex << state <=2 in order - // to be able to store first observerd surrogate pair - // - // State: state!=0 - a first surrogate pair was observerd (state = first pair), - // we expect the second one to come and then zero the state - /// - boost::uint16_t &state = *reinterpret_cast(&std_state); - while(to < to_end && from < from_end) - { -#ifdef DEBUG_CODECVT - std::cout << "Entering OUT --------------" << std::endl; - std::cout << "State " << std::hex << state <::width(ch); - if(to_end - to < len) { - r=std::codecvt_base::partial; - break; - } - to = boost::locale::utf::utf_traits::encode(ch,to); - state = 0; - from++; - } - from_next=from; - to_next=to; - if(r==std::codecvt_base::ok && from!=from_end) - r = std::codecvt_base::partial; -#ifdef DEBUG_CODECVT - std::cout << "Returning "; - switch(r) { - case std::codecvt_base::ok: - std::cout << "ok" << std::endl; - break; - case std::codecvt_base::partial: - std::cout << "partial" << std::endl; - break; - case std::codecvt_base::error: - std::cout << "error" << std::endl; - break; - default: - std::cout << "other" << std::endl; - break; - } - std::cout << "State " << std::hex << state < -class utf8_codecvt : public std::codecvt -{ -public: - utf8_codecvt(size_t refs = 0) : std::codecvt(refs) - { - } -protected: - - typedef CharType uchar; - - virtual std::codecvt_base::result do_unshift(std::mbstate_t &/*s*/,char *from,char * /*to*/,char *&next) const - { - next=from; - return std::codecvt_base::ok; - } - virtual int do_encoding() const throw() - { - return 0; - } - virtual int do_max_length() const throw() - { - return 4; - } - virtual bool do_always_noconv() const throw() - { - return false; - } - - virtual int - do_length( std::mbstate_t - #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST - const - #endif - &/*state*/, - char const *from, - char const *from_end, - size_t max) const - { - #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST - char const *start_from = from; - #else - size_t save_max = max; - #endif - - while(max > 0 && from < from_end){ - char const *save_from = from; - boost::uint32_t ch=boost::locale::utf::utf_traits::decode(from,from_end); - if(ch==boost::locale::utf::incomplete) { - from = save_from; - break; - } - else if(ch == boost::locale::utf::illegal) { - ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; - } - max--; - } - #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST - return from - start_from; - #else - return save_max - max; - #endif - } - - - virtual std::codecvt_base::result - do_in( std::mbstate_t &/*state*/, - char const *from, - char const *from_end, - char const *&from_next, - uchar *to, - uchar *to_end, - uchar *&to_next) const - { - std::codecvt_base::result r=std::codecvt_base::ok; - - // mbstate_t is POD type and should be initialized to 0 (i.a. state = stateT()) - // according to standard. We use it to keep a flag 0/1 for surrogate pair writing - // - // if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd - // and first pair is written, but no input consumed - while(to < to_end && from < from_end) - { -#ifdef DEBUG_CODECVT - std::cout << "Entering IN--------------" << std::endl; - std::cout << "State " << std::hex << state <::decode(from,from_end); - - if(ch==boost::locale::utf::illegal) { - ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; - } - else if(ch==boost::locale::utf::incomplete) { - r=std::codecvt_base::partial; - from=from_saved; - break; - } - *to++=ch; - } - from_next=from; - to_next=to; - if(r == std::codecvt_base::ok && from!=from_end) - r = std::codecvt_base::partial; -#ifdef DEBUG_CODECVT - std::cout << "Returning "; - switch(r) { - case std::codecvt_base::ok: - std::cout << "ok" << std::endl; - break; - case std::codecvt_base::partial: - std::cout << "partial" << std::endl; - break; - case std::codecvt_base::error: - std::cout << "error" << std::endl; - break; - default: - std::cout << "other" << std::endl; - break; - } - std::cout << "State " << std::hex << state <::width(ch); - if(to_end - to < len) { - r=std::codecvt_base::partial; - break; - } - to = boost::locale::utf::utf_traits::encode(ch,to); - from++; - } - from_next=from; - to_next=to; - if(r==std::codecvt_base::ok && from!=from_end) - r = std::codecvt_base::partial; -#ifdef DEBUG_CODECVT - std::cout << "Returning "; - switch(r) { - case std::codecvt_base::ok: - std::cout << "ok" << std::endl; - break; - case std::codecvt_base::partial: - std::cout << "partial" << std::endl; - break; - case std::codecvt_base::error: - std::cout << "error" << std::endl; - break; - default: - std::cout << "other" << std::endl; - break; - } - std::cout << "State " << std::hex << state < +#include +#include +#include +#include + +namespace boost { +namespace nowide { + +// +// Make sure that mbstate can keep 16 bit of UTF-16 sequence +// +BOOST_STATIC_ASSERT(sizeof(std::mbstate_t)>=2); + +#if defined _MSC_VER && _MSC_VER < 1700 +// MSVC do_length is non-standard it counts wide characters instead of narrow and does not change mbstate +#define BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST +#endif + +template +class utf8_codecvt; + +template +class utf8_codecvt : public std::codecvt +{ +public: + utf8_codecvt(size_t refs = 0) : std::codecvt(refs) + { + } +protected: + + typedef CharType uchar; + + virtual std::codecvt_base::result do_unshift(std::mbstate_t &s,char *from,char * /*to*/,char *&next) const + { + boost::uint16_t &state = *reinterpret_cast(&s); +#ifdef DEBUG_CODECVT + std::cout << "Entering unshift " << std::hex << state << std::dec << std::endl; +#endif + if(state != 0) + return std::codecvt_base::error; + next=from; + return std::codecvt_base::ok; + } + virtual int do_encoding() const throw() + { + return 0; + } + virtual int do_max_length() const throw() + { + return 4; + } + virtual bool do_always_noconv() const throw() + { + return false; + } + + virtual int + do_length( std::mbstate_t + #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST + const + #endif + &std_state, + char const *from, + char const *from_end, + size_t max) const + { + #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST + char const *save_from = from; + boost::uint16_t &state = *reinterpret_cast(&std_state); + #else + size_t save_max = max; + boost::uint16_t state = *reinterpret_cast(&std_state); + #endif + while(max > 0 && from < from_end){ + char const *prev_from = from; + boost::uint32_t ch=boost::locale::utf::utf_traits::decode(from,from_end); + if(ch==boost::locale::utf::illegal) { + ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; + } + else if(ch==boost::locale::utf::incomplete) { + from = prev_from; + break; + } + max --; + if(ch > 0xFFFF) { + if(state == 0) { + from = prev_from; + state = 1; + } + else { + state = 0; + } + } + } + #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST + return from - save_from; + #else + return save_max - max; + #endif + } + + + virtual std::codecvt_base::result + do_in( std::mbstate_t &std_state, + char const *from, + char const *from_end, + char const *&from_next, + uchar *to, + uchar *to_end, + uchar *&to_next) const + { + std::codecvt_base::result r=std::codecvt_base::ok; + + // mbstate_t is POD type and should be initialized to 0 (i.a. state = stateT()) + // according to standard. We use it to keep a flag 0/1 for surrogate pair writing + // + // if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd + // and first pair is written, but no input consumed + boost::uint16_t &state = *reinterpret_cast(&std_state); + while(to < to_end && from < from_end) + { +#ifdef DEBUG_CODECVT + std::cout << "Entering IN--------------" << std::endl; + std::cout << "State " << std::hex << state <::decode(from,from_end); + + if(ch==boost::locale::utf::illegal) { + ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; + } + else if(ch==boost::locale::utf::incomplete) { + from = from_saved; + r=std::codecvt_base::partial; + break; + } + // Normal codepoints go direcly to stream + if(ch <= 0xFFFF) { + *to++=ch; + } + else { + // for other codepoints we do following + // + // 1. We can't consume our input as we may find ourselfs + // in state where all input consumed but not all output written,i.e. only + // 1st pair is written + // 2. We only write first pair and mark this in the state, we also revert back + // the from pointer in order to make sure this codepoint would be read + // once again and then we would consume our input together with writing + // second surrogate pair + ch-=0x10000; + boost::uint16_t vh = ch >> 10; + boost::uint16_t vl = ch & 0x3FF; + boost::uint16_t w1 = vh + 0xD800; + boost::uint16_t w2 = vl + 0xDC00; + if(state == 0) { + from = from_saved; + *to++ = w1; + state = 1; + } + else { + *to++ = w2; + state = 0; + } + } + } + from_next=from; + to_next=to; + if(r == std::codecvt_base::ok && (from!=from_end || state!=0)) + r = std::codecvt_base::partial; +#ifdef DEBUG_CODECVT + std::cout << "Returning "; + switch(r) { + case std::codecvt_base::ok: + std::cout << "ok" << std::endl; + break; + case std::codecvt_base::partial: + std::cout << "partial" << std::endl; + break; + case std::codecvt_base::error: + std::cout << "error" << std::endl; + break; + default: + std::cout << "other" << std::endl; + break; + } + std::cout << "State " << std::hex << state <=2 in order + // to be able to store first observerd surrogate pair + // + // State: state!=0 - a first surrogate pair was observerd (state = first pair), + // we expect the second one to come and then zero the state + /// + boost::uint16_t &state = *reinterpret_cast(&std_state); + while(to < to_end && from < from_end) + { +#ifdef DEBUG_CODECVT + std::cout << "Entering OUT --------------" << std::endl; + std::cout << "State " << std::hex << state <::width(ch); + if(to_end - to < len) { + r=std::codecvt_base::partial; + break; + } + to = boost::locale::utf::utf_traits::encode(ch,to); + state = 0; + from++; + } + from_next=from; + to_next=to; + if(r==std::codecvt_base::ok && from!=from_end) + r = std::codecvt_base::partial; +#ifdef DEBUG_CODECVT + std::cout << "Returning "; + switch(r) { + case std::codecvt_base::ok: + std::cout << "ok" << std::endl; + break; + case std::codecvt_base::partial: + std::cout << "partial" << std::endl; + break; + case std::codecvt_base::error: + std::cout << "error" << std::endl; + break; + default: + std::cout << "other" << std::endl; + break; + } + std::cout << "State " << std::hex << state < +class utf8_codecvt : public std::codecvt +{ +public: + utf8_codecvt(size_t refs = 0) : std::codecvt(refs) + { + } +protected: + + typedef CharType uchar; + + virtual std::codecvt_base::result do_unshift(std::mbstate_t &/*s*/,char *from,char * /*to*/,char *&next) const + { + next=from; + return std::codecvt_base::ok; + } + virtual int do_encoding() const throw() + { + return 0; + } + virtual int do_max_length() const throw() + { + return 4; + } + virtual bool do_always_noconv() const throw() + { + return false; + } + + virtual int + do_length( std::mbstate_t + #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST + const + #endif + &/*state*/, + char const *from, + char const *from_end, + size_t max) const + { + #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST + char const *start_from = from; + #else + size_t save_max = max; + #endif + + while(max > 0 && from < from_end){ + char const *save_from = from; + boost::uint32_t ch=boost::locale::utf::utf_traits::decode(from,from_end); + if(ch==boost::locale::utf::incomplete) { + from = save_from; + break; + } + else if(ch == boost::locale::utf::illegal) { + ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; + } + max--; + } + #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST + return from - start_from; + #else + return save_max - max; + #endif + } + + + virtual std::codecvt_base::result + do_in( std::mbstate_t &/*state*/, + char const *from, + char const *from_end, + char const *&from_next, + uchar *to, + uchar *to_end, + uchar *&to_next) const + { + std::codecvt_base::result r=std::codecvt_base::ok; + + // mbstate_t is POD type and should be initialized to 0 (i.a. state = stateT()) + // according to standard. We use it to keep a flag 0/1 for surrogate pair writing + // + // if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd + // and first pair is written, but no input consumed + while(to < to_end && from < from_end) + { +#ifdef DEBUG_CODECVT + std::cout << "Entering IN--------------" << std::endl; + std::cout << "State " << std::hex << state <::decode(from,from_end); + + if(ch==boost::locale::utf::illegal) { + ch = BOOST_NOWIDE_REPLACEMENT_CHARACTER; + } + else if(ch==boost::locale::utf::incomplete) { + r=std::codecvt_base::partial; + from=from_saved; + break; + } + *to++=ch; + } + from_next=from; + to_next=to; + if(r == std::codecvt_base::ok && from!=from_end) + r = std::codecvt_base::partial; +#ifdef DEBUG_CODECVT + std::cout << "Returning "; + switch(r) { + case std::codecvt_base::ok: + std::cout << "ok" << std::endl; + break; + case std::codecvt_base::partial: + std::cout << "partial" << std::endl; + break; + case std::codecvt_base::error: + std::cout << "error" << std::endl; + break; + default: + std::cout << "other" << std::endl; + break; + } + std::cout << "State " << std::hex << state <::width(ch); + if(to_end - to < len) { + r=std::codecvt_base::partial; + break; + } + to = boost::locale::utf::utf_traits::encode(ch,to); + from++; + } + from_next=from; + to_next=to; + if(r==std::codecvt_base::ok && from!=from_end) + r = std::codecvt_base::partial; +#ifdef DEBUG_CODECVT + std::cout << "Returning "; + switch(r) { + case std::codecvt_base::ok: + std::cout << "ok" << std::endl; + break; + case std::codecvt_base::partial: + std::cout << "partial" << std::endl; + break; + case std::codecvt_base::error: + std::cout << "error" << std::endl; + break; + default: + std::cout << "other" << std::endl; + break; + } + std::cout << "State " << std::hex << state <