diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index d0e5676..c4130a1 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -194,8 +194,8 @@ explicit has_icu ; # This function is called whenever the 'boost_locale' metatarget -# below is generated and figures what external components we have, -# what user wants, and what sources have to be compiled in the end. +# below is generated and figures out what external components we have, +# what the user wants, and what sources have to be compiled in the end. rule configure-full ( properties * : flags-only ) { diff --git a/doc/boundary_analysys.txt b/doc/boundary_analysys.txt index ef0eff2..d8aa554 100644 --- a/doc/boundary_analysys.txt +++ b/doc/boundary_analysys.txt @@ -21,28 +21,29 @@ \section boundary_analysys_basics Basics Boost.Locale provides a boundary analysis tool, allowing you to split text into characters, -words, or sentences, and find appropriate places for line breaks. +words, or sentences. It is commonly used to find appropriate places for line breaks. -\note This task is not a trivial task. +\note This is not a trivial task! \par -A Unicode code point and a character are not equivalent, for example: -Hebrew word Shalom - "שָלוֹם" that consists of 4 characters and 6 code points (4 base letters and 2 diacritical marks) +A Unicode code point and a character are not equivalent. +For example, the Hebrew word Shalom - "שָלוֹם" - consists of 4 characters and +6 code points (4 base letters and 2 diacritical marks). \par Words may not be separated by space characters in some languages like in Japanese or Chinese. Boost.Locale provides 2 major classes for boundary analysis: -- \ref boost::locale::boundary::segment_index - an object that holds an index of segments in the text (like words, characters, - sentences). It provides an access to \ref boost::locale::boundary::segment "segment" objects via iterators. 
-- \ref boost::locale::boundary::boundary_point_index - an object that holds an index of boundary points in the text. - It allows to iterate over the \ref boost::locale::boundary::boundary_point "boundary_point" objects. +- \ref boost::locale::boundary::segment_index - an object that holds the index of segments in text (like words, characters, + sentences). It provides access to \ref boost::locale::boundary::segment "segment" objects via iterators. +- \ref boost::locale::boundary::boundary_point_index - an object that holds the index of boundary points in text. + It can iterate over the \ref boost::locale::boundary::boundary_point "boundary_point" objects. Each of the classes above use an iterator type as template parameter. -Both of these classes accept in their constructor: +Both of these classes accept in their constructors: - A flag that defines boundary analysis \ref boost::locale::boundary::boundary_type "boundary_type". -- The pair of iterators that define the text range that should be analysed -- A locale parameter (if not given the global one is used) +- The pair of iterators that define the text range to be analysed +- A locale parameter (if not given, the global one is used) For example: \code @@ -52,13 +53,13 @@ std::locale loc = ... ; ba::segment_index map(ba::word,text.begin(),text.end(),loc); \endcode -Each of them provide a members \c begin(), \c end() and \c find() that allow to iterate +Each class implements members \c begin(), \c end() and \c find() making it possible to iterate over the selected segments or boundaries in the text or find a location of a segment or -boundary for given iterator. +boundary for a given iterator. 
-Convenience a typedefs like \ref boost::locale::boundary::ssegment_index "ssegment_index" -or \ref boost::locale::boundary::wcboundary_point_index "wcboundary_point_index" provided as well, +Convenience typedefs like \ref boost::locale::boundary::ssegment_index "ssegment_index" +or \ref boost::locale::boundary::wcboundary_point_index "wcboundary_point_index" are provided as well, where "w", "u16" and "u32" prefixes define a character type \c wchar_t, \c char16_t and \c char32_t and "c" and "s" prefixes define whether std::basic_string::const_iterator or CharType const * are used. @@ -80,7 +81,7 @@ For example: using namespace boost::locale::boundary; boost::locale::generator gen; std::string text="To be or not to be, that is the question." -// Create mapping of text for token iterator using global locale. +// Create mapping of text for token iterator using the global locale. ssegment_index map(word,text.begin(),text.end(),gen("en_US.UTF-8")); // Print all "words" -- chunks of word boundary for(ssegment_index::iterator it=map.begin(),e=map.end();it!=e;++it) @@ -95,15 +96,15 @@ Would print: \endverbatim This sentence "生きるか死ぬか、それが問題だ。" (from Tatoeba database) -would be split into following segments in \c ja_JP.UTF-8 (Japanese) locale: +would be split into following segments in the \c ja_JP.UTF-8 (Japanese) locale: \verbatim "生", "きるか", "死", "ぬか", "、", "それが", "問題", "だ", "。", \endverbatim The boundary analysis that is done by Boost.Locale -is much more complicated then just splitting the text according -to white space characters, even thou it is not perfect. +is much more complicated than just splitting the text according +to white space characters, although it is not always perfect. \section boundary_analysys_segments_rules Using Rules @@ -111,12 +112,12 @@ to white space characters, even thou it is not perfect. 
The segments selection can be customized using \ref boost::locale::boundary::segment_index::rule(rule_type) "rule()" and \ref boost::locale::boundary::segment_index::full_select(bool) "full_select()" member functions. -By default segment_index's iterator return each text segment defined by two boundary points regardless +By default, segment_index's iterator returns each text segment defined by two boundary points regardless the way they were selected. Thus in the example above we could see text segments like "." or " " that were selected as words. Using a \c rule() member function we can specify a binary mask of rules we want to use for selection of -the boundary points using \ref bl_boundary_word_rules "word", \ref bl_boundary_line_rules "line" +boundary points using \ref bl_boundary_word_rules "word", \ref bl_boundary_line_rules "line" and \ref bl_boundary_sentence_rules "sentence" boundary rules. For example, by calling @@ -133,7 +134,7 @@ So the code: \code using namespace boost::locale::boundary; std::string text="To be or not to be, that is the question." -// Create mapping of text for token iterator using global locale. +// Create mapping of text for token iterator using the global locale. ssegment_index map(word,text.begin(),text.end()); // Define a rule map.rule(word_any); @@ -149,14 +150,14 @@ Would print: "To", "be", "or", "not", "to", "be", "that", "is", "the", "question", \endverbatim -And the for given text="生きるか死ぬか、それが問題だ。" and rule(\ref boost::locale::boundary::word_ideo "word_ideo"), the example above would print. +And the for given text="生きるか死ぬか、それが問題だ。" and rule(\ref boost::locale::boundary::word_ideo "word_ideo"), the example above would print: \verbatim "生", "死", "問題", \endverbatim -You can access specific rules the segments where selected it using \ref boost::locale::boundary::segment::rule() "segment::rule()" member -function. Using a bit-mask of rules. 
+You can determine why a segment was selected by using the \ref boost::locale::boundary::segment::rule() "segment::rule()" member +function. The return value is a bit-mask of rules. For example: @@ -177,7 +178,7 @@ for(ssegment_index::iterator it=map.begin(),e=map.end();it!=e;++it) { } \endcode -Would print +Would print: \verbatim Segment 生 contains: ideographic characters @@ -191,25 +192,23 @@ Segment だ contains: kana characters Segment 。 contains: white space or punctuation marks \endverbatim -One important things that should be noted that each segment is defined -by a pair of boundaries and the rule of its ending point defines -if it is selected or not. -In some cases it may be not what we actually look like. +Note that rules are applied to the end boundary of a segment when deciding +whether to include a segment. In some cases this can cause unexpected behavior. -For example we have a text: +For example, consider the text: \verbatim Hello! How are you? \endverbatim -And we want to fetch all sentences from the text. +Suppose we want to fetch all sentences from the text. The \ref bl_boundary_sentence_rules "sentence rules" have two options: -- Split the text on the point where sentence terminator like ".!?" detected: \ref boost::locale::boundary::sentence_term "sentence_term" -- Split the text on the point where sentence separator like "line feed" detected: \ref boost::locale::boundary::sentence_sep "sentence_sep" +- Split the text where sentence terminator like ".!?" are detected: \ref boost::locale::boundary::sentence_term "sentence_term" +- Split the text where sentence separators such as "line feed" are detected: \ref boost::locale::boundary::sentence_sep "sentence_sep" Naturally to ignore sentence separators we would call \ref boost::locale::boundary::segment_index::rule(rule_type v) "segment_index::rule(rule_type v)" with sentence_term parameter and then run the iterator. 
@@ -225,18 +224,18 @@ for(ssegment_index::iterator it=map.begin(),e=map.end();it!=e;++it) std::cout << "Sentence [" << *it << "]" << std::endl; \endcode -However we would get the expected segments: +Would result in: \verbatim Sentence [Hello! ] Sentence [are you? ] \endverbatim -The reason is that "How\n" is still considered a sentence but selected by different -rule. +These (potentially unexpected) results occur because "How\n" is still considered +a sentence but is selected by a different rule. This behavior can be changed by setting \ref boost::locale::boundary::segment_index::full_select(bool) "segment_index::full_select(bool)" -to \c true. It would force iterator to join the current segment with all previous segments that may not fit the required rule. +to \c true. It will force the iterator to join the current segment with all previous segments even if they do not fit the required rule. So we add this line: @@ -255,17 +254,15 @@ are you? \subsection boundary_analysys_segments_search Locating Segments -Sometimes it is useful to find a segment that some specific iterator is pointing on. +Sometimes it is useful to find a segment that some specific iterator is pointing to. -For example a user had clicked at specific point, we want to select a word on this -location. +For example, suppose we want to find the word a user clicked on. \ref boost::locale::boundary::segment_index "segment_index" provides \ref boost::locale::boundary::segment_index::find() "find(base_iterator p)" member function for this purpose. -This function returns the iterator to the segmet such that \a p points to. - +This function returns an iterator to the segment that includes \a p. For example: @@ -285,41 +282,40 @@ be \note -if the iterator lays inside the segment this segment returned. If the segment does -not fit the selection rules, then the segment following requested position -is returned. +If the iterator is inside a segment, that segment is returned. 
If the segment does
+not fit the selection rules, then the next segment following the requested position
+that does fit the rules will be returned.

For example: For \ref boost::locale::boundary::word "word" boundary analysis with \ref boost::locale::boundary::word_any "word_any" rule:

- "t|o be or ", would point to "to" - the iterator in the middle of segment "to".
-- "to |be or ", would point to "be" - the iterator at the beginning of the segment "be"
-- "to| be or ", would point to "be" - the iterator does not point to segment with required rule so next valid segment is selected "be".
-- "to be or| ", would point to end as not valid segment found.
-
+- "to |be or ", would point to "be" - the iterator at the beginning of the segment "be".
+- "to| be or ", would point to "be" - the iterator is not pointing to a segment fitting the required rule, so next valid segment selected is "be".
+- "to be or| ", would point to end as no valid segment can be found.

\section boundary_analysys_break Iterating Over Boundary Points

\section boundary_analysys_break_basics Basic Iteration

The \ref boost::locale::boundary::boundary_point_index "boundary_point_index" is similar to
-\ref boost::locale::boundary::segment_index "segment_index" in its interface but as a different role.
-Instead of returning text chunks (\ref boost::locale::boundary::segment "segment"s), it returns
+\ref boost::locale::boundary::segment_index "segment_index" in its interface but has a different role.
+Instead of returning text chunks (\ref boost::locale::boundary::segment "segment"s), it returns
a \ref boost::locale::boundary::boundary_point "boundary_point" object that
-represents a position in text - a base iterator used that is used for
+represents a position in text - a base iterator that is used for
iteration of the source text C++ characters.
The \ref boost::locale::boundary::boundary_point "boundary_point" object
also provides a \ref boost::locale::boundary::boundary_point::rule() "rule()" member
-function that defines a rule this boundary was selected according to.
+function that returns why this boundary was selected, i.e. the matched rule.

\note The beginning and the ending of the text are considered boundary points, so
even an empty text consists of at least one boundary point.

-Lets see an example of selecting first two sentences from a text:
+Let's see an example of selecting the first two sentences from a text:

\code
using namespace boost::locale::boundary;
boost::locale::generator gen;
-// our text sample
+// Our text sample
std::string const text="First sentence. Second sentence! Third one?";
// Create an index
sboundary_point_index map(sentence,text.begin(),text.end(),gen("en_US.UTF-8"));
@@ -338,7 +334,7 @@ if(p!=e) {
              << std::endl;
}
else {
-    std::cout <<"There are less then two sentences in this "
+    std::cout <<"There are less than two sentences in this "
              <<"text: " << text << std::endl;
}\endcode

@@ -350,7 +346,7 @@ First two sentences are:
First sentence. Second sentence!
\section boundary_analysys_break_rules Using Rules

-Similarly to the \ref boost::locale::boundary::segment_index "segment_index" the
+Just like \ref boost::locale::boundary::segment_index "segment_index", the
\ref boost::locale::boundary::boundary_point_index "boundary_point_index" provides a
\ref boost::locale::boundary::boundary_point_index::rule(rule_type r) "rule(rule_type mask)"
member function to filter boundary points that interest us.
@@ -358,7 +354,7 @@ member function to filter boundary points that interest us.
It allows to set \ref bl_boundary_word_rules "word", \ref bl_boundary_line_rules "line"
and \ref bl_boundary_sentence_rules "sentence" rules for filtering boundary points.
-Lets change an example above a little:
+Let's change the example above a bit:

\code
// our text sample
@@ -371,11 +367,11 @@ If we run our program as is on the sample above we would get:
First two sentences are:
First sentence. Second
\endverbatim

-Which is not something that we really expected. As the "Second\n"
+Which is not really what we expected, because the "Second\n"
is considered an independent sentence that was separated by a line
separator "Line Feed".

-However, we can set set a rule \ref boost::locale::boundary::sentence_term "sentence_term"
+However, we can set the rule \ref boost::locale::boundary::sentence_term "sentence_term"
and the iterator would use only boundary points that are created
by a sentence terminators like ".!?".

@@ -391,8 +387,8 @@ First two sentences are:
First sentence. Second sentence!
\endverbatim

-You can also use \ref boost::locale::boundary::boundary_point::rule() "boundary_point::rule()" member
-function to learn about the reason this boundary point was created by comparing it with an appropriate
+You can also use the \ref boost::locale::boundary::boundary_point::rule() "boundary_point::rule()" member
+function to learn about the reason why this boundary point was created by comparing it with an appropriate
mask.

For example:

@@ -431,27 +427,26 @@ sentence! Third one?|]

\subsection boundary_analysys_break_search Locating Boundary Points

-Sometimes it is useful to find a specific boundary point according to given
+Sometimes it is useful to find a specific boundary point according to a given
iterator.

\ref boost::locale::boundary::boundary_point_index "boundary_point_index" provides a
\ref boost::locale::boundary::boundary_point_index::find() "iterator find(base_iterator p)"
member function.

-It would return an iterator to a boundary point on \a p's location or at the
-location following it if \a p does not point to appropriate position.
+It returns a boundary point on \a p or at the location following \a p if \a p does not point to an appropriate position. For example, for word boundary analysis: - If a base iterator points to "to |be", then the returned boundary point would be "to |be" (same position) - If a base iterator points to "t|o be", then the returned boundary point would be "to| be" (next valid position) -For example if we want to select 6 words around specific boundary point we can use following code: +For example, if we want to select 6 words around a specific boundary point we can use following code: \code using namespace boost::locale::boundary; boost::locale::generator gen; -// our text sample +// Our text sample std::string const text= "To be or not to be, that is the question."; // Create a mapping @@ -459,35 +454,35 @@ sboundary_point_index map(word,text.begin(),text.end(),gen("en_US.UTF-8")); // Ignore wite space map.rule(word_any); -// define our arbitraty point +// Define our arbitrary point std::string::const_iterator pos = text.begin() + 12; // "no|t"; // Get the search range sboundary_point_index::iterator - begin =map.begin(), + begin = map.begin(), end = map.end(), it = map.find(pos); // find a boundary -// go 3 words backward +// Go 3 words backward for(int count = 0;count <3 && it!=begin; count ++) --it; // Save the start std::string::const_iterator start = *it; -// go 6 words forward +// Go 6 words forward for(int count = 0;count < 6 && it!=end; count ++) ++it; -// make sure we at valid position +// Make sure we are at a valid position if(it==end) --it; -// print the text +// Print the text std::cout << std::string(start,it->iterator()) << std::endl; \endcode -That would print: +This would print: \verbatim be or not to be, that diff --git a/doc/building_boost_locale.txt b/doc/building_boost_locale.txt index faf85c5..e48ff8b 100644 --- a/doc/building_boost_locale.txt +++ b/doc/building_boost_locale.txt @@ -5,7 +5,7 @@ // https://www.boost.org/LICENSE_1_0.txt /*! 
-\page building_boost_locale Building The library +\page building_boost_locale Building the Library - \ref building_boost_locale_bb - \ref bb_building_deps @@ -20,26 +20,25 @@ \subsection bb_building_deps Dependencies - ICU library 3.6 or above is strongly recommended -- If no ICU library is given, iconv support is required under POSIX platforms. +- If no ICU library is given, iconv support is required on POSIX platforms. \subsection bb_platform_opts Platform Notes - If you use Boost.Locale on Windows with MinGW/GCC < 4.5 you'll be - able to use static version only. Mingw/GCC prior to 4.5 have no - support of dynamic runtime linking.\n + able to use the static version only. Mingw/GCC prior to 4.5 has no + support for dynamic runtime linking.\n Using Boost.Locale DLL's with MinGW gcc also requires dynamic linking with the runtime libraries libstdc++ and libgcc. Some gcc builds use - static linking by default so make sure you use correct link options - with your compiler when you build your own programs. + static linking by default so make sure you use the correct link options. - The AIX's iconv misses important character sets that Boost.Locale requires, - so you need to either use GNU iconv or link with ICU library. -- If iconv library is not found on Darwin/Mac OS X builds make sure there - is no multiple iconv installations and provide -sICONV_PATH build option - to point to correct location of iconv library. + so you need to either use GNU iconv or link with the ICU library. +- If the iconv library is not found on Darwin/Mac OS X builds make sure there + are not multiple iconv installations and provide the -sICONV_PATH build option + to point to the correct location of the iconv library. 
\subsection bb_building_proc Building Process -Now all you need to do is invoke bjam command: +Now all you need to do is to invoke bjam: \verbatim ./bjam --with-locale stage @@ -50,8 +49,8 @@ Or on Windows .\bjam --with-locale stage \endverbatim -If you are using custom ICU build or you are using Microsoft Windows -you need to provide a path to location of ICU library using \c -sICU_PATH option +If you are using a custom ICU build or you are using Microsoft Windows, +you need to provide a path to the location of the ICU library using the \c -sICU_PATH option For example: @@ -82,42 +81,41 @@ For example: \endverbatim \note Don't forget to put both debug and release versions of ICU libraries in this path -when using Microsoft Visual Studio so Boost.Build will link correctly debug and release -versions of boost_locale library. +when using Microsoft Visual Studio so Boost.Build will correctly link debug and release +versions of Boost.Locale. \section bb_build_opts Build Options -Boost.Locale supports following options with values \c off or \c on +Boost.Locale supports the following options with values \c off or \c on -- \c boost.locale.icu=off disable build of ICU backend even if ICU library exists -- \c boost.locale.iconv=off or \c boost.locale.iconv=on enable or disable use of iconv - library. It is off by default on Windows and Solaris -- \c boost.locale.winapi=off - disable winapi backend, it is on by default on Windows and Cygwin -- \c boost.locale.std=off or \c boost.locale.winapi=on Disable or enable std backends. \c std backend +- \c boost.locale.icu=off prevents building the ICU backend even if the ICU library exists +- \c boost.locale.iconv enables or disables the iconv backend. It is off by default on Windows and Solaris +- \c boost.locale.winapi=off disables the winapi backend. It is on by default on Windows and Cygwin +- \c boost.locale.std enables or disables the std backend. The std backend is disabled by default when using Sun Studio. 
-- \c boost.locale.posix=on or \c boost.locale.posix=off Enable or disable support of POSIX backend, - it is on by default on Linux and Mac OS X +- \c boost.locale.posix enables or disables support of the POSIX backend. + It is on by default on Linux and Mac OS X -Also Boost.Locale supports following options +Also Boost.Locale supports the following options -- \c -sICU_PATH=/path/to/location/of/icu - the location of custom ICU library -- \c -sICONV_PATH=/path/to/location/of/iconv - the location of custom iconv library +- \c -sICU_PATH=/path/to/location/of/icu - the location of the ICU library +- \c -sICONV_PATH=/path/to/location/of/iconv - the location of the iconv library For example: -- Build the library on Windows with ICU backend only: +- Build the library on Windows with the ICU backend only: \verbatim .\bjam boost.locale.winapi=off boost.locale.std=off -sICU_PATH=c:\icu46 --with-locale stage \endverbatim -- Build the library on Linux with std backend only +- Build the library on Linux with the std backend only \verbatim .\bjam boost.locale.posix=off boost.locale.icu=off --with-locale stage \endverbatim \section bb_build_test Running Unit Tests -You can run unit tests by invoking \c bjam with \c libs/locale/test project parameter +You can run unit tests by invoking \c bjam with the \c libs/locale/test project parameter \verbatim ./bjam libs/locale/test \endverbatim @@ -127,10 +125,11 @@ You can run unit tests by invoking \c bjam with \c libs/locale/test project para Boost.Locale is built with binary compatibility in mind. Switching localization back ends on or off, or using iconv or not, does not affect binary compatibility. So if a dynamic library was built with all possible backends, other dynamic libraries compiled with, for example, only the \c std, \c posix -or \c winapi backends would still be binary-compatible with it. +or \c winapi backends would still be binary-compatible. -However this definitely has an effect on some features. 
For example, if you -try to use boundary analysis or a calendar facet when the library does not support the icu backend -you would get an exception. + +Using a feature not included in the binary will result in an exception. +For example, if you try to use boundary analysis or a calendar facet when the library does not support the ICU backend, +you will get an exception. */ diff --git a/doc/charset_handling.txt b/doc/charset_handling.txt index 0a7c6b6..b37eaa9 100644 --- a/doc/charset_handling.txt +++ b/doc/charset_handling.txt @@ -12,8 +12,7 @@ Boost.Locale provides \ref boost::locale::conv::to_utf() "to_utf", \ref boost::locale::conv::from_utf() "from_utf" and \ref boost::locale::conv::utf_to_utf() "utf_to_utf" functions in the \c boost::locale::conv namespace. They are simple and -convenient functions to convert a string to and from -UTF-8/16/32 strings and strings using other encodings. +convenient functions to convert between UTF-8/16/32 and other encodings. For example: @@ -25,14 +24,13 @@ std::string utf8_string2 = utf_to_utf(wide_string); \endcode -This function may use an explicit encoding name like "Latin1" or "ISO-8859-8", -or use std::locale as a parameter to fetch this information from it. -It also receives a policy parameter that tells it how to behave if the -conversion can't be performed (i.e. an illegal or unsupported character is found). -By default this function skips all illegal characters and tries to do the best it -can, however, it is possible ask it to throw -a \ref boost::locale::conv::conversion_error "conversion_error" exception -by passing the \c stop flag to it: +These functions accept an explicit encoding name like "Latin1" or "ISO-8859-8", +or a std::locale which is used to get the encoding. +They also accept a policy parameter that determines what happens if a conversion can't be performed +(i.e. an illegal or unsupported character is found). +By default, these functions skip all illegal characters and try to do the best they can. 
+However, these functions can throw a \ref boost::locale::conv::conversion_error "conversion_error" +when passed the \c stop flag: \code std::wstring s=to_utf("\xFF\xFF","UTF-8",stop); @@ -44,18 +42,18 @@ std::wstring s=to_utf("\xFF\xFF","UTF-8",stop); Boost.Locale provides stream codepage conversion facets based on the \c std::codecvt facet. This allows conversion between wide-character encodings and 8-bit encodings like UTF-8, ISO-8859 or Shift-JIS. -Most of compilers provide such facets, but: +Most compilers provide such facets, but: -- Under Windows MSVC does not support UTF-8 encodings at all. -- Under Linux the encodings are supported only if the required locales are generated. For example +- Windows MSVC does not support UTF-8 encodings at all. +- In Linux, the encodings are supported only if the required locales are generated. For example it may be impossible to create a \c he_IL.CP1255 locale even when the \c he_IL locale is available. -Thus Boost.Locale provides an option to generate code-page conversion facets for use with +Boost.Locale provides an option to generate code-page conversion facets for use with Boost.Iostreams filters or \c std::wfstream. For example: \code std::locale loc= generator().generate("he_IL.UTF-8"); - std::wofstream file. + std::wofstream file; file.imbue(loc); file.open("hello.txt"); file << L"שלום!" << endl; @@ -69,7 +67,7 @@ You can use the \c std::codecvt facet directly, but this is quite tricky and requires accurate buffer and error management. You can use the \c boost::iostreams::code_converter class for stream-oriented -conversions between the wide-character set and narrow locale character set. +conversions between the wide character set and narrow locale character set. 
This is a sample program that converts wide to narrow characters for an arbitrary stream: @@ -84,7 +82,7 @@ stream: namespace io = boost::iostreams; -// Device that consumes the converted text, +// Device that consumes the converted text // In our case it just writes to standard output class consumer { public: @@ -117,8 +115,8 @@ int main() converter_stream stream; stream.open(dev); // Now wide characters that are written - // to the stream would be given to - // our consumer as narrow characters + // to the stream will be given to + // our consumer as narrow characters // in UTF-8 encoding stream << L"שלום" << std::flush; } @@ -138,9 +136,7 @@ ISO-8859, and Shift-JIS, but not with stateful encodings like UTF-7 or SCSU. \note -The implementation of codecvt for single byte encodings like ISO-8859-X and for UTF-8 is very efficient -and would allow fast conversion of the content, however its performance may be sub-optimal for -double-width encodings like Shift-JIS, due to the stateless problem described above. - +The implementation of codecvt is very fast and efficient for single byte encodings like ISO-8859-X and UTF-8, +however its performance may be sub-optimal for double-width encodings like Shift-JIS, due to the stateless problem described above. */ diff --git a/doc/collation.txt b/doc/collation.txt index efdab31..d56d8b5 100644 --- a/doc/collation.txt +++ b/doc/collation.txt @@ -8,7 +8,7 @@ \page collation Collation Boost.Locale provides a \ref boost::locale::collator "collator" class, derived from \c std::collate, that adds support for -primary, secondary, tertiary, quaternary and identical comparison levels. They can be approximately defined as: +primary, secondary, tertiary, quaternary, and identical comparison levels. They can be approximately defined as: -# Primary -- ignore accents and character case, comparing base letters only. For example "facade" and "Façade" are the same. -# Secondary -- ignore character case but consider accents. 
"facade" and "façade" are different but "Façade" and "façade" are the same.
@@ -16,7 +16,7 @@ primary, secondary, tertiary, quaternary and identical comparison levels. They c
-# Quaternary -- consider all case, accents, and punctuation. The words must be identical in terms of Unicode representation.
-# Identical -- as quaternary, but compare code points as well.

-There are two ways of using the \ref boost::locale::collator "collator" facet: directly, by calling its member functions \ref boost::locale::collator::compare() "compare", \ref boost::locale::collator::transform() "transform" and \ref
+There are two ways of using the \ref boost::locale::collator "collator" facet: directly, by calling its member functions \ref boost::locale::collator::compare() "compare", \ref boost::locale::collator::transform() "transform", and \ref
boost::locale::collator::hash() "hash", or indirectly by using the \ref boost::locale::comparator "comparator"
template class in STL algorithms.
diff --git a/doc/conversions.txt b/doc/conversions.txt
index 246be8e..6a957b7 100644
--- a/doc/conversions.txt
+++ b/doc/conversions.txt
@@ -7,9 +7,13 @@
/*!
\page conversions Text Conversions

-There is a set of functions that perform basic string conversion operations:
-upper, lower and \ref term_title_case "title case" conversions, \ref term_case_folding "case folding"
-and Unicode \ref term_normalization "normalization". These are \ref boost::locale::to_upper "to_upper" , \ref boost::locale::to_lower "to_lower", \ref boost::locale::to_title "to_title", \ref boost::locale::fold_case "fold_case" and \ref boost::locale::normalize "normalize".
+Boost.Locale provides several functions for basic string manipulation:
+
+- \ref boost::locale::to_upper "to_upper": convert a string to upper case
+- \ref boost::locale::to_lower "to_lower": convert a string to lower case
+- \ref boost::locale::to_title "to_title": convert a string to title case
+- \ref boost::locale::fold_case "fold_case": makes a string case-agnostic (see \ref term_case_folding "case folding")
+- \ref boost::locale::normalize "normalize": convert equivalent code points to a consistent binary form (\ref term_normalization "normalization")

All these functions receive an \c std::locale object as parameter or use a global locale by default.

@@ -35,8 +39,8 @@ Title  Grüßen
Fold   grüssen
\endverbatim

-You may notice that there are existing functions \c to_upper and \c to_lower in the Boost.StringAlgo library.
-The difference is that these function operate over an entire string instead of performing incorrect character-by-character conversions.
+There are existing functions \c to_upper and \c to_lower in the Boost.StringAlgo library, however the
+Boost.Locale functions operate on an entire string instead of performing incorrect character-by-character conversions.

For example:

@@ -54,7 +58,7 @@ GRÜßEN GRÜSSEN
Where a letter "ß" was not converted correctly to double-S in first case because of a limitation of \c std::ctype facet.
This is even more problematic in case of UTF-8 encodings where non US-ASCII are not converted at all.

-For example, this code
+For example, this code:

\code
std::string grussen = "grüßen";
@@ -81,7 +85,7 @@ to \ref boost::locale::normalize() "normalize" function:
- NFKD - Compatibility decomposition - boost::locale::norm_nfkd
- NFKC - Compatibility decomposition followed by canonical composition - boost::locale::norm_nfkc

-For more details on normalization forms, read this article.
+For more details on normalization forms, read this report on unicode.org.
\section conversions_notes Notes @@ -91,5 +95,5 @@ For more details on normalization forms, read I try to use some Boost.Locale functions and I get an \c std::bad_cast exception thrown? +- \anchor faq_bad_cast Some Boost.Locale functions throw \c std::bad_cast exception? \n \n - \b Answer: You probably try to use incorrect \c std::locale object. All Boost.Locale tools relay on \c std::locale object's facets. - The locale object should be generated with \ref boost::locale::generator "generator" class and then passed to - the function or alternatively global locale should be set using \c std::locale::global() function such that - global locale (and default created one) would have required facets to use. -- \anchor faq_number I had installed global locale and try to write something to stream but still get wrong output? + \b Answer: You probably try to use an incorrect \c std::locale object. All Boost.Locale tools rely on \c std::locale object's facets. + The locale object should be generated with the \ref boost::locale::generator "generator" class and then passed to + the function or alternatively global locale should be set using the \c std::locale::global() function such that + global locale (and default created one) would have the required facets. +- \anchor faq_number I have installed global locale, but when I try to write something to a stream I still get the wrong output? For example: \code #include @@ -29,8 +29,8 @@ Prints a number instead of a date. \n \b Answer: You forget to imbue the locale to the stream. Changing the global locale does not affect the - locale in existing \c iostream objects. Thus because \c std::out and other global streams were created - before changing the global locale Boost.Locale manipulators have no effect. You need to write: + locale in existing \c iostream objects. Thus, because \c std::cout and other global streams were created + before changing the global locale, Boost.Locale manipulators have no effect. 
You need to write: \code #include #include diff --git a/doc/formatting_and_parsing.txt b/doc/formatting_and_parsing.txt index 8d6e36f..54ea20a 100644 --- a/doc/formatting_and_parsing.txt +++ b/doc/formatting_and_parsing.txt @@ -6,7 +6,7 @@ /*! -\page formatting_and_parsing Numbers, Time and Currency formatting and parsing +\page formatting_and_parsing Numbers, Time and Currency Formatting and Parsing All formatting and parsing is performed via the standard I/O streams. Each of the above information types is represented as a number. The formatting information is set using iostream manipulators. All manipulators are placed in the boost::locale::as namespace. @@ -21,16 +21,16 @@ For example: \endcode There is a special manipulator \c as::posix that "unsets" locale-specific settings and returns them to the default \c iostream formatting -and parsing methods. Please note, such formats may still be localized by the default \c std::num_put and \c std::num_get facets. +and parsing methods. Please note, such formats may still be localized by the default e.g. \c std::num_put and \c std::num_get facets. -\section numbers_formatting Numbers and number manipulators +\section numbers_formatting Numbers and Number Manipulators Here are the manipulators for number formatting: -- \c as::number -- format number according to local specifications, it takes into account various \c std::ios_base flags like scientific +- \c as::number -- format numbers according to local specifications. Takes into account various \c std::ios_base flags like scientific format and precision. \n -- \c as::percent -- format number as "percent" format. For example: +- \c as::percent -- format numbers as percents. 
For example: \code cout << as::percent << 0.25 <CppCMS project (where the Boost.Locale was developed originally) -- Download the a set of packages from MinGW project +- Download the package from CppCMS (where the Boost.Locale was developed originally) +- Download a set of packages from the MinGW project - Build it on your own - Use Cygwin's packages -\section gettext_for_windows_cppcms Getting gettext utilities from CppCMS project +\section gettext_for_windows_cppcms Getting Gettext Utilities from CppCMS -Boost.Locale was developed for needs of CppCMS project -and thus CppCMS hosts a convince package for Windows users of pre-build, statically liked \c gettext +Boost.Locale was developed for the needs of CppCMS +and thus CppCMS hosts a convenience package for Windows users of pre-built, statically linked \c gettext runtime utilities like \c xgettext, \c msgfmt, etc. -So you can download a zip file \c gettext-tools-static-XXX.zip from a CppCMS downloads page +You can download a zip file \c gettext-tools-static-XXX.zip from the CppCMS downloads page under boost_locale/gettext_for_windows. Extract the file and use the executable files inside. -\section gettext_for_windows_mingw Getting Gettext via MinGW project +\section gettext_for_windows_mingw Getting Gettext via MinGW MinGW project provides GNU tools for Windows, including GNU compilers and various runtime utilities. Thus you can always install full MinGW distribution including gettext tools. However, if you a want minimalistic runtime version that allows you to extract messages and create catalogs you need to download several packages manually. -In order to install Gettext via MinGW distributing you need to download, a GCC's runtime, -iconv library and gettext itself. +In order to install Gettext via MinGW you need to download a GCC runtime, +an iconv library and Gettext itself. 
-So visit a downloads page of MinGW project -and download following files (chose the latest versions of each package): +So visit MinGW's downloads page +and download the following files (choose the latest versions of each package): - From: \c MinGW/BaseSystem/GCC/Version4/gcc-xxx/ \n File: \c libgcc-xxx-mingw32-dll-1.tar.lzma @@ -55,16 +55,17 @@ For example, at June 23, 2011 it was: - \c gettext: \c libintl-0.17-1-mingw32-dll-8.tar.lzma, \c libgettextpo-0.17-1-mingw32-dll-0.tar.lzma and \c gettext-0.17-1-mingw32-dev.tar.lzma. After you download the packages, extract all the files to the same directory using tools like -\c 7zip and you'll get all the executables and \c dll's you need under \c bin subdirectory. +\c 7zip and you'll get all the executables and \c dll's you need under the \c bin subdirectory. -\note the version on MinGW site is slightly outdated (0.17.1) while gettext provides currently 0.18.1. +\note The version on MinGW site is slightly outdated (0.17.1) while Gettext provides currently 0.18.1. -\section gettext_for_windows_build Building latest version on your own. +\section gettext_for_windows_build Building the latest version on your own. -You can build your own version of GNU Gettext using MinGW environment, you'll need to have up-to-date gcc compiler -and the shell, you'll need to install iconv first and then build a gettext with it. +You can build your own version of GNU Gettext using the MinGW environment. +You'll need to have an up-to-date gcc compiler and the shell. +You'll need to install iconv first and then build Gettext with it. -Basic and simplest way would be to open a MinGW shell +The simplest way would be to open a MinGW shell. Build \c iconv: @@ -80,11 +81,10 @@ make make install \endcode -And now you have in c:\\mygettext\\bin all appropriate executable files -to use. +And now you have all appropriate executable files ready to use in c:\\mygettext\\bin. 
\section gettext_for_windows_cygwin Using Cygwin -If you already have Cygwin - just use gettext tools provided with it. +If you already have Cygwin - just use the Gettext tools provided with it. */ diff --git a/doc/localized_text_formatting.txt b/doc/localized_text_formatting.txt index 1466937..20cf79d 100644 --- a/doc/localized_text_formatting.txt +++ b/doc/localized_text_formatting.txt @@ -7,10 +7,10 @@ /*! \page localized_text_formatting Localized Text Formatting -The \c iostream manipulators are very useful, but when we create a messages for the user, sometimes we need something +The \c iostream manipulators are very useful, but when we create messages for the user, sometimes we need something like good old \c printf or \c boost::format. -Unfortunately \c boost::format has several limitations in context of localization: +Unfortunately \c boost::format has several limitations in the context of localization: -# It renders all parameters using global locale rather than target \c ostream locale. For example: \n @@ -20,18 +20,18 @@ Unfortunately \c boost::format has several limitations in context of localizatio output << boost::format("%1%") % 1234.345; \endcode \n - This would write "1,234.235" to output, instead of the "1.234,234" that is expected for "de_DE" locale --# It knows nothing about the new Boost.Locale manipulators. + This would write "1,234.235" to output, instead of the "1.234,234" that is expected for "de_DE" locale. +-# It knows nothing about the Boost.Locale manipulators. -# The \c printf-like syntax is very limited for formatting complex localized data, not allowing the formatting of dates, times, or currencies -Thus a new class, boost::locale::format, was introduced. For example: +Thus a new class, boost::locale::format, is introduced. For example: \code wcout << wformat(L"Today {1,date} I would meet {2} at home") % time(0) % name <CppCMS - C++ Web Framework project and then contributed to Boost. 
@@ -19,7 +19,7 @@ Boost.Locale gives powerful tools for development of cross platform localized software - the software that talks to user in its language. -Provided Features: +Features: - Correct case conversion, case folding and normalization. - Collation (sorting), including support for 4 Unicode @@ -34,17 +34,16 @@ Provided Features: - Powerful message formatting (string translation) including support for plural forms, using GNU catalogs. - Character set conversion. -- Transparent support for 8-bit character sets like Latin1 -- Support for \c char and \c wchar_t +- Transparent support for 8-bit character sets like Latin1. +- Support for \c char and \c wchar_t. - Experimental support for C++11 \c char16_t and \c char32_t strings and streams. -Boost.Locale enhances and unifies the standard library's API -the way it becomes useful and convenient for development -of cross platform and "cross-culture" software. +Boost.Locale complements the standard library's API, making it easy +to write cross platform and "cross culture" software. -In order to achieve this goal Boost.Locale uses -the-state-of-the-art Unicode and Localization +In order to achieve this goal, Boost.Locale uses the +state-of-the-art Unicode and Localization library: ICU - International Components for Unicode. Boost.Locale creates the natural glue between the C++ locales @@ -53,8 +52,7 @@ framework, iostreams, and the powerful ICU library. Boost.Locale provides non-ICU based localization support as well. It is based on the operating system native API or on the standard C++ library support. Sacrificing some less important features, -Boost.Locale becomes less powerful but lighter and easier to deploy -and use library. +Boost.Locale becomes less powerful but lighter and easier to deploy. 
\section main_tutorial Tutorials diff --git a/doc/messages_formatting.txt b/doc/messages_formatting.txt index 5fc7366..9509cc2 100644 --- a/doc/messages_formatting.txt +++ b/doc/messages_formatting.txt @@ -210,10 +210,10 @@ int main() \subsection plural_forms Plural Forms -GNU Gettext catalogs have simple, robust and yet powerful plural forms support. We recommend to read the +GNU Gettext catalogs have simple, robust and yet powerful plural forms support. We recommend reading the original GNU documentation here. -Let's try to solve a simple problem, displaying a message to the user: +Let's try to solve a simple problem: displaying a message to the user. \code if(files == 1) @@ -244,7 +244,7 @@ For example, the Slavic language family has 3 plural forms, that can be chosen u plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2; \endcode -Such equation is stored in the message catalog itself and it is evaluated during translation to supply the correct form. +Such an equation is stored in the message catalog itself and it is evaluated during translation to supply the correct form. So the code above would display 3 different forms in Russian locale for values of 1, 3 and 5: @@ -401,7 +401,7 @@ xgettext --keyword=translate:1,1t --keyword=translate:1c,2,2t \ source_file_1.cpp ... source_file_N.cpp \endcode -Of course, if you do not use "gettext" like translation you +Of course, if you do not use "gettext"-like translations, you may ignore some of these parameters. \subsection custom_file_system_support Custom Filesystem Support diff --git a/doc/rationale.txt b/doc/rationale.txt index f796e99..7a3d943 100644 --- a/doc/rationale.txt +++ b/doc/rationale.txt @@ -18,9 +18,9 @@ - \ref why_abstract_api - \ref why_no_special_character_type -\section rationale_why Why is it needed? +\section rationale_why Why do I need Boost.Locale? 
-Why do we need a localization library, when standard C++ facets (should) provide most of the required functionality: +Why do we need a localization library, when standard C++ facets (should) provide most of the required functionality? - Case conversion is done using the \c std::ctype facet - Collation is supported by \c std::collate and has nice integration with \c std::locale @@ -28,11 +28,11 @@ Why do we need a localization library, when standard C++ facets (should) provide time, and currency formatting and parsing. - There is a \c std::messages class that supports localized message formatting. -So why do we need such library if we have all the functionality within the standard library? +So why do we need such a library if we have all the functionality within the standard library? Almost every(!) facet has design flaws: -- \c std::collate supports only one level of collation, not allowing you to choose whether case- or accent-sensitive comparisons +- \c std::collate supports only one level of collation; it does not allow you to choose whether case- or accent-sensitive comparisons should be performed. - \c std::ctype, which is responsible for case conversion, assumes that all conversions can be done on a per-character basis. This is @@ -176,17 +176,17 @@ to provide all the required information. - ICU fully understands POSIX locales and knows how to treat them correctly. - They are native locale names for most operating system APIs (with the exception of Windows) -\section why_linear_chunks Why most parts of Boost.Locale work only on linear/contiguous chunks of text +\section why_linear_chunks Why do most parts of Boost.Locale work only on linear/contiguous chunks of text? There are two reasons: - Boost.Locale relies heavily on the third-party APIs like ICU, POSIX or Win32 API, all of them work only on linear chunks of text, so providing non-linear API would just hide the - real situation and would not bring real performance advantage. 
+ real situation and would hurt performance. - In fact, all known libraries that work with Unicode: ICU, Qt, Glib, Win32 API, POSIX API - and others accept an input as single linear chunk of text and there is a good reason for this: + and others accept an input as a single linear chunk of text and there is a good reason for this: \n - -# Most of supported operations on text like collation, case handling usually work on small + -# Most supported operations on text like collation, case handling usually work on small chunks of text. For example: you probably would never want to compare two chapters of a book, but rather their titles. -# We should remember that even very large texts require quite a small amount of memory, for example @@ -201,7 +201,7 @@ However: on large chunks of text, will provide an interface for non-linear text handling. -\section why_abstract_api Why all Boost.Locale implementation is hidden behind abstract interfaces and does not use template metaprogramming? +\section why_abstract_api Why is all Boost.Locale implementation hidden behind abstract interfaces instead of using template metaprogramming? There are several major reasons: @@ -212,7 +212,7 @@ There are several major reasons: - This approach reduces compilation times significantly. This is very important for library that may be used in almost every part of specific program. -\section why_no_special_character_type Why Boost.Locale does not provide char16_t/char32_t for non-C++11 platforms. +\section why_no_special_character_type Why doesn't Boost.Locale provide char16_t/char32_t for non-C++11 platforms? There are several reasons: @@ -226,7 +226,7 @@ There are several reasons: These are exactly the reasons why Boost.Locale fails with current limited C++11 characters support on GCC-4.5 (the second reason) and MSVC-2010 (the first reason) -So basically it is impossible to use non-C++ characters with the C++'s locales framework. 
+Basically it is impossible to use non-C++ characters with the C++'s locales framework. The best and the most portable solution is to use the C++'s \c char type and UTF-8 encodings. diff --git a/doc/recommendations_and_myths.txt b/doc/recommendations_and_myths.txt index 6f58d17..ec45c11 100644 --- a/doc/recommendations_and_myths.txt +++ b/doc/recommendations_and_myths.txt @@ -33,7 +33,7 @@ on the platform, so they may be even less convenient when dealing with Unicode t \subsection myths_utf16 UTF-16 is the best encoding to work with. -There is common assumption that UTF-16 is the best encoding for storing information because it gives "shortest" representation +There is a common assumption that UTF-16 is the best encoding for storing information because it gives the "shortest" representation of strings. In fact, it is probably the most error-prone encoding to work with. The biggest issue is code points that lay outside of the BMP, diff --git a/doc/running_examples_under_windows.txt b/doc/running_examples_under_windows.txt index 259ba7d..047fae2 100644 --- a/doc/running_examples_under_windows.txt +++ b/doc/running_examples_under_windows.txt @@ -7,13 +7,13 @@ /*! \page running_examples_under_windows Running Examples under Microsoft Windows -All of the examples that come with Boost.Locale are designed for UTF-8 and it is +All of the examples that come with Boost.Locale are designed for UTF-8; it is the default encoding used by Boost.Locale. However, the default narrow encoding under Microsoft Windows is not UTF-8 and -the output of the applications would not be displayed correctly in the console. +the output of the applications will not be displayed correctly in the console. 
-So in order to use UTF-8 encoding under the Windows console and see the output correctly, do the following: +In order to use UTF-8 encoding in the Windows console and see the output correctly, do the following: -# Open a \c cmd window -# Change the default font to a TrueType font: go to properties-\>font (right click on title-bar-\>properties-\>font) and diff --git a/doc/status_of_cpp0x_characters_support.txt b/doc/status_of_cpp0x_characters_support.txt index a5fc571..a3d8659 100644 --- a/doc/status_of_cpp0x_characters_support.txt +++ b/doc/status_of_cpp0x_characters_support.txt @@ -7,9 +7,9 @@ /*! \page status_of_cpp0x_characters_support Status of C++11 char16_t/char32_t support -The support of C++11 \c char16_t and \c char32_t is experimental, mostly does not work and not -intended to be used in production with current latest compilers: GCC-4.5, MSVC10 till major -compiler's flaws would be fixed. +The support of C++11 \c char16_t and \c char32_t is experimental, mostly does not work, and is not +intended to be used in production with the latest compilers: GCC-4.5, MSVC10 until major +compiler flaws are fixed. 
\section status_of_cpp0x_characters_support_gnu GNU GCC 4.5/C++11 Status diff --git a/examples/calendar.cpp b/examples/calendar.cpp index 57dd5bf..d9eb12b 100644 --- a/examples/calendar.cpp +++ b/examples/calendar.cpp @@ -43,7 +43,7 @@ int main() const int first = calendar().first_day_of_week(); - // Print weeks days + // Print week days for(int i = 0; i < 7; i++) { date_time tmp(now, period::day_of_week() * (first + i)); std::cout << format("{1,w=8,ftime='%a'} ") % tmp; diff --git a/examples/collate.cpp b/examples/collate.cpp index e7528dc..3b6673c 100644 --- a/examples/collate.cpp +++ b/examples/collate.cpp @@ -18,7 +18,7 @@ int main() std::locale::global(gen("")); /// Set global locale to requested - /// Create a set that includes all strings sorted according to ABC order + /// Create a set that includes all strings sorted in alphabetical order /// std::locale can be used as object for comparison typedef std::set set_type; set_type all_strings; diff --git a/examples/wboundary.cpp b/examples/wboundary.cpp index 5ebfed6..7b11ec1 100644 --- a/examples/wboundary.cpp +++ b/examples/wboundary.cpp @@ -47,9 +47,8 @@ int main() std::locale::global(loc); std::wcout.imbue(loc); - // This is needed to prevent C library to - // convert strings to narrow - // instead of C++ on some platforms + // This is needed to prevent the C stdio library from + // converting strings to narrow on some platforms std::ios_base::sync_with_stdio(false); std::wstring text = L"Hello World! あにま! Linux2.6 and Windows7 is word and number. 
שָלוֹם עוֹלָם!"; diff --git a/examples/wconversions.cpp b/examples/wconversions.cpp index 9d90cbd..f98a7a4 100644 --- a/examples/wconversions.cpp +++ b/examples/wconversions.cpp @@ -43,9 +43,8 @@ int main() std::locale::global(loc); std::wcout.imbue(loc); - // This is needed to prevent C library to - // convert strings to narrow - // instead of C++ on some platforms + // This is needed to prevent the C stdio library from + // converting strings to narrow on some platforms std::ios_base::sync_with_stdio(false); std::wcout << L"Correct case conversion can't be done by simple, character by character conversion\n"; diff --git a/examples/whello.cpp b/examples/whello.cpp index 314b91b..6012919 100644 --- a/examples/whello.cpp +++ b/examples/whello.cpp @@ -18,9 +18,8 @@ int main() std::locale::global(loc); std::wcout.imbue(loc); - // This is needed to prevent C library to - // convert strings to narrow - // instead of C++ on some platforms + // This is needed to prevent the C stdio library from + // converting strings to narrow on some platforms std::ios_base::sync_with_stdio(false); std::wcout << wformat(L"Today {1,date} at {1,time} we had run our first localization example") % time(0) diff --git a/include/boost/locale/boundary/boundary_point.hpp b/include/boost/locale/boundary/boundary_point.hpp index c0971ec..494352a 100644 --- a/include/boost/locale/boundary/boundary_point.hpp +++ b/include/boost/locale/boundary/boundary_point.hpp @@ -20,18 +20,18 @@ namespace boost { namespace locale { namespace boundary { /// It represents a pair - an iterator and a rule that defines this /// point. /// - /// This type of object is dereference by the iterators of boundary_point_index. Using a rule() + /// This type of object is dereferenced by the iterators of boundary_point_index. Using a rule() /// member function you can get the reason why this specific boundary point was selected. 
/// - /// For example, When you use a sentence boundary analysis, the (rule() & \ref sentence_term) != 0 means + /// For example, when you use sentence boundary analysis, the (rule() & \ref sentence_term) != 0 means /// that this boundary point was selected because a sentence terminator (like .?!) was spotted /// and the (rule() & \ref sentence_sep)!=0 means that a separator like line feed or carriage /// return was observed. /// /// \note /// - /// - The beginning of analyzed range is always considered a boundary point and its rule is always 0. - /// - when using a word boundary analysis the returned rule relates to a chunk of text preceding + /// - The beginning of the analyzed range is always considered a boundary point and its rule is always 0. + /// - When using word boundary analysis, the returned rule relates to a chunk of text preceding /// this point. /// /// \see diff --git a/include/boost/locale/boundary/facets.hpp b/include/boost/locale/boundary/facets.hpp index 5504e1d..4d443c9 100644 --- a/include/boost/locale/boundary/facets.hpp +++ b/include/boost/locale/boundary/facets.hpp @@ -24,13 +24,13 @@ namespace boost { namespace locale { /// /// @{ - /// \brief This structure is used for representing boundary point - /// that follows the offset. + /// \brief This structure is used for representing boundary points + /// that follow the offset. struct break_info { /// Create empty break point at beginning break_info() : offset(0), rule(0) {} - /// Create empty break point at offset v. + /// Create an empty break point at offset v. /// it is useful for order comparison with other points. 
break_info(size_t v) : offset(v), rule(0) {} diff --git a/include/boost/locale/boundary/index.hpp b/include/boost/locale/boundary/index.hpp index d552769..7d2eb66 100644 --- a/include/boost/locale/boundary/index.hpp +++ b/include/boost/locale/boundary/index.hpp @@ -31,7 +31,7 @@ namespace boost { namespace locale { namespace boundary { /// /// \defgroup boundary Boundary Analysis /// - /// This module contains all operations required for %boundary analysis of text: character, word, like and sentence + /// This module contains all operations required for %boundary analysis of text: character, word, line and sentence /// boundaries /// /// @{ @@ -753,7 +753,7 @@ namespace boost { namespace locale { namespace boundary { /// Create a boundary_point_index from a \ref segment_index. It copies all indexing information /// and uses the default rule (all possible %boundary points) /// - /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text + /// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text /// range it is much better to create one from another rather then indexing the same /// range twice. /// @@ -762,7 +762,7 @@ namespace boost { namespace locale { namespace boundary { /// Copy a boundary_point_index from a \ref segment_index. It copies all indexing information /// and keeps the current \ref rule() unchanged /// - /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text + /// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text /// range it is much better to create one from another rather then indexing the same /// range twice. 
/// diff --git a/include/boost/locale/boundary/types.hpp b/include/boost/locale/boundary/types.hpp index ce8631c..9fac23b 100644 --- a/include/boost/locale/boundary/types.hpp +++ b/include/boost/locale/boundary/types.hpp @@ -17,7 +17,7 @@ namespace boost { namespace locale { - /// \brief This namespase contains all operations required for boundary analysis of text + /// \brief This namespace contains all operations required for boundary analysis of text namespace boundary { /// \defgroup boundary Boundary Analysis /// diff --git a/include/boost/locale/generic_codecvt.hpp b/include/boost/locale/generic_codecvt.hpp index c2f34cf..0ad94ba 100644 --- a/include/boost/locale/generic_codecvt.hpp +++ b/include/boost/locale/generic_codecvt.hpp @@ -238,7 +238,7 @@ namespace boost { namespace locale { // mbstate_t is POD type and should be initialized to 0 (i.a. state = stateT()) // according to standard. We use it to keep a flag 0/1 for surrogate pair writing // - // if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd + // if 0 no code above >0xFFFF observed, if 1, a code above 0xFFFF observed // and first pair is written, but no input consumed boost::uint16_t& state = *reinterpret_cast(&std_state); typename CodecvtImpl::state_type cvt_state = @@ -269,7 +269,7 @@ namespace boost { namespace locale { } else { // for other codepoints we do following // - // 1. We can't consume our input as we may find ourselfs + // 1. We can't consume our input as we may find ourselves // in state where all input consumed but not all output written,i.e. only // 1st pair is written // 2. We only write first pair and mark this in the state, we also revert back @@ -320,7 +320,7 @@ namespace boost { namespace locale { // according to standard. 
We assume that sizeof(mbstate_t) >=2 in order // to be able to store first observed surrogate pair // - // State: state!=0 - a first surrogate pair was observerd (state = first pair), + // State: state!=0 - a first surrogate pair was observed (state = first pair), // we expect the second one to come and then zero the state boost::uint16_t& state = *reinterpret_cast(&std_state); typename CodecvtImpl::state_type cvt_state = @@ -472,7 +472,7 @@ namespace boost { namespace locale { // mbstate_t is POD type and should be initialized to 0 (i.a. state = stateT()) // according to standard. We use it to keep a flag 0/1 for surrogate pair writing // - // if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd + // if 0 no code above >0xFFFF observed, if 1, a code above 0xFFFF observed // and first pair is written, but no input consumed auto cvt_state = implementation().initial_state(generic_codecvt_base::to_unicode_state); while(to < to_end && from < from_end) { diff --git a/include/boost/locale/localization_backend.hpp b/include/boost/locale/localization_backend.hpp index 808b6e4..a4b79bf 100644 --- a/include/boost/locale/localization_backend.hpp +++ b/include/boost/locale/localization_backend.hpp @@ -36,8 +36,8 @@ namespace boost { namespace locale { /// -# \c message_path - path to the location of message catalogs (vector of strings) /// -# \c message_application - the name of applications that use message catalogs (vector of strings) /// - /// Each backend can be installed with a different default priotiry so when you work with two different backends, - /// you can specify priotiry so this backend will be chosen according to their priority. + /// Each backend can be installed with a different default priority so when you work with two different backends, + /// you can specify priority so this backend will be chosen according to their priority. 
class BOOST_LOCALE_DECL localization_backend { protected: localization_backend(const localization_backend&) = default; diff --git a/include/boost/locale/message.hpp b/include/boost/locale/message.hpp index eefe695..690c224 100644 --- a/include/boost/locale/message.hpp +++ b/include/boost/locale/message.hpp @@ -97,7 +97,7 @@ namespace boost { namespace locale { namespace detail { inline bool is_us_ascii_char(char c) { - // works for null terminated strings regardless char "signness" + // works for null terminated strings regardless of char "signedness" return 0 < c && c < 0x7F; } inline bool is_us_ascii_string(const char* msg) diff --git a/include/boost/locale/util.hpp b/include/boost/locale/util.hpp index b7228e0..0ef67ef 100644 --- a/include/boost/locale/util.hpp +++ b/include/boost/locale/util.hpp @@ -16,7 +16,7 @@ #include namespace boost { namespace locale { - /// \brief This namespace provides various utility function useful for Boost.Locale backends + /// \brief This namespace provides various utility functions useful for Boost.Locale's backends /// implementations namespace util { @@ -205,7 +205,7 @@ namespace boost { namespace locale { /// This function installs codecvt that can be used for conversion between single byte /// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points, /// - /// Throws boost::locale::conv::invalid_charset_error if the chacater set is not supported or isn't single byte + /// Throws boost::locale::conv::invalid_charset_error if the character set is not supported or isn't single byte /// character set BOOST_LOCALE_DECL std::locale create_simple_codecvt(const std::locale& in, const std::string& encoding, char_facet_t type);