mirror of
https://github.com/boostorg/locale.git
synced 2026-01-19 04:22:08 +00:00
spelling and grammar fixes
This commit is contained in:
committed by
Alexander Grund
parent
49a338d378
commit
6f36132f1d
@@ -194,8 +194,8 @@ explicit has_icu ;
|
||||
|
||||
|
||||
# This function is called whenever the 'boost_locale' metatarget
|
||||
# below is generated and figures what external components we have,
|
||||
# what user wants, and what sources have to be compiled in the end.
|
||||
# below is generated and figures out what external components we have,
|
||||
# what the user wants, and what sources have to be compiled in the end.
|
||||
rule configure-full ( properties * : flags-only )
|
||||
{
|
||||
|
||||
|
||||
@@ -21,28 +21,29 @@
|
||||
\section boundary_analysys_basics Basics
|
||||
|
||||
Boost.Locale provides a boundary analysis tool, allowing you to split text into characters,
|
||||
words, or sentences, and find appropriate places for line breaks.
|
||||
words, or sentences. It is commonly used to find appropriate places for line breaks.
|
||||
|
||||
\note This task is not a trivial task.
|
||||
\note This is not a trivial task!
|
||||
\par
|
||||
A Unicode code point and a character are not equivalent, for example:
|
||||
Hebrew word Shalom - "שָלוֹם" that consists of 4 characters and 6 code points (4 base letters and 2 diacritical marks)
|
||||
A Unicode code point and a character are not equivalent.
|
||||
For example, the Hebrew word Shalom - "שָלוֹם" - consists of 4 characters and
|
||||
6 code points (4 base letters and 2 diacritical marks).
|
||||
\par
|
||||
In some languages, such as Japanese or Chinese, words may not be separated by space characters.
|
||||
|
||||
Boost.Locale provides 2 major classes for boundary analysis:
|
||||
|
||||
- \ref boost::locale::boundary::segment_index - an object that holds an index of segments in the text (like words, characters,
|
||||
sentences). It provides an access to \ref boost::locale::boundary::segment "segment" objects via iterators.
|
||||
- \ref boost::locale::boundary::boundary_point_index - an object that holds an index of boundary points in the text.
|
||||
It allows to iterate over the \ref boost::locale::boundary::boundary_point "boundary_point" objects.
|
||||
- \ref boost::locale::boundary::segment_index - an object that holds the index of segments in text (like words, characters,
|
||||
sentences). It provides access to \ref boost::locale::boundary::segment "segment" objects via iterators.
|
||||
- \ref boost::locale::boundary::boundary_point_index - an object that holds the index of boundary points in text.
|
||||
It can iterate over the \ref boost::locale::boundary::boundary_point "boundary_point" objects.
|
||||
|
||||
Each of the classes above uses an iterator type as a template parameter.
|
||||
Both of these classes accept in their constructor:
|
||||
Both of these classes accept in their constructors:
|
||||
|
||||
- A flag that defines boundary analysis \ref boost::locale::boundary::boundary_type "boundary_type".
|
||||
- The pair of iterators that define the text range that should be analysed
|
||||
- A locale parameter (if not given the global one is used)
|
||||
- The pair of iterators that define the text range to be analysed
|
||||
- A locale parameter (if not given, the global one is used)
|
||||
|
||||
For example:
|
||||
\code
|
||||
@@ -52,13 +53,13 @@ std::locale loc = ... ;
|
||||
ba::segment_index<std::string::const_iterator> map(ba::word,text.begin(),text.end(),loc);
|
||||
\endcode
|
||||
|
||||
Each of them provide a members \c begin(), \c end() and \c find() that allow to iterate
|
||||
Each class implements members \c begin(), \c end() and \c find() making it possible to iterate
|
||||
over the selected segments or boundaries in the text or find a location of a segment or
|
||||
boundary for given iterator.
|
||||
boundary for a given iterator.
|
||||
|
||||
|
||||
Convenience a typedefs like \ref boost::locale::boundary::ssegment_index "ssegment_index"
|
||||
or \ref boost::locale::boundary::wcboundary_point_index "wcboundary_point_index" provided as well,
|
||||
Convenience typedefs like \ref boost::locale::boundary::ssegment_index "ssegment_index"
|
||||
or \ref boost::locale::boundary::wcboundary_point_index "wcboundary_point_index" are provided as well,
|
||||
where "w", "u16" and "u32" prefixes define a character type \c wchar_t,
|
||||
\c char16_t and \c char32_t and "c" and "s" prefixes define whether <tt>std::basic_string<CharType>::const_iterator</tt>
|
||||
or <tt>CharType const *</tt> are used.
|
||||
@@ -80,7 +81,7 @@ For example:
|
||||
using namespace boost::locale::boundary;
|
||||
boost::locale::generator gen;
|
||||
std::string text="To be or not to be, that is the question.";
|
||||
// Create mapping of text for token iterator using global locale.
|
||||
// Create mapping of text for token iterator using the global locale.
|
||||
ssegment_index map(word,text.begin(),text.end(),gen("en_US.UTF-8"));
|
||||
// Print all "words" -- chunks of word boundary
|
||||
for(ssegment_index::iterator it=map.begin(),e=map.end();it!=e;++it)
|
||||
@@ -95,15 +96,15 @@ Would print:
|
||||
\endverbatim
|
||||
|
||||
This sentence "生きるか死ぬか、それが問題だ。" (<a href="http://tatoeba.org/eng/sentences/show/868189">from Tatoeba database</a>)
|
||||
would be split into following segments in \c ja_JP.UTF-8 (Japanese) locale:
|
||||
would be split into the following segments in the \c ja_JP.UTF-8 (Japanese) locale:
|
||||
|
||||
\verbatim
|
||||
"生", "きるか", "死", "ぬか", "、", "それが", "問題", "だ", "。",
|
||||
\endverbatim
|
||||
|
||||
The boundary analysis that is done by Boost.Locale
|
||||
is much more complicated then just splitting the text according
|
||||
to white space characters, even thou it is not perfect.
|
||||
is much more complicated than just splitting the text according
|
||||
to white space characters, although it is not always perfect.
|
||||
|
||||
|
||||
\section boundary_analysys_segments_rules Using Rules
|
||||
@@ -111,12 +112,12 @@ to white space characters, even thou it is not perfect.
|
||||
The segments selection can be customized using \ref boost::locale::boundary::segment_index::rule(rule_type) "rule()" and
|
||||
\ref boost::locale::boundary::segment_index::full_select(bool) "full_select()" member functions.
|
||||
|
||||
By default segment_index's iterator return each text segment defined by two boundary points regardless
|
||||
By default, segment_index's iterator returns each text segment defined by two boundary points regardless
|
||||
of the way they were selected. Thus, in the example above, we could see text segments like "." or " "
|
||||
that were selected as words.
|
||||
|
||||
Using a \c rule() member function we can specify a binary mask of rules we want to use for selection of
|
||||
the boundary points using \ref bl_boundary_word_rules "word", \ref bl_boundary_line_rules "line"
|
||||
boundary points using \ref bl_boundary_word_rules "word", \ref bl_boundary_line_rules "line"
|
||||
and \ref bl_boundary_sentence_rules "sentence" boundary rules.
|
||||
|
||||
For example, by calling
|
||||
@@ -133,7 +134,7 @@ So the code:
|
||||
\code
|
||||
using namespace boost::locale::boundary;
|
||||
std::string text="To be or not to be, that is the question.";
|
||||
// Create mapping of text for token iterator using global locale.
|
||||
// Create mapping of text for token iterator using the global locale.
|
||||
ssegment_index map(word,text.begin(),text.end());
|
||||
// Define a rule
|
||||
map.rule(word_any);
|
||||
@@ -149,14 +150,14 @@ Would print:
|
||||
"To", "be", "or", "not", "to", "be", "that", "is", "the", "question",
|
||||
\endverbatim
|
||||
|
||||
And the for given text="生きるか死ぬか、それが問題だ。" and rule(\ref boost::locale::boundary::word_ideo "word_ideo"), the example above would print.
|
||||
And for the given text="生きるか死ぬか、それが問題だ。" and rule(\ref boost::locale::boundary::word_ideo "word_ideo"), the example above would print:
|
||||
|
||||
\verbatim
|
||||
"生", "死", "問題",
|
||||
\endverbatim
|
||||
|
||||
You can access specific rules the segments where selected it using \ref boost::locale::boundary::segment::rule() "segment::rule()" member
|
||||
function. Using a bit-mask of rules.
|
||||
You can determine why a segment was selected by using the \ref boost::locale::boundary::segment::rule() "segment::rule()" member
|
||||
function. The return value is a bit-mask of rules.
|
||||
|
||||
For example:
|
||||
|
||||
@@ -177,7 +178,7 @@ for(ssegment_index::iterator it=map.begin(),e=map.end();it!=e;++it) {
|
||||
}
|
||||
\endcode
|
||||
|
||||
Would print
|
||||
Would print:
|
||||
|
||||
\verbatim
|
||||
Segment 生 contains: ideographic characters
|
||||
@@ -191,25 +192,23 @@ Segment だ contains: kana characters
|
||||
Segment 。 contains: white space or punctuation marks
|
||||
\endverbatim
|
||||
|
||||
One important things that should be noted that each segment is defined
|
||||
by a pair of boundaries and the rule of its ending point defines
|
||||
if it is selected or not.
|
||||
|
||||
In some cases it may be not what we actually look like.
|
||||
Note that rules are applied to the end boundary of a segment when deciding
|
||||
whether to include a segment. In some cases this can cause unexpected behavior.
|
||||
|
||||
For example we have a text:
|
||||
For example, consider the text:
|
||||
|
||||
\verbatim
|
||||
Hello! How
|
||||
are you?
|
||||
\endverbatim
|
||||
|
||||
And we want to fetch all sentences from the text.
|
||||
Suppose we want to fetch all sentences from the text.
|
||||
|
||||
The \ref bl_boundary_sentence_rules "sentence rules" have two options:
|
||||
|
||||
- Split the text on the point where sentence terminator like ".!?" detected: \ref boost::locale::boundary::sentence_term "sentence_term"
|
||||
- Split the text on the point where sentence separator like "line feed" detected: \ref boost::locale::boundary::sentence_sep "sentence_sep"
|
||||
- Split the text where sentence terminators like ".!?" are detected: \ref boost::locale::boundary::sentence_term "sentence_term"
|
||||
- Split the text where sentence separators such as "line feed" are detected: \ref boost::locale::boundary::sentence_sep "sentence_sep"
|
||||
|
||||
Naturally to ignore sentence separators we would call \ref boost::locale::boundary::segment_index::rule(rule_type v) "segment_index::rule(rule_type v)"
|
||||
with sentence_term parameter and then run the iterator.
|
||||
@@ -225,18 +224,18 @@ for(ssegment_index::iterator it=map.begin(),e=map.end();it!=e;++it)
|
||||
std::cout << "Sentence [" << *it << "]" << std::endl;
|
||||
\endcode
|
||||
|
||||
However we would get the expected segments:
|
||||
Would result in:
|
||||
\verbatim
|
||||
Sentence [Hello! ]
|
||||
Sentence [are you?
|
||||
]
|
||||
\endverbatim
|
||||
|
||||
The reason is that "How\n" is still considered a sentence but selected by different
|
||||
rule.
|
||||
These (potentially unexpected) results occur because "How\n" is still considered
|
||||
a sentence but is selected by a different rule.
|
||||
|
||||
This behavior can be changed by setting \ref boost::locale::boundary::segment_index::full_select(bool) "segment_index::full_select(bool)"
|
||||
to \c true. It would force iterator to join the current segment with all previous segments that may not fit the required rule.
|
||||
to \c true. It will force the iterator to join the current segment with all previous segments even if they do not fit the required rule.
|
||||
|
||||
So we add this line:
|
||||
|
||||
@@ -255,17 +254,15 @@ are you?
|
||||
|
||||
\subsection boundary_analysys_segments_search Locating Segments
|
||||
|
||||
Sometimes it is useful to find a segment that some specific iterator is pointing on.
|
||||
Sometimes it is useful to find a segment that some specific iterator is pointing to.
|
||||
|
||||
For example a user had clicked at specific point, we want to select a word on this
|
||||
location.
|
||||
For example, suppose we want to find the word a user clicked on.
|
||||
|
||||
\ref boost::locale::boundary::segment_index "segment_index" provides
|
||||
\ref boost::locale::boundary::segment_index::find() "find(base_iterator p)"
|
||||
member function for this purpose.
|
||||
|
||||
This function returns the iterator to the segmet such that \a p points to.
|
||||
|
||||
This function returns an iterator to the segment that includes \a p.
|
||||
|
||||
For example:
|
||||
|
||||
@@ -285,41 +282,40 @@ be
|
||||
|
||||
\note
|
||||
|
||||
if the iterator lays inside the segment this segment returned. If the segment does
|
||||
not fit the selection rules, then the segment following requested position
|
||||
is returned.
|
||||
If the iterator is inside a segment, that segment is returned. If the segment does
|
||||
not fit the selection rules, then the next segment following the requested position
|
||||
that does fit the rules will be returned.
|
||||
|
||||
For example: For \ref boost::locale::boundary::word "word" boundary analysis with \ref boost::locale::boundary::word_any "word_any" rule:
|
||||
|
||||
- "t|o be or ", would point to "to" - the iterator in the middle of segment "to".
|
||||
- "to |be or ", would point to "be" - the iterator at the beginning of the segment "be"
|
||||
- "to| be or ", would point to "be" - the iterator does not point to segment with required rule so next valid segment is selected "be".
|
||||
- "to be or| ", would point to end as not valid segment found.
|
||||
|
||||
- "to |be or ", would point to "be" - the iterator at the beginning of the segment "be".
|
||||
- "to| be or ", would point to "be" - the iterator is not pointing to a segment fitting the required rule, so the next valid segment selected is "be".
|
||||
- "to be or| ", would point to end as no valid segment can be found.
|
||||
|
||||
\section boundary_analysys_break Iterating Over Boundary Points
|
||||
\section boundary_analysys_break_basics Basic Iteration
|
||||
|
||||
The \ref boost::locale::boundary::boundary_point_index "boundary_point_index" is similar to
|
||||
\ref boost::locale::boundary::segment_index "segment_index" in its interface but as a different role.
|
||||
Instead of returning text chunks (\ref boost::locale::boundary::segment "segment"s), it returns
|
||||
\ref boost::locale::boundary::segment_index "segment_index" in its interface but has a different role.
|
||||
Instead of returning text chunks (\ref boost::locale::boundary::segment "segment"s), it returns a
|
||||
\ref boost::locale::boundary::boundary_point "boundary_point" object that
|
||||
represents a position in text - a base iterator used that is used for
|
||||
represents a position in text - a base iterator that is used for
|
||||
iteration of the source text C++ characters.
|
||||
The \ref boost::locale::boundary::boundary_point "boundary_point" object
|
||||
also provides a \ref boost::locale::boundary::boundary_point::rule() "rule()" member
|
||||
function that defines a rule this boundary was selected according to.
|
||||
function that returns why this boundary was selected, i.e. the matched rule.
|
||||
|
||||
\note The beginning and the ending of the text are considered boundary points, so even
|
||||
an empty text consists of at least one boundary point.
|
||||
|
||||
Lets see an example of selecting first two sentences from a text:
|
||||
Let's see an example of selecting the first two sentences from a text:
|
||||
|
||||
\code
|
||||
using namespace boost::locale::boundary;
|
||||
boost::locale::generator gen;
|
||||
|
||||
// our text sample
|
||||
// Our text sample
|
||||
std::string const text="First sentence. Second sentence! Third one?";
|
||||
// Create an index
|
||||
sboundary_point_index map(sentence,text.begin(),text.end(),gen("en_US.UTF-8"));
|
||||
@@ -338,7 +334,7 @@ if(p!=e) {
|
||||
<< std::endl;
|
||||
}
|
||||
else {
|
||||
std::cout <<"There are less then two sentences in this "
|
||||
std::cout <<"There are less than two sentences in this "
|
||||
<<"text: " << text << std::endl;
|
||||
}\endcode
|
||||
|
||||
@@ -350,7 +346,7 @@ First two sentences are: First sentence. Second sentence!
|
||||
|
||||
\section boundary_analysys_break_rules Using Rules
|
||||
|
||||
Similarly to the \ref boost::locale::boundary::segment_index "segment_index" the
|
||||
Just like \ref boost::locale::boundary::segment_index "segment_index" the
|
||||
\ref boost::locale::boundary::boundary_point_index "boundary_point_index" provides
|
||||
a \ref boost::locale::boundary::boundary_point_index::rule(rule_type r) "rule(rule_type mask)"
|
||||
member function to filter boundary points that interest us.
|
||||
@@ -358,7 +354,7 @@ member function to filter boundary points that interest us.
|
||||
It allows setting \ref bl_boundary_word_rules "word", \ref bl_boundary_line_rules "line"
|
||||
and \ref bl_boundary_sentence_rules "sentence" rules for filtering boundary points.
|
||||
|
||||
Lets change an example above a little:
|
||||
Let's change the example above a bit:
|
||||
|
||||
\code
|
||||
// our text sample
|
||||
@@ -371,11 +367,11 @@ If we run our program as is on the sample above we would get:
|
||||
First two sentences are: First sentence. Second
|
||||
\endverbatim
|
||||
|
||||
Which is not something that we really expected. As the "Second\n"
|
||||
Which is not really what we expected, because the "Second\n"
|
||||
is considered an independent sentence that was separated by
|
||||
a line separator "Line Feed".
|
||||
|
||||
However, we can set set a rule \ref boost::locale::boundary::sentence_term "sentence_term"
|
||||
However, we can set the rule \ref boost::locale::boundary::sentence_term "sentence_term"
|
||||
and the iterator would use only boundary points that are created
|
||||
by sentence terminators like ".!?".
|
||||
|
||||
@@ -391,8 +387,8 @@ First two sentences are: First sentence. Second
|
||||
sentence!
|
||||
\endverbatim
|
||||
|
||||
You can also use \ref boost::locale::boundary::boundary_point::rule() "boundary_point::rule()" member
|
||||
function to learn about the reason this boundary point was created by comparing it with an appropriate
|
||||
You can also use the \ref boost::locale::boundary::boundary_point::rule() "boundary_point::rule()" member
|
||||
function to learn about the reason why this boundary point was created by comparing it with an appropriate
|
||||
mask.
|
||||
|
||||
For example:
|
||||
@@ -431,27 +427,26 @@ sentence! Third one?|]
|
||||
|
||||
\subsection boundary_analysys_break_search Locating Boundary Points
|
||||
|
||||
Sometimes it is useful to find a specific boundary point according to given
|
||||
Sometimes it is useful to find a specific boundary point according to a given
|
||||
iterator.
|
||||
|
||||
\ref boost::locale::boundary::boundary_point_index "boundary_point_index" provides
|
||||
a \ref boost::locale::boundary::boundary_point_index::find() "iterator find(base_iterator p)" member
|
||||
function.
|
||||
|
||||
It would return an iterator to a boundary point on \a p's location or at the
|
||||
location following it if \a p does not point to appropriate position.
|
||||
It returns a boundary point on \a p or at the location following \a p if \a p does not point to an appropriate position.
|
||||
|
||||
For example, for word boundary analysis:
|
||||
|
||||
- If a base iterator points to "to |be", then the returned boundary point would be "to |be" (same position)
|
||||
- If a base iterator points to "t|o be", then the returned boundary point would be "to| be" (next valid position)
|
||||
|
||||
For example if we want to select 6 words around specific boundary point we can use following code:
|
||||
For example, if we want to select 6 words around a specific boundary point we can use the following code:
|
||||
|
||||
\code
|
||||
using namespace boost::locale::boundary;
|
||||
boost::locale::generator gen;
|
||||
// our text sample
|
||||
// Our text sample
|
||||
std::string const text= "To be or not to be, that is the question.";
|
||||
|
||||
// Create a mapping
|
||||
@@ -459,35 +454,35 @@ sboundary_point_index map(word,text.begin(),text.end(),gen("en_US.UTF-8"));
|
||||
// Ignore white space
|
||||
map.rule(word_any);
|
||||
|
||||
// define our arbitraty point
|
||||
// Define our arbitrary point
|
||||
std::string::const_iterator pos = text.begin() + 12; // "no|t";
|
||||
|
||||
// Get the search range
|
||||
sboundary_point_index::iterator
|
||||
begin =map.begin(),
|
||||
begin = map.begin(),
|
||||
end = map.end(),
|
||||
it = map.find(pos); // find a boundary
|
||||
|
||||
// go 3 words backward
|
||||
// Go 3 words backward
|
||||
for(int count = 0;count <3 && it!=begin; count ++)
|
||||
--it;
|
||||
|
||||
// Save the start
|
||||
std::string::const_iterator start = *it;
|
||||
|
||||
// go 6 words forward
|
||||
// Go 6 words forward
|
||||
for(int count = 0;count < 6 && it!=end; count ++)
|
||||
++it;
|
||||
|
||||
// make sure we at valid position
|
||||
// Make sure we are at a valid position
|
||||
if(it==end)
|
||||
--it;
|
||||
|
||||
// print the text
|
||||
// Print the text
|
||||
std::cout << std::string(start,it->iterator()) << std::endl;
|
||||
\endcode
|
||||
|
||||
That would print:
|
||||
This would print:
|
||||
|
||||
\verbatim
|
||||
be or not to be, that
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
// https://www.boost.org/LICENSE_1_0.txt
|
||||
|
||||
/*!
|
||||
\page building_boost_locale Building The library
|
||||
\page building_boost_locale Building the Library
|
||||
|
||||
- \ref building_boost_locale_bb
|
||||
- \ref bb_building_deps
|
||||
@@ -20,26 +20,25 @@
|
||||
\subsection bb_building_deps Dependencies
|
||||
|
||||
- ICU library 3.6 or above is strongly recommended
|
||||
- If no ICU library is given, iconv support is required under POSIX platforms.
|
||||
- If no ICU library is given, iconv support is required on POSIX platforms.
|
||||
|
||||
\subsection bb_platform_opts Platform Notes
|
||||
|
||||
- If you use Boost.Locale on Windows with MinGW/GCC < 4.5 you'll be
|
||||
able to use static version only. Mingw/GCC prior to 4.5 have no
|
||||
support of dynamic runtime linking.\n
|
||||
able to use the static version only. MinGW/GCC prior to 4.5 has no
|
||||
support for dynamic runtime linking.\n
|
||||
Using Boost.Locale DLLs with MinGW gcc also requires dynamic linking
|
||||
with the runtime libraries libstdc++ and libgcc. Some gcc builds use
|
||||
static linking by default so make sure you use correct link options
|
||||
with your compiler when you build your own programs.
|
||||
static linking by default so make sure you use the correct link options.
|
||||
- AIX's iconv lacks important character sets that Boost.Locale requires,
|
||||
so you need to either use GNU iconv or link with ICU library.
|
||||
- If iconv library is not found on Darwin/Mac OS X builds make sure there
|
||||
is no multiple iconv installations and provide -sICONV_PATH build option
|
||||
to point to correct location of iconv library.
|
||||
so you need to either use GNU iconv or link with the ICU library.
|
||||
- If the iconv library is not found on Darwin/Mac OS X builds make sure there
|
||||
are not multiple iconv installations and provide the -sICONV_PATH build option
|
||||
to point to the correct location of the iconv library.
|
||||
|
||||
\subsection bb_building_proc Building Process
|
||||
|
||||
Now all you need to do is invoke bjam command:
|
||||
Now all you need to do is to invoke bjam:
|
||||
|
||||
\verbatim
|
||||
./bjam --with-locale stage
|
||||
@@ -50,8 +49,8 @@ Or on Windows
|
||||
.\bjam --with-locale stage
|
||||
\endverbatim
|
||||
|
||||
If you are using custom ICU build or you are using Microsoft Windows
|
||||
you need to provide a path to location of ICU library using \c -sICU_PATH option
|
||||
If you are using a custom ICU build or you are using Microsoft Windows,
|
||||
you need to provide a path to the location of the ICU library using the \c -sICU_PATH option
|
||||
|
||||
For example:
|
||||
|
||||
@@ -82,42 +81,41 @@ For example:
|
||||
\endverbatim
|
||||
|
||||
\note Don't forget to put both debug and release versions of ICU libraries in this path
|
||||
when using Microsoft Visual Studio so Boost.Build will link correctly debug and release
|
||||
versions of boost_locale library.
|
||||
when using Microsoft Visual Studio so Boost.Build will correctly link debug and release
|
||||
versions of Boost.Locale.
|
||||
|
||||
\section bb_build_opts Build Options
|
||||
|
||||
Boost.Locale supports following options with values \c off or \c on
|
||||
Boost.Locale supports the following options with values \c off or \c on
|
||||
|
||||
- \c boost.locale.icu=off disable build of ICU backend even if ICU library exists
|
||||
- \c boost.locale.iconv=off or \c boost.locale.iconv=on enable or disable use of iconv
|
||||
library. It is off by default on Windows and Solaris
|
||||
- \c boost.locale.winapi=off - disable winapi backend, it is on by default on Windows and Cygwin
|
||||
- \c boost.locale.std=off or \c boost.locale.winapi=on Disable or enable std backends. \c std backend
|
||||
- \c boost.locale.icu=off prevents building the ICU backend even if the ICU library exists
|
||||
- \c boost.locale.iconv enables or disables the iconv backend. It is off by default on Windows and Solaris
|
||||
- \c boost.locale.winapi=off disables the winapi backend. It is on by default on Windows and Cygwin
|
||||
- \c boost.locale.std enables or disables the std backend. The std backend
|
||||
is disabled by default when using Sun Studio.
|
||||
- \c boost.locale.posix=on or \c boost.locale.posix=off Enable or disable support of POSIX backend,
|
||||
it is on by default on Linux and Mac OS X
|
||||
- \c boost.locale.posix enables or disables support of the POSIX backend.
|
||||
It is on by default on Linux and Mac OS X
|
||||
|
||||
Also Boost.Locale supports following options
|
||||
Boost.Locale also supports the following options:
|
||||
|
||||
- \c -sICU_PATH=/path/to/location/of/icu - the location of custom ICU library
|
||||
- \c -sICONV_PATH=/path/to/location/of/iconv - the location of custom iconv library
|
||||
- \c -sICU_PATH=/path/to/location/of/icu - the location of the ICU library
|
||||
- \c -sICONV_PATH=/path/to/location/of/iconv - the location of the iconv library
|
||||
|
||||
|
||||
For example:
|
||||
|
||||
- Build the library on Windows with ICU backend only:
|
||||
- Build the library on Windows with the ICU backend only:
|
||||
\verbatim
|
||||
.\bjam boost.locale.winapi=off boost.locale.std=off -sICU_PATH=c:\icu46 --with-locale stage
|
||||
\endverbatim
|
||||
- Build the library on Linux with std backend only
|
||||
- Build the library on Linux with the std backend only
|
||||
\verbatim
|
||||
./bjam boost.locale.posix=off boost.locale.icu=off --with-locale stage
|
||||
\endverbatim
|
||||
|
||||
\section bb_build_test Running Unit Tests
|
||||
|
||||
You can run unit tests by invoking \c bjam with \c libs/locale/test project parameter
|
||||
You can run unit tests by invoking \c bjam with the \c libs/locale/test project parameter
|
||||
\verbatim
|
||||
./bjam libs/locale/test
|
||||
\endverbatim
|
||||
@@ -127,10 +125,11 @@ You can run unit tests by invoking \c bjam with \c libs/locale/test project para
|
||||
Boost.Locale is built with binary compatibility in mind. Switching localization back ends on or off,
|
||||
or using iconv or not, does not affect binary compatibility. So if a dynamic library was built
|
||||
with all possible backends, other dynamic libraries compiled with, for example, only the \c std, \c posix
|
||||
or \c winapi backends would still be binary-compatible with it.
|
||||
or \c winapi backends would still be binary-compatible.
|
||||
|
||||
However this definitely has an effect on some features. For example, if you
|
||||
try to use boundary analysis or a calendar facet when the library does not support the icu backend
|
||||
you would get an exception.
|
||||
|
||||
Using a feature not included in the binary will result in an exception.
|
||||
For example, if you try to use boundary analysis or a calendar facet when the library does not support the ICU backend,
|
||||
you will get an exception.
|
||||
|
||||
*/
|
||||
|
||||
@@ -12,8 +12,7 @@
|
||||
Boost.Locale provides \ref boost::locale::conv::to_utf() "to_utf", \ref boost::locale::conv::from_utf() "from_utf" and
|
||||
\ref boost::locale::conv::utf_to_utf() "utf_to_utf" functions in
|
||||
the \c boost::locale::conv namespace. They are simple and
|
||||
convenient functions to convert a string to and from
|
||||
UTF-8/16/32 strings and strings using other encodings.
|
||||
convenient functions to convert between UTF-8/16/32 and other encodings.
|
||||
|
||||
For example:
|
||||
|
||||
@@ -25,14 +24,13 @@ std::string utf8_string2 = utf_to_utf<char>(wide_string);
|
||||
\endcode
|
||||
|
||||
|
||||
This function may use an explicit encoding name like "Latin1" or "ISO-8859-8",
|
||||
or use std::locale as a parameter to fetch this information from it.
|
||||
It also receives a policy parameter that tells it how to behave if the
|
||||
conversion can't be performed (i.e. an illegal or unsupported character is found).
|
||||
By default this function skips all illegal characters and tries to do the best it
|
||||
can, however, it is possible ask it to throw
|
||||
a \ref boost::locale::conv::conversion_error "conversion_error" exception
|
||||
by passing the \c stop flag to it:
|
||||
These functions accept an explicit encoding name like "Latin1" or "ISO-8859-8",
|
||||
or a std::locale which is used to get the encoding.
|
||||
They also accept a policy parameter that determines what happens if a conversion can't be performed
|
||||
(i.e. an illegal or unsupported character is found).
|
||||
By default, these functions skip all illegal characters and try to do the best they can.
|
||||
However, these functions can throw a \ref boost::locale::conv::conversion_error "conversion_error"
|
||||
when passed the \c stop flag:
|
||||
|
||||
\code
|
||||
std::wstring s=to_utf<wchar_t>("\xFF\xFF","UTF-8",stop);
|
||||
@@ -44,18 +42,18 @@ std::wstring s=to_utf<wchar_t>("\xFF\xFF","UTF-8",stop);
|
||||
Boost.Locale provides stream codepage conversion facets based on the \c std::codecvt facet.
|
||||
This allows conversion between wide-character encodings and 8-bit encodings like UTF-8, ISO-8859 or Shift-JIS.
|
||||
|
||||
Most of compilers provide such facets, but:
|
||||
Most compilers provide such facets, but:
|
||||
|
||||
- Under Windows MSVC does not support UTF-8 encodings at all.
|
||||
- Under Linux the encodings are supported only if the required locales are generated. For example
|
||||
- Windows MSVC does not support UTF-8 encodings at all.
|
||||
- In Linux, the encodings are supported only if the required locales are generated. For example
|
||||
it may be impossible to create a \c he_IL.CP1255 locale even when the \c he_IL locale is available.
|
||||
|
||||
Thus Boost.Locale provides an option to generate code-page conversion facets for use with
|
||||
Boost.Locale provides an option to generate code-page conversion facets for use with
|
||||
Boost.Iostreams filters or \c std::wfstream. For example:
|
||||
|
||||
\code
|
||||
std::locale loc= generator().generate("he_IL.UTF-8");
|
||||
std::wofstream file.
|
||||
std::wofstream file;
|
||||
file.imbue(loc);
|
||||
file.open("hello.txt");
|
||||
file << L"שלום!" << endl;
|
||||
@@ -69,7 +67,7 @@ You can use the \c std::codecvt facet directly, but this is quite tricky and
|
||||
requires accurate buffer and error management.
|
||||
|
||||
You can use the \c boost::iostreams::code_converter class for stream-oriented
|
||||
conversions between the wide-character set and narrow locale character set.
|
||||
conversions between the wide character set and narrow locale character set.
|
||||
|
||||
This is a sample program that converts wide to narrow characters for an arbitrary
|
||||
stream:
|
||||
@@ -84,7 +82,7 @@ stream:
|
||||
|
||||
namespace io = boost::iostreams;
|
||||
|
||||
// Device that consumes the converted text,
|
||||
// Device that consumes the converted text
|
||||
// In our case it just writes to standard output
|
||||
class consumer {
|
||||
public:
|
||||
@@ -117,8 +115,8 @@ int main()
|
||||
converter_stream stream;
|
||||
stream.open(dev);
|
||||
// Now wide characters that are written
|
||||
// to the stream would be given to
|
||||
// our consumer as narrow characters
|
||||
// to the stream will be given to
|
||||
// our consumer as narrow characters
|
||||
// in UTF-8 encoding
|
||||
stream << L"שלום" << std::flush;
|
||||
}
|
||||
@@ -138,9 +136,7 @@ ISO-8859, and Shift-JIS, but not with stateful encodings like UTF-7 or SCSU.
|
||||
|
||||
\note
|
||||
|
||||
The implementation of codecvt for single byte encodings like ISO-8859-X and for UTF-8 is very efficient
|
||||
and would allow fast conversion of the content, however its performance may be sub-optimal for
|
||||
double-width encodings like Shift-JIS, due to the stateless problem described above.
|
||||
|
||||
The implementation of codecvt is very fast and efficient for single byte encodings like ISO-8859-X and for UTF-8,
|
||||
however its performance may be sub-optimal for double-width encodings like Shift-JIS, due to the stateless problem described above.
|
||||
|
||||
*/
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
\page collation Collation
|
||||
|
||||
Boost.Locale provides a \ref boost::locale::collator "collator" class, derived from \c std::collate, that adds support for
|
||||
primary, secondary, tertiary, quaternary and identical comparison levels. They can be approximately defined as:
|
||||
primary, secondary, tertiary, quaternary, and identical comparison levels. They can be approximately defined as:
|
||||
|
||||
-# Primary -- ignore accents and character case, comparing base letters only. For example "facade" and "Façade" are the same.
|
||||
-# Secondary -- ignore character case but consider accents. "facade" and "façade" are different but "Façade" and "façade" are the same.
|
||||
@@ -16,7 +16,7 @@ primary, secondary, tertiary, quaternary and identical comparison levels. They c
|
||||
-# Quaternary -- consider all case, accents, and punctuation. The words must be identical in terms of Unicode representation.
|
||||
-# Identical -- as quaternary, but compare code points as well.
|
||||
|
||||
There are two ways of using the \ref boost::locale::collator "collator" facet: directly, by calling its member functions \ref boost::locale::collator::compare() "compare", \ref boost::locale::collator::transform() "transform" and \ref
|
||||
There are two ways of using the \ref boost::locale::collator "collator" facet: directly, by calling its member functions \ref boost::locale::collator::compare() "compare", \ref boost::locale::collator::transform() "transform", and \ref
|
||||
boost::locale::collator::hash() "hash", or indirectly by using the \ref boost::locale::comparator "comparator" template
|
||||
class in STL algorithms.
|
||||
|
||||
|
||||
@@ -7,9 +7,13 @@
|
||||
/*!
|
||||
\page conversions Text Conversions
|
||||
|
||||
There is a set of functions that perform basic string conversion operations:
|
||||
upper, lower and \ref term_title_case "title case" conversions, \ref term_case_folding "case folding"
|
||||
and Unicode \ref term_normalization "normalization". These are \ref boost::locale::to_upper "to_upper" , \ref boost::locale::to_lower "to_lower", \ref boost::locale::to_title "to_title", \ref boost::locale::fold_case "fold_case" and \ref boost::locale::normalize "normalize".
|
||||
Boost.Locale provides several functions for basic string manipulation:
|
||||
|
||||
- \ref boost::locale::to_upper "to_upper": convert a string to upper case
|
||||
- \ref boost::locale::to_lower "to_lower": convert a string to lower case
|
||||
- \ref boost::locale::to_title "to_title": convert a string to title case
|
||||
- \ref boost::locale::fold_case "fold_case": makes a string case-agnostic (see \ref term_case_folding "case folding")
|
||||
- \ref boost::locale::normalize "normalize": convert equivalent code points to a consistent binary form (\ref term_normalization "normalization")
|
||||
|
||||
All these functions receive an \c std::locale object as parameter or use a global locale by default.
|
||||
|
||||
@@ -35,8 +39,8 @@ Title Grüßen
|
||||
Fold grüssen
|
||||
\endverbatim
|
||||
|
||||
You may notice that there are existing functions \c to_upper and \c to_lower in the Boost.StringAlgo library.
|
||||
The difference is that these function operate over an entire string instead of performing incorrect character-by-character conversions.
|
||||
There are existing functions \c to_upper and \c to_lower in the Boost.StringAlgo library, however the
|
||||
Boost.Locale functions operate on an entire string instead of performing incorrect character-by-character conversions.
|
||||
|
||||
For example:
|
||||
|
||||
@@ -54,7 +58,7 @@ GRÜßEN GRÜSSEN
|
||||
Where the letter "ß" was not converted correctly to double-S in the first case because of a limitation of the \c std::ctype facet.
|
||||
|
||||
This is even more problematic in the case of UTF-8 encodings, where non US-ASCII characters are not converted at all.
|
||||
For example, this code
|
||||
For example, this code:
|
||||
|
||||
\code
|
||||
std::string grussen = "grüßen";
|
||||
@@ -81,7 +85,7 @@ to \ref boost::locale::normalize() "normalize" function:
|
||||
- NFKD - Compatibility decomposition - boost::locale::norm_nfkd
|
||||
- NFKC - Compatibility decomposition followed by canonical composition - boost::locale::norm_nfkc
|
||||
|
||||
For more details on normalization forms, read <a href="http://unicode.org/reports/tr15/#Norm_Forms">this article</a>.
|
||||
For more details on normalization forms, read <a href="http://unicode.org/reports/tr15/#Norm_Forms">this report on unicode.org</a>.
|
||||
|
||||
\section conversions_notes Notes
|
||||
|
||||
@@ -91,5 +95,5 @@ For more details on normalization forms, read <a href="http://unicode.org/report
|
||||
determine the 8-bit encoding.
|
||||
- All of these functions can work with an STL string, a NULL terminated string, or a range defined by two pointers. They always
|
||||
return a newly created STL string.
|
||||
- The length of the string may change, see the above example.
|
||||
- The length of the string may change; see the above example.
|
||||
*/
|
||||
|
||||
@@ -150,17 +150,16 @@ date_time now;
|
||||
std::cout << now << std::endl;
|
||||
\endcode
|
||||
|
||||
Would print in the default format, something like:
|
||||
Would print something like this (using the default format):
|
||||
|
||||
\verbatim
|
||||
2/3/2011 12:00 am
|
||||
\endverbatim
|
||||
|
||||
However if you need to change the default behavior (for example show only date),
|
||||
then you need to use specific iostream manipulator in order to display current date or time,
|
||||
it would override the default formatting.
|
||||
then you need to use specific iostream manipulators in order to display current date or time.
|
||||
|
||||
For example
|
||||
For example:
|
||||
|
||||
\code
|
||||
using namespace boost::locale;
|
||||
@@ -180,7 +179,7 @@ of the \c iostream's locale and time zone and not in the context of specific \c
|
||||
\section dates_times_timezones_qna Questions and Answers
|
||||
|
||||
|
||||
<b>Why should I use Boost.Locale over Boost.DateTime when I need Gregorian calendar only?</b>
|
||||
<b>Why should I use Boost.Locale over Boost.DateTime when I only need the Gregorian calendar?</b>
|
||||
|
||||
- Boost.DateTime is a locale agnostic library and ignores the fact that the first day of the week
|
||||
varies by the locale.
|
||||
@@ -189,13 +188,13 @@ of the \c iostream's locale and time zone and not in the context of specific \c
|
||||
local times and time-zones handling.
|
||||
\n
|
||||
For example, <tt>date_time(some_time.time() + 3600)</tt> may not be equal to
|
||||
<tt>some_time + hour()</tt>, because of the daylight savings time.
|
||||
<tt>some_time + hour()</tt> because of daylight savings time.
|
||||
|
||||
<b>Why don't you use Boost.DateTime time zone support?</b>
|
||||
|
||||
Boost.DateTime's time zone support is broken. Time zones can not be represented with
|
||||
a simple table of rules where daylight saving depends only on a certain n'th day of the week in a month.
|
||||
The daylight savings time may vary by year, political issues and many other things.
|
||||
The daylight savings time may vary by year, political issues, and many other things.
|
||||
|
||||
Most of the modern operating systems (Linux, *BSD, Mac OS X, OpenVMS) and many important software packages
|
||||
(ICU, Java, Python) use the so called Olson database in order to handle daylight saving time
|
||||
|
||||
@@ -10,18 +10,18 @@
|
||||
All modern operating systems use Unicode.
|
||||
|
||||
- The Unix operating system family uses UTF-8 encoding by default.
|
||||
- Microsoft Windows had migrated to Wide/UTF-16 API.
|
||||
The narrow encodings had been deprecated and the native OS API became so called "Wide API"
|
||||
- Microsoft Windows has migrated to the Wide/UTF-16 API.
|
||||
The narrow encodings have been deprecated and the native OS API became the so called "Wide API"
|
||||
|
||||
As a result of radically different approaches, it is very hard to write portable Unicode aware applications.
|
||||
|
||||
Boost Locale fully supports both narrow and wide API. The default character
|
||||
Boost Locale fully supports both narrow and wide APIs. The default character
|
||||
encoding is assumed to be UTF-8 on Windows.
|
||||
|
||||
So if the default operating system Locale is "English_USA.1252" the default
|
||||
locale for Boost.Locale on Windows would be "en_US.UTF-8".
|
||||
|
||||
When the created locale object is installed globally then any libraries
|
||||
When the created locale object is installed globally, then any libraries
|
||||
that use \c std::codecvt for conversion between narrow API and the native
|
||||
wide API would handle UTF-8 correctly.
|
||||
|
||||
@@ -49,14 +49,14 @@ int main()
|
||||
However such behavior may break existing software that assumes that the current
|
||||
encoding is a single byte encoding like code page 1252.
|
||||
|
||||
\ref boost::locale::generator class has a property \ref boost::locale::generator::use_ansi_encoding() "use_ansi_encoding()"
|
||||
that allows to change the behavior to legacy one and select an ANSI code page as
|
||||
The \ref boost::locale::generator class has a property \ref boost::locale::generator::use_ansi_encoding() "use_ansi_encoding()"
|
||||
that allows changing the behavior to the legacy one and selecting an ANSI code page as the
|
||||
default system encoding.
|
||||
|
||||
So, when the current locale is "English_USA.1252" and the \c use_ansi_encoding is turned on
|
||||
then the default locale would be "en_US.windows-1252"
|
||||
So, when the current locale is "English_USA.1252" and the \c use_ansi_encoding is turned on,
|
||||
then the default locale would be "en_US.windows-1252".
|
||||
|
||||
\note \c winapi backend does not support ANSI encodings, thus UTF-8 encoding is always used for narrow characters.
|
||||
\note The \c winapi backend does not support ANSI encodings; thus UTF-8 encoding is always used for narrow characters.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
16
doc/faq.txt
16
doc/faq.txt
@@ -7,14 +7,14 @@
|
||||
/*!
|
||||
\page faq Frequently Asked Questions
|
||||
|
||||
- \anchor faq_bad_cast <b>I try to use some Boost.Locale functions and I get an \c std::bad_cast exception thrown?</b>
|
||||
- \anchor faq_bad_cast <b>Some Boost.Locale functions throw \c std::bad_cast exception?</b>
|
||||
\n
|
||||
\n
|
||||
\b Answer: You probably try to use incorrect \c std::locale object. All Boost.Locale tools relay on \c std::locale object's facets.
|
||||
The locale object should be generated with \ref boost::locale::generator "generator" class and then passed to
|
||||
the function or alternatively global locale should be set using \c std::locale::global() function such that
|
||||
global locale (and default created one) would have required facets to use.
|
||||
- \anchor faq_number <b>I had installed global locale and try to write something to stream but still get wrong output?</b>
|
||||
\b Answer: You are probably trying to use an incorrect \c std::locale object. All Boost.Locale tools rely on \c std::locale object's facets.
|
||||
The locale object should be generated with the \ref boost::locale::generator "generator" class and then passed to
|
||||
the function or alternatively global locale should be set using the \c std::locale::global() function such that
|
||||
global locale (and default created one) would have the required facets.
|
||||
- \anchor faq_number <b>I have installed global locale, but when I try to write something to a stream I still get the wrong output?</b>
|
||||
For example:
|
||||
\code
|
||||
#include <boost/locale.hpp>
|
||||
@@ -29,8 +29,8 @@
|
||||
Prints a number instead of a date.
|
||||
\n
|
||||
\b Answer: You forgot to imbue the stream with the locale. Changing the global locale does not affect the
|
||||
locale in existing \c iostream objects. Thus because \c std::out and other global streams were created
|
||||
before changing the global locale Boost.Locale manipulators have no effect. You need to write:
|
||||
locale in existing \c iostream objects. Thus, because \c std::cout and other global streams were created
|
||||
before changing the global locale, Boost.Locale manipulators have no effect. You need to write:
|
||||
\code
|
||||
#include <boost/locale.hpp>
|
||||
#include <iostream>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
/*!
|
||||
|
||||
\page formatting_and_parsing Numbers, Time and Currency formatting and parsing
|
||||
\page formatting_and_parsing Numbers, Time and Currency Formatting and Parsing
|
||||
|
||||
All formatting and parsing is performed via the standard I/O streams. Each of the above information types is represented as a number.
|
||||
The formatting information is set using iostream manipulators. All manipulators are placed in the boost::locale::as namespace.
|
||||
@@ -21,16 +21,16 @@ For example:
|
||||
\endcode
|
||||
|
||||
There is a special manipulator \c as::posix that "unsets" locale-specific settings and returns them to the default \c iostream formatting
|
||||
and parsing methods. Please note, such formats may still be localized by the default \c std::num_put and \c std::num_get facets.
|
||||
and parsing methods. Please note, such formats may still be localized by the default facets, e.g. \c std::num_put and \c std::num_get.
|
||||
|
||||
\section numbers_formatting Numbers and number manipulators
|
||||
\section numbers_formatting Numbers and Number Manipulators
|
||||
|
||||
Here are the manipulators for number formatting:
|
||||
|
||||
- \c as::number -- format number according to local specifications, it takes into account various \c std::ios_base flags like scientific
|
||||
- \c as::number -- format numbers according to local specifications. Takes into account various \c std::ios_base flags like scientific
|
||||
format and precision.
|
||||
\n
|
||||
- \c as::percent -- format number as "percent" format. For example:
|
||||
- \c as::percent -- format numbers as percents. For example:
|
||||
\code
|
||||
cout << as::percent << 0.25 <<endl;
|
||||
\endcode
|
||||
@@ -40,12 +40,12 @@ Here are the manipulators for number formatting:
|
||||
\endverbatim
|
||||
\n
|
||||
- \c as::spellout -- spell the number. For example, under the English locale, 103 may be displayed as "one hundred three".
|
||||
\b Note: not all locales provide rules for spelling numbers. In such a case the number would be displayed in decimal format.
|
||||
\b Note: not all locales provide rules for spelling numbers. In such cases, the number would be displayed in decimal format.
|
||||
\n
|
||||
- \c as::ordinal -- display an order-of element. For example "2" would be displayed as "2nd" under the English locale. As in
|
||||
the above case, not all locales provide ordinal rules.
|
||||
|
||||
\section currency_formatting Currency formatting
|
||||
\section currency_formatting Currency Formatting
|
||||
|
||||
These are the manipulators for currency formatting:
|
||||
|
||||
@@ -58,7 +58,7 @@ These are the manipulators for currency formatting:
|
||||
\note \c as::currency_XYZ manipulators have no effect on general formatting, only on the currency format. You must use both currency
|
||||
and number manipulators to use a non-default format.
|
||||
|
||||
\section date_and_time_formatting Date and Time formatting
|
||||
\section date_and_time_formatting Date and Time Formatting
|
||||
|
||||
Dates and times are represented as POSIX time. When date-time formatting is turned on in the \c iostream, each number is treated as a
|
||||
POSIX time. The number may be an integer or a double.
|
||||
@@ -105,7 +105,7 @@ For example:
|
||||
cout << as::time_zone("EST") << "Eastern Standard Time is: "<< now <<endl;
|
||||
\endcode
|
||||
|
||||
There is a list of supported \c strftime flags by ICU backend:
|
||||
Here is a list of \c strftime flags supported by the ICU backend:
|
||||
|
||||
- \c \%a -- Abbreviated weekday (Sun.)
|
||||
- \c \%A -- Full weekday (Sunday)
|
||||
@@ -134,10 +134,10 @@ There is a list of supported \c strftime flags by ICU backend:
|
||||
- \c \%Z -- Time Zone
|
||||
- \c \%\% -- Percent symbol
|
||||
|
||||
Unsupported \c strftime flags are: \c \%C , \c \%u , \c \%U , \c \%V , \c \%w , \c \%W . Also, the \c O and \c E modifiers are not supported.
|
||||
Unsupported \c strftime flags are: \c \%C , \c \%u , \c \%U , \c \%V , \c \%w , \c \%W . The \c O and \c E modifiers are not supported either.
|
||||
|
||||
|
||||
\b General \b recommendations
|
||||
\b General \b Recommendations
|
||||
|
||||
- Prefer using generic date-time manipulators rather than specifying the full format using \c as::ftime.
|
||||
- Remember that the current calendar may not be Gregorian.
|
||||
|
||||
@@ -8,36 +8,36 @@
|
||||
\page gettext_for_windows Using Gettext Tools on Windows
|
||||
|
||||
In order to get the Gettext tools like \c msgfmt, \c msgmerge, \c xgettext for Windows you have
|
||||
basically several options:
|
||||
several options:
|
||||
|
||||
- Download the package from <a href="http://cppcms.sourceforge.net">CppCMS</a> project (where the Boost.Locale was developed originally)
|
||||
- Download the a set of packages from MinGW project
|
||||
- Download the package from <a href="http://cppcms.sourceforge.net">CppCMS</a> (where the Boost.Locale was developed originally)
|
||||
- Download a set of packages from the MinGW project
|
||||
- Build it on your own
|
||||
- Use Cygwin's packages
|
||||
|
||||
\section gettext_for_windows_cppcms Getting gettext utilities from CppCMS project
|
||||
\section gettext_for_windows_cppcms Getting Gettext Utilities from CppCMS
|
||||
|
||||
Boost.Locale was developed for needs of <a href="http://cppcms.sourceforge.net">CppCMS</a> project
|
||||
and thus CppCMS hosts a convince package for Windows users of pre-build, statically liked \c gettext
|
||||
Boost.Locale was developed for the needs of <a href="http://cppcms.sourceforge.net">CppCMS</a>
|
||||
and thus CppCMS hosts a convenience package for Windows users of pre-built, statically linked \c gettext
|
||||
runtime utilities like \c xgettext, \c msgfmt, etc.
|
||||
|
||||
So you can download a zip file \c gettext-tools-static-XXX.zip from a CppCMS downloads page
|
||||
You can download a zip file \c gettext-tools-static-XXX.zip from the CppCMS downloads page
|
||||
under <a href="https://sourceforge.net/projects/cppcms/files/boost_locale/gettext_for_windows/">boost_locale/gettext_for_windows</a>.
|
||||
|
||||
Extract the file and use the executable files inside.
|
||||
|
||||
\section gettext_for_windows_mingw Getting Gettext via MinGW project
|
||||
\section gettext_for_windows_mingw Getting Gettext via MinGW
|
||||
|
||||
MinGW project provides GNU tools for Windows, including GNU compilers and various runtime utilities.
|
||||
Thus you can always install full MinGW distribution including gettext tools. However, if you
|
||||
want a minimalistic runtime version that allows you to extract messages and create catalogs
|
||||
you need to download several packages manually.
|
||||
|
||||
In order to install Gettext via MinGW distributing you need to download, a GCC's runtime,
|
||||
iconv library and gettext itself.
|
||||
In order to install Gettext via MinGW you need to download a GCC runtime,
|
||||
an iconv library and Gettext itself.
|
||||
|
||||
So visit a <a href="https://sourceforge.net/projects/mingw/files/">downloads page</a> of MinGW project
|
||||
and download following files (chose the latest versions of each package):
|
||||
So visit MinGW's <a href="https://sourceforge.net/projects/mingw/files/">downloads page</a>
|
||||
and download the following files (choose the latest versions of each package):
|
||||
|
||||
- From: \c MinGW/BaseSystem/GCC/Version4/gcc-xxx/ \n
|
||||
File: \c libgcc-xxx-mingw32-dll-1.tar.lzma
|
||||
@@ -55,16 +55,17 @@ For example, at June 23, 2011 it was:
|
||||
- \c gettext: \c libintl-0.17-1-mingw32-dll-8.tar.lzma, \c libgettextpo-0.17-1-mingw32-dll-0.tar.lzma and \c gettext-0.17-1-mingw32-dev.tar.lzma.
|
||||
|
||||
After you download the packages, extract all the files to the same directory using tools like
|
||||
\c 7zip and you'll get all the executables and \c dll's you need under \c bin subdirectory.
|
||||
\c 7zip and you'll get all the executables and \c dll's you need under the \c bin subdirectory.
|
||||
|
||||
\note the version on MinGW site is slightly outdated (0.17.1) while gettext provides currently 0.18.1.
|
||||
\note The version on the MinGW site is slightly outdated (0.17.1) while Gettext currently provides 0.18.1.
|
||||
|
||||
\section gettext_for_windows_build Building latest version on your own.
|
||||
\section gettext_for_windows_build Building the latest version on your own.
|
||||
|
||||
You can build your own version of GNU Gettext using MinGW environment, you'll need to have up-to-date gcc compiler
|
||||
and the shell, you'll need to install iconv first and then build a gettext with it.
|
||||
You can build your own version of GNU Gettext using the MinGW environment.
|
||||
You'll need to have an up-to-date gcc compiler and the shell.
|
||||
You'll need to install iconv first and then build Gettext with it.
|
||||
|
||||
Basic and simplest way would be to open a MinGW shell
|
||||
The simplest way would be to open a MinGW shell.
|
||||
|
||||
Build \c iconv:
|
||||
|
||||
@@ -80,11 +81,10 @@ make
|
||||
make install
|
||||
\endcode
|
||||
|
||||
And now you have in <tt>c:\\mygettext\\bin</tt> all appropriate executable files
|
||||
to use.
|
||||
And now you have all appropriate executable files ready to use in <tt>c:\\mygettext\\bin</tt>.
|
||||
|
||||
\section gettext_for_windows_cygwin Using Cygwin
|
||||
|
||||
If you already have Cygwin - just use gettext tools provided with it.
|
||||
If you already have Cygwin - just use the Gettext tools provided with it.
|
||||
|
||||
*/
|
||||
|
||||
@@ -7,10 +7,10 @@
|
||||
/*!
|
||||
\page localized_text_formatting Localized Text Formatting
|
||||
|
||||
The \c iostream manipulators are very useful, but when we create a messages for the user, sometimes we need something
|
||||
The \c iostream manipulators are very useful, but when we create messages for the user, sometimes we need something
|
||||
like good old \c printf or \c boost::format.
|
||||
|
||||
Unfortunately \c boost::format has several limitations in context of localization:
|
||||
Unfortunately \c boost::format has several limitations in the context of localization:
|
||||
|
||||
-# It renders all parameters using global locale rather than target \c ostream locale. For example:
|
||||
\n
|
||||
@@ -20,18 +20,18 @@ Unfortunately \c boost::format has several limitations in context of localizatio
|
||||
output << boost::format("%1%") % 1234.345;
|
||||
\endcode
|
||||
\n
|
||||
This would write "1,234.235" to output, instead of the "1.234,234" that is expected for "de_DE" locale
|
||||
-# It knows nothing about the new Boost.Locale manipulators.
|
||||
This would write "1,234.235" to output, instead of the "1.234,234" that is expected for "de_DE" locale.
|
||||
-# It knows nothing about the Boost.Locale manipulators.
|
||||
-# The \c printf-like syntax is very limited for formatting complex localized data, not allowing
|
||||
the formatting of dates, times, or currencies
|
||||
|
||||
Thus a new class, boost::locale::format, was introduced. For example:
|
||||
Thus a new class, boost::locale::format, is introduced. For example:
|
||||
|
||||
\code
|
||||
wcout << wformat(L"Today {1,date} I would meet {2} at home") % time(0) % name <<endl;
|
||||
\endcode
|
||||
|
||||
Each format specifier is enclosed within \c {} brackets, is separated with a comma "," and
|
||||
Each format specifier is enclosed within \c {} brackets, is separated with a comma ",", and
|
||||
may have an additional option after an equals symbol '='. This option may be simple ASCII text or single-quoted localized text.
|
||||
If a single-quote should be inserted within the text, it may be represented with a pair of single-quote characters.
|
||||
|
||||
@@ -41,7 +41,7 @@ Here is an example of a format string:
|
||||
"Ms. {1} had arrived at {2,ftime='%I o''clock'} at home. The exact time is {2,time=full}"
|
||||
\endverbatim
|
||||
|
||||
The syntax is described by following grammar:
|
||||
The syntax is described by the following grammar:
|
||||
|
||||
\verbatim
|
||||
format : '{' parameters '}'
|
||||
@@ -54,14 +54,14 @@ The syntax is described by following grammar:
|
||||
\endverbatim
|
||||
|
||||
|
||||
You can include literal '{' and '}' by inserting double "{{" or "}}"
|
||||
to the text.
|
||||
You can include a literal '{' and '}' by inserting double "{{" or "}}"
|
||||
into the text.
|
||||
|
||||
\code
|
||||
cout << format(translate("Unexpected `{{' in line {1} in file {2}")) % pos % file;
|
||||
\endcode
|
||||
|
||||
Would display something like
|
||||
Would display something like:
|
||||
|
||||
\verbatim
|
||||
Unexpected `{' in line 5 in file source.cpp
|
||||
|
||||
20
doc/main.txt
20
doc/main.txt
@@ -11,7 +11,7 @@
|
||||
\section main_intro What is Boost.Locale?
|
||||
|
||||
Boost.Locale is a library that provides high quality
|
||||
localization facilities in a C++ way. It was originally designed
|
||||
localization facilities in C++. It was originally designed
|
||||
as a part of <a href="http://cppcms.sourceforge.net/">CppCMS</a> - C++ Web Framework
|
||||
project and then contributed to Boost.
|
||||
|
||||
@@ -19,7 +19,7 @@ Boost.Locale gives powerful tools for development of cross platform
|
||||
localized software - the software that talks to user
|
||||
in its language.
|
||||
|
||||
Provided Features:
|
||||
Features:
|
||||
|
||||
- Correct case conversion, case folding and normalization.
|
||||
- Collation (sorting), including support for 4 Unicode
|
||||
@@ -34,17 +34,16 @@ Provided Features:
|
||||
- Powerful message formatting (string translation)
|
||||
including support for plural forms, using GNU catalogs.
|
||||
- Character set conversion.
|
||||
- Transparent support for 8-bit character sets like Latin1
|
||||
- Support for \c char and \c wchar_t
|
||||
- Transparent support for 8-bit character sets like Latin1.
|
||||
- Support for \c char and \c wchar_t.
|
||||
- Experimental support for C++11 \c char16_t and \c char32_t
|
||||
strings and streams.
|
||||
|
||||
Boost.Locale enhances and unifies the standard library's API
|
||||
the way it becomes useful and convenient for development
|
||||
of cross platform and "cross-culture" software.
|
||||
Boost.Locale complements the standard library's API, making it easy
|
||||
to write cross platform and "cross culture" software.
|
||||
|
||||
In order to achieve this goal Boost.Locale uses
|
||||
the-state-of-the-art Unicode and Localization
|
||||
In order to achieve this goal, Boost.Locale uses the
|
||||
state-of-the-art Unicode and Localization
|
||||
library: <a href="http://icu-project.org/">ICU</a> - International Components for Unicode.
|
||||
|
||||
Boost.Locale creates the natural glue between the C++ locales
|
||||
@@ -53,8 +52,7 @@ framework, iostreams, and the powerful ICU library.
|
||||
Boost.Locale provides non-ICU based localization support as well.
|
||||
It is based on the operating system native API or on the standard
|
||||
C++ library support. Sacrificing some less important features,
|
||||
Boost.Locale becomes less powerful but lighter and easier to deploy
|
||||
and use library.
|
||||
Boost.Locale becomes less powerful but lighter and easier to deploy.
|
||||
|
||||
|
||||
\section main_tutorial Tutorials
|
||||
|
||||
@@ -210,10 +210,10 @@ int main()
|
||||
|
||||
\subsection plural_forms Plural Forms
|
||||
|
||||
GNU Gettext catalogs have simple, robust and yet powerful plural forms support. We recommend to read the
|
||||
GNU Gettext catalogs have simple, robust and yet powerful plural forms support. We recommend reading the
|
||||
original GNU documentation <a href="http://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms">here</a>.
|
||||
|
||||
Let's try to solve a simple problem, displaying a message to the user:
|
||||
Let's try to solve a simple problem: displaying a message to the user.
|
||||
|
||||
\code
|
||||
if(files == 1)
|
||||
@@ -244,7 +244,7 @@ For example, the Slavic language family has 3 plural forms, that can be chosen u
|
||||
plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;
|
||||
\endcode
|
||||
|
||||
Such equation is stored in the message catalog itself and it is evaluated during translation to supply the correct form.
|
||||
Such an equation is stored in the message catalog itself and it is evaluated during translation to supply the correct form.
|
||||
|
||||
So the code above would display 3 different forms in Russian locale for values of 1, 3 and 5:
|
||||
|
||||
@@ -401,7 +401,7 @@ xgettext --keyword=translate:1,1t --keyword=translate:1c,2,2t \
|
||||
source_file_1.cpp ... source_file_N.cpp
|
||||
\endcode
|
||||
|
||||
Of course, if you do not use "gettext" like translation you
|
||||
Of course, if you do not use "gettext"-like translations, you
|
||||
may ignore some of these parameters.
|
||||
|
||||
\subsection custom_file_system_support Custom Filesystem Support
|
||||
|
||||
@@ -18,9 +18,9 @@
|
||||
- \ref why_abstract_api
|
||||
- \ref why_no_special_character_type
|
||||
|
||||
\section rationale_why Why is it needed?
|
||||
\section rationale_why Why do I need Boost.Locale?
|
||||
|
||||
Why do we need a localization library, when standard C++ facets (should) provide most of the required functionality:
|
||||
Why do we need a localization library, when standard C++ facets (should) provide most of the required functionality?
|
||||
|
||||
- Case conversion is done using the \c std::ctype facet
|
||||
- Collation is supported by \c std::collate and has nice integration with \c std::locale
|
||||
@@ -28,11 +28,11 @@ Why do we need a localization library, when standard C++ facets (should) provide
|
||||
time, and currency formatting and parsing.
|
||||
- There is a \c std::messages class that supports localized message formatting.
|
||||
|
||||
So why do we need such library if we have all the functionality within the standard library?
|
||||
So why do we need such a library if we have all the functionality within the standard library?
|
||||
|
||||
Almost every(!) facet has design flaws:
|
||||
|
||||
- \c std::collate supports only one level of collation, not allowing you to choose whether case- or accent-sensitive comparisons
|
||||
- \c std::collate supports only one level of collation; it does not allow you to choose whether case- or accent-sensitive comparisons
|
||||
should be performed.
|
||||
|
||||
- \c std::ctype, which is responsible for case conversion, assumes that all conversions can be done on a per-character basis. This is
|
||||
@@ -176,17 +176,17 @@ to provide all the required information.
|
||||
- ICU fully understands POSIX locales and knows how to treat them correctly.
|
||||
- They are native locale names for most operating system APIs (with the exception of Windows)
|
||||
|
||||
\section why_linear_chunks Why most parts of Boost.Locale work only on linear/contiguous chunks of text
|
||||
\section why_linear_chunks Why do most parts of Boost.Locale work only on linear/contiguous chunks of text?
|
||||
|
||||
There are two reasons:
|
||||
|
||||
- Boost.Locale relies heavily on the third-party APIs like ICU, POSIX or Win32 API, all of them
|
||||
work only on linear chunks of text, so providing non-linear API would just hide the
|
||||
real situation and would not bring real performance advantage.
|
||||
real situation and would hurt performance.
|
||||
- In fact, all known libraries that work with Unicode: ICU, Qt, Glib, Win32 API, POSIX API
|
||||
and others accept an input as single linear chunk of text and there is a good reason for this:
|
||||
and others accept input as single linear chunks of text and there is a good reason for this:
|
||||
\n
|
||||
-# Most of supported operations on text like collation, case handling usually work on small
|
||||
-# Most supported operations on text like collation, case handling usually work on small
|
||||
chunks of text. For example: you probably would never want to compare two chapters of a book, but rather
|
||||
their titles.
|
||||
-# We should remember that even very large texts require quite a small amount of memory, for example
|
||||
@@ -201,7 +201,7 @@ However:
|
||||
on large chunks of text, will provide an interface for non-linear text handling.
|
||||
|
||||
|
||||
\section why_abstract_api Why all Boost.Locale implementation is hidden behind abstract interfaces and does not use template metaprogramming?
|
||||
\section why_abstract_api Why is all Boost.Locale implementation hidden behind abstract interfaces instead of using template metaprogramming?
|
||||
|
||||
There are several major reasons:
|
||||
|
||||
@@ -212,7 +212,7 @@ There are several major reasons:
|
||||
- This approach reduces compilation times significantly. This is very important for a library that may be
|
||||
used in almost every part of a specific program.
|
||||
|
||||
\section why_no_special_character_type Why Boost.Locale does not provide char16_t/char32_t for non-C++11 platforms.
|
||||
\section why_no_special_character_type Why doesn't Boost.Locale provide char16_t/char32_t for non-C++11 platforms?
|
||||
|
||||
There are several reasons:
|
||||
|
||||
@@ -226,7 +226,7 @@ There are several reasons:
|
||||
These are exactly the reasons why Boost.Locale fails with current limited C++11 characters support on GCC-4.5 (the second reason)
|
||||
and MSVC-2010 (the first reason)
|
||||
|
||||
So basically it is impossible to use non-C++ characters with the C++'s locales framework.
|
||||
Basically it is impossible to use non-C++ characters with the C++'s locales framework.
|
||||
|
||||
The best and the most portable solution is to use the C++'s \c char type and UTF-8 encodings.
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ on the platform, so they may be even less convenient when dealing with Unicode t
|
||||
|
||||
\subsection myths_utf16 UTF-16 is the best encoding to work with.
|
||||
|
||||
There is common assumption that UTF-16 is the best encoding for storing information because it gives "shortest" representation
|
||||
There is a common assumption that UTF-16 is the best encoding for storing information because it gives the "shortest" representation
|
||||
of strings.
|
||||
|
||||
In fact, it is probably the most error-prone encoding to work with. The biggest issue is code points that lie outside of the BMP,
|
||||
|
||||
@@ -7,13 +7,13 @@
|
||||
/*!
|
||||
\page running_examples_under_windows Running Examples under Microsoft Windows
|
||||
|
||||
All of the examples that come with Boost.Locale are designed for UTF-8 and it is
|
||||
All of the examples that come with Boost.Locale are designed for UTF-8; it is
|
||||
the default encoding used by Boost.Locale.
|
||||
|
||||
However, the default narrow encoding under Microsoft Windows is not UTF-8 and
|
||||
the output of the applications would not be displayed correctly in the console.
|
||||
the output of the applications will not be displayed correctly in the console.
|
||||
|
||||
So in order to use UTF-8 encoding under the Windows console and see the output correctly, do the following:
|
||||
In order to use UTF-8 encoding in the Windows console and see the output correctly, do the following:
|
||||
|
||||
-# Open a \c cmd window
|
||||
-# Change the default font to a TrueType font: go to properties-\>font (right click on title-bar-\>properties-\>font) and
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
/*!
|
||||
\page status_of_cpp0x_characters_support Status of C++11 char16_t/char32_t support
|
||||
|
||||
The support of C++11 \c char16_t and \c char32_t is experimental, mostly does not work and not
|
||||
intended to be used in production with current latest compilers: GCC-4.5, MSVC10 till major
|
||||
compiler's flaws would be fixed.
|
||||
The support of C++11 \c char16_t and \c char32_t is experimental, mostly does not work, and is not
|
||||
intended to be used in production with the latest compilers: GCC-4.5, MSVC10 until major
|
||||
compiler flaws are fixed.
|
||||
|
||||
\section status_of_cpp0x_characters_support_gnu GNU GCC 4.5/C++11 Status
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ int main()
|
||||
|
||||
const int first = calendar().first_day_of_week();
|
||||
|
||||
// Print weeks days
|
||||
// Print week days
|
||||
for(int i = 0; i < 7; i++) {
|
||||
date_time tmp(now, period::day_of_week() * (first + i));
|
||||
std::cout << format("{1,w=8,ftime='%a'} ") % tmp;
|
||||
|
||||
@@ -18,7 +18,7 @@ int main()
|
||||
std::locale::global(gen(""));
|
||||
/// Set global locale to requested
|
||||
|
||||
/// Create a set that includes all strings sorted according to ABC order
|
||||
/// Create a set that includes all strings sorted in alphabetical order
|
||||
/// std::locale can be used as an object for comparison
|
||||
typedef std::set<std::string, std::locale> set_type;
|
||||
set_type all_strings;
|
||||
|
||||
@@ -47,9 +47,8 @@ int main()
|
||||
std::locale::global(loc);
|
||||
std::wcout.imbue(loc);
|
||||
|
||||
// This is needed to prevent C library to
|
||||
// convert strings to narrow
|
||||
// instead of C++ on some platforms
|
||||
// This is needed to prevent the C stdio library from
|
||||
// converting strings to narrow on some platforms
|
||||
std::ios_base::sync_with_stdio(false);
|
||||
|
||||
std::wstring text = L"Hello World! あにま! Linux2.6 and Windows7 is word and number. שָלוֹם עוֹלָם!";
|
||||
|
||||
@@ -43,9 +43,8 @@ int main()
|
||||
std::locale::global(loc);
|
||||
std::wcout.imbue(loc);
|
||||
|
||||
// This is needed to prevent C library to
|
||||
// convert strings to narrow
|
||||
// instead of C++ on some platforms
|
||||
// This is needed to prevent the C stdio library from
|
||||
// converting strings to narrow on some platforms
|
||||
std::ios_base::sync_with_stdio(false);
|
||||
|
||||
std::wcout << L"Correct case conversion can't be done by simple, character by character conversion\n";
|
||||
|
||||
@@ -18,9 +18,8 @@ int main()
|
||||
std::locale::global(loc);
|
||||
std::wcout.imbue(loc);
|
||||
|
||||
// This is needed to prevent C library to
|
||||
// convert strings to narrow
|
||||
// instead of C++ on some platforms
|
||||
// This is needed to prevent the C stdio library from
|
||||
// converting strings to narrow on some platforms
|
||||
std::ios_base::sync_with_stdio(false);
|
||||
|
||||
std::wcout << wformat(L"Today {1,date} at {1,time} we had run our first localization example") % time(0)
|
||||
|
||||
@@ -20,18 +20,18 @@ namespace boost { namespace locale { namespace boundary {
|
||||
/// It represents a pair - an iterator and a rule that defines this
|
||||
/// point.
|
||||
///
|
||||
/// This type of object is dereference by the iterators of boundary_point_index. Using a rule()
|
||||
/// This type of object is dereferenced by the iterators of boundary_point_index. Using a rule()
|
||||
/// member function you can get the reason why this specific boundary point was selected.
|
||||
///
|
||||
/// For example, When you use a sentence boundary analysis, the (rule() & \ref sentence_term) != 0 means
|
||||
/// For example, when you use sentence boundary analysis, the (rule() & \ref sentence_term) != 0 means
|
||||
/// that this boundary point was selected because a sentence terminator (like .?!) was spotted
|
||||
/// and the (rule() & \ref sentence_sep)!=0 means that a separator like line feed or carriage
|
||||
/// return was observed.
|
||||
///
|
||||
/// \note
|
||||
///
|
||||
/// - The beginning of analyzed range is always considered a boundary point and its rule is always 0.
|
||||
/// - when using a word boundary analysis the returned rule relates to a chunk of text preceding
|
||||
/// - The beginning of the analyzed range is always considered a boundary point and its rule is always 0.
|
||||
/// - When using word boundary analysis, the returned rule relates to a chunk of text preceding
|
||||
/// this point.
|
||||
///
|
||||
/// \see
|
||||
|
||||
@@ -24,13 +24,13 @@ namespace boost { namespace locale {
|
||||
///
|
||||
/// @{
|
||||
|
||||
/// \brief This structure is used for representing boundary point
|
||||
/// that follows the offset.
|
||||
/// \brief This structure is used for representing boundary points
|
||||
/// that follow the offset.
|
||||
struct break_info {
|
||||
/// Create empty break point at beginning
|
||||
break_info() : offset(0), rule(0) {}
|
||||
|
||||
/// Create empty break point at offset v.
|
||||
/// Create an empty break point at offset v.
|
||||
/// It is useful for order comparison with other points.
|
||||
break_info(size_t v) : offset(v), rule(0) {}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace boost { namespace locale { namespace boundary {
|
||||
///
|
||||
/// \defgroup boundary Boundary Analysis
|
||||
///
|
||||
/// This module contains all operations required for %boundary analysis of text: character, word, like and sentence
|
||||
/// This module contains all operations required for %boundary analysis of text: character, word, line and sentence
|
||||
/// boundaries
|
||||
///
|
||||
/// @{
|
||||
@@ -753,7 +753,7 @@ namespace boost { namespace locale { namespace boundary {
|
||||
/// Create a boundary_point_index from a \ref segment_index. It copies all indexing information
|
||||
/// and uses the default rule (all possible %boundary points)
|
||||
///
|
||||
/// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
|
||||
/// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text
|
||||
/// range it is much better to create one from another rather than indexing the same
|
||||
/// range twice.
|
||||
///
|
||||
@@ -762,7 +762,7 @@ namespace boost { namespace locale { namespace boundary {
|
||||
/// Copy a boundary_point_index from a \ref segment_index. It copies all indexing information
|
||||
/// and keeps the current \ref rule() unchanged
|
||||
///
|
||||
/// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
|
||||
/// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text
|
||||
/// range it is much better to create one from another rather than indexing the same
|
||||
/// range twice.
|
||||
///
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
namespace boost { namespace locale {
|
||||
|
||||
/// \brief This namespase contains all operations required for boundary analysis of text
|
||||
/// \brief This namespace contains all operations required for boundary analysis of text
|
||||
namespace boundary {
|
||||
/// \defgroup boundary Boundary Analysis
|
||||
///
|
||||
|
||||
@@ -238,7 +238,7 @@ namespace boost { namespace locale {
|
||||
// mbstate_t is a POD type and should be initialized to 0 (i.e. state = stateT())
|
||||
// according to standard. We use it to keep a flag 0/1 for surrogate pair writing
|
||||
//
|
||||
// if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd
|
||||
// if 0 no code above 0xFFFF observed, if 1 a code above 0xFFFF observed
|
||||
// and first pair is written, but no input consumed
|
||||
boost::uint16_t& state = *reinterpret_cast<boost::uint16_t*>(&std_state);
|
||||
typename CodecvtImpl::state_type cvt_state =
|
||||
@@ -269,7 +269,7 @@ namespace boost { namespace locale {
|
||||
} else {
|
||||
// for other codepoints we do following
|
||||
//
|
||||
// 1. We can't consume our input as we may find ourselfs
|
||||
// 1. We can't consume our input as we may find ourselves
|
||||
// in a state where all input is consumed but not all output is written, i.e. only
|
||||
// 1st pair is written
|
||||
// 2. We only write first pair and mark this in the state, we also revert back
|
||||
@@ -320,7 +320,7 @@ namespace boost { namespace locale {
|
||||
// according to standard. We assume that sizeof(mbstate_t) >=2 in order
|
||||
// to be able to store first observed surrogate pair
|
||||
//
|
||||
// State: state!=0 - a first surrogate pair was observerd (state = first pair),
|
||||
// State: state!=0 - a first surrogate pair was observed (state = first pair),
|
||||
// we expect the second one to come and then zero the state
|
||||
boost::uint16_t& state = *reinterpret_cast<boost::uint16_t*>(&std_state);
|
||||
typename CodecvtImpl::state_type cvt_state =
|
||||
@@ -472,7 +472,7 @@ namespace boost { namespace locale {
|
||||
// mbstate_t is a POD type and should be initialized to 0 (i.e. state = stateT())
|
||||
// according to standard. We use it to keep a flag 0/1 for surrogate pair writing
|
||||
//
|
||||
// if 0 no code above >0xFFFF observed, of 1 a code above 0xFFFF observerd
|
||||
// if 0 no code above 0xFFFF observed, if 1 a code above 0xFFFF observed
|
||||
// and first pair is written, but no input consumed
|
||||
auto cvt_state = implementation().initial_state(generic_codecvt_base::to_unicode_state);
|
||||
while(to < to_end && from < from_end) {
|
||||
|
||||
@@ -36,8 +36,8 @@ namespace boost { namespace locale {
|
||||
/// -# \c message_path - path to the location of message catalogs (vector of strings)
|
||||
/// -# \c message_application - the name of applications that use message catalogs (vector of strings)
|
||||
///
|
||||
/// Each backend can be installed with a different default priotiry so when you work with two different backends,
|
||||
/// you can specify priotiry so this backend will be chosen according to their priority.
|
||||
/// Each backend can be installed with a different default priority so when you work with two different backends,
|
||||
/// you can specify a priority so that the backend is chosen according to that priority.
|
||||
class BOOST_LOCALE_DECL localization_backend {
|
||||
protected:
|
||||
localization_backend(const localization_backend&) = default;
|
||||
|
||||
@@ -97,7 +97,7 @@ namespace boost { namespace locale {
|
||||
namespace detail {
|
||||
inline bool is_us_ascii_char(char c)
|
||||
{
|
||||
// works for null terminated strings regardless char "signness"
|
||||
// works for null terminated strings regardless of char "signedness"
|
||||
return 0 < c && c < 0x7F;
|
||||
}
|
||||
inline bool is_us_ascii_string(const char* msg)
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
#include <typeinfo>
|
||||
|
||||
namespace boost { namespace locale {
|
||||
/// \brief This namespace provides various utility function useful for Boost.Locale backends
|
||||
/// \brief This namespace provides various utility functions useful for Boost.Locale's backends
|
||||
/// implementations
|
||||
namespace util {
|
||||
|
||||
@@ -205,7 +205,7 @@ namespace boost { namespace locale {
|
||||
/// This function installs codecvt that can be used for conversion between single byte
|
||||
/// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points,
|
||||
///
|
||||
/// Throws boost::locale::conv::invalid_charset_error if the chacater set is not supported or isn't single byte
|
||||
/// Throws boost::locale::conv::invalid_charset_error if the character set is not supported or isn't single byte
|
||||
/// character set
|
||||
BOOST_LOCALE_DECL
|
||||
std::locale create_simple_codecvt(const std::locale& in, const std::string& encoding, char_facet_t type);
|
||||
|
||||
Reference in New Issue
Block a user