blob: 524bb4d506a2caddc9c8a450fb082478db4e1947 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<title>Boost.Locale: wboundary.cpp</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
</head>
<body>
<div id="top"><!-- do not remove this div! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><img alt="Logo" src="boost-small.png"/></td>
<td style="padding-left: 0.5em;">
<div id="projectname">Boost.Locale
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- Generated by Doxygen 1.7.6.1 -->
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="index.html"><span>Main&#160;Page</span></a></li>
<li><a href="modules.html"><span>Modules</span></a></li>
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
<li><a href="annotated.html"><span>Classes</span></a></li>
<li><a href="files.html"><span>Files</span></a></li>
<li><a href="examples.html"><span>Examples</span></a></li>
</ul>
</div>
</div>
<div id="side-nav" class="ui-resizable side-nav-resizable">
<div id="nav-tree">
<div id="nav-tree-contents">
</div>
</div>
<div id="splitbar" style="-moz-user-select:none;"
class="ui-resizable-handle">
</div>
</div>
<script type="text/javascript">
initNavTree('wboundary_8cpp-example.html','');
</script>
<div id="doc-content">
<div class="header">
<div class="headertitle">
<div class="title">wboundary.cpp</div> </div>
</div><!--header-->
<div class="contents">
<p>Example of using segment_index over wide strings</p>
<div class="fragment"><pre class="fragment"><span class="comment">//</span>
<span class="comment">// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)</span>
<span class="comment">//</span>
<span class="comment">// Distributed under the Boost Software License, Version 1.0. (See</span>
<span class="comment">// accompanying file LICENSE_1_0.txt or copy at</span>
<span class="comment">// http://www.boost.org/LICENSE_1_0.txt)</span>
<span class="comment">//</span>
<span class="comment">//</span>
<span class="comment">// ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !</span>
<span class="comment">//</span>
<span class="comment">// BIG FAT WARNING FOR Microsoft Visual Studio Users</span>
<span class="comment">//</span>
<span class="comment">// YOU NEED TO CONVERT THIS SOURCE FILE ENCODING TO UTF-8 WITH BOM ENCODING.</span>
<span class="comment">//</span>
<span class="comment">// Unfortunately MSVC understands that the source code is encoded as</span>
<span class="comment">// UTF-8 only if you add a useless BOM at the beginning.</span>
<span class="comment">//</span>
<span class="comment">// So, before you compile &quot;wide&quot; examples with MSVC, please convert them to text</span>
<span class="comment">// files with BOM. There are two very simple ways to do it:</span>
<span class="comment">//</span>
<span class="comment">// 1. Open the file with Notepad and save it from there. It will convert </span>
<span class="comment">// it to a file with BOM.</span>
<span class="comment">// 2. In Visual Studio go File-&gt;Advanced Save Options... and select</span>
<span class="comment">// Unicode (UTF-8 with signature) Codepage 65001</span>
<span class="comment">//</span>
<span class="comment">// Note: once converted to UTF-8 with BOM, this source code would not</span>
<span class="comment">// compile with other compilers, because no-one uses BOM with UTF-8 today</span>
<span class="comment">// because it is absolutely meaningless in context of UTF-8.</span>
<span class="comment">//</span>
<span class="comment">// ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !</span>
<span class="comment">//</span>
<span class="preprocessor">#include &lt;boost/locale.hpp&gt;</span>
<span class="preprocessor">#include &lt;iostream&gt;</span>
<span class="preprocessor">#include &lt;cassert&gt;</span>
<span class="preprocessor">#include &lt;ctime&gt;</span>
<span class="keywordtype">int</span> main()
{
<span class="keyword">using namespace </span>boost::locale;
<span class="keyword">using namespace </span>std;
<span class="comment">// Create system default locale</span>
<a name="_a0"></a><a class="code" href="classboost_1_1locale_1_1generator.html" title="the major class used for locale generation">generator</a> gen;
locale loc=gen(<span class="stringliteral">&quot;&quot;</span>);
locale::global(loc);
wcout.imbue(loc);
<span class="comment">// This is needed to prevent the C library from</span>
<span class="comment">// converting strings to narrow encoding</span>
<span class="comment">// instead of the C++ library on some platforms</span>
std::ios_base::sync_with_stdio(<span class="keyword">false</span>);
wstring text=L<span class="stringliteral">&quot;Hello World! あにま! Linux2.6 and Windows7 is word and number. שָלוֹם עוֹלָם!&quot;</span>;
wcout&lt;&lt;text&lt;&lt;endl;
<a name="_a1"></a><a class="code" href="classboost_1_1locale_1_1boundary_1_1segment__index.html" title="This class holds an index of segments in the text range and allows to iterate over them...">boundary::wssegment_index</a> index(<a name="a2"></a><a class="code" href="group__boundary.html#gga15de9963ce9bb6037c8525901dfbf641a99aad8b8a5e25baa9f695abe5e574bb6" title="Analyse the text for word boundaries.">boundary::word</a>,text.begin(),text.end());
<a class="code" href="group__boundary.html#gaf7a775e77dbbca3495e11d646df96fd2">boundary::wssegment_index::iterator</a> p,e;
<span class="keywordflow">for</span>(p=index.begin(),e=index.end();p!=e;++p) {
wcout&lt;&lt;L<span class="stringliteral">&quot;Part [&quot;</span>&lt;&lt;*p&lt;&lt;L<span class="stringliteral">&quot;] has &quot;</span>;
<span class="keywordflow">if</span>(p-&gt;rule() &amp; <a name="a3"></a><a class="code" href="group__boundary.html#gaac78a3a6b9e671e253a2591cb56a479b" title="Word that appear to be a number.">boundary::word_number</a>)
wcout&lt;&lt;L<span class="stringliteral">&quot;number(s) &quot;</span>;
<span class="keywordflow">if</span>(p-&gt;rule() &amp; <a name="a4"></a><a class="code" href="group__boundary.html#ga8b7e6ce2fcb185845fa85a1a4c42b3c4" title="Word that contains letters, excluding kana and ideographic characters.">boundary::word_letter</a>)
wcout&lt;&lt;L<span class="stringliteral">&quot;letter(s) &quot;</span>;
<span class="keywordflow">if</span>(p-&gt;rule() &amp; <a name="a5"></a><a class="code" href="group__boundary.html#ga52d8c63e1f3f8c898c645352206a78ef" title="Word that contains kana characters.">boundary::word_kana</a>)
wcout&lt;&lt;L<span class="stringliteral">&quot;kana character(s) &quot;</span>;
<span class="keywordflow">if</span>(p-&gt;rule() &amp; <a name="a6"></a><a class="code" href="group__boundary.html#ga705ab96f9e62810c8ed977c90d404ef8" title="Word that contains ideographic characters.">boundary::word_ideo</a>)
wcout&lt;&lt;L<span class="stringliteral">&quot;ideographic character(s) &quot;</span>;
<span class="keywordflow">if</span>(p-&gt;rule() &amp; <a name="a7"></a><a class="code" href="group__boundary.html#ga90cf4f01b95658f659685377226677e7" title="Not a word, like white space or punctuation mark.">boundary::word_none</a>)
wcout&lt;&lt;L<span class="stringliteral">&quot;no word characters&quot;</span>;
wcout&lt;&lt;endl;
}
index.<a name="a8"></a><a class="code" href="group__boundary.html#gafa2a756b10d3522743204b45b794bb3e">map</a>(<a name="a9"></a><a class="code" href="group__boundary.html#gga15de9963ce9bb6037c8525901dfbf641ae80c964112541d88af9b875dafb6605d" title="Analyse the text for character boundaries.">boundary::character</a>,text.begin(),text.end());
<span class="keywordflow">for</span>(p=index.begin(),e=index.end();p!=e;++p) {
wcout&lt;&lt;L<span class="stringliteral">&quot;|&quot;</span> &lt;&lt;*p ;
}
wcout&lt;&lt;L<span class="stringliteral">&quot;|\n\n&quot;</span>;
index.map(<a name="a10"></a><a class="code" href="group__boundary.html#gga15de9963ce9bb6037c8525901dfbf641a72895419fdd7219b3695cf0abd602ea4" title="Analyse the text for positions suitable for line breaks.">boundary::line</a>,text.begin(),text.end());
<span class="keywordflow">for</span>(p=index.begin(),e=index.end();p!=e;++p) {
wcout&lt;&lt;L<span class="stringliteral">&quot;|&quot;</span> &lt;&lt;*p ;
}
wcout&lt;&lt;L<span class="stringliteral">&quot;|\n\n&quot;</span>;
index.map(<a name="a11"></a><a class="code" href="group__boundary.html#gga15de9963ce9bb6037c8525901dfbf641a88aa1509eace7589f5df87d4694871e9" title="Analyse the text for sentence boundaries.">boundary::sentence</a>,text.begin(),text.end());
<span class="keywordflow">for</span>(p=index.begin(),e=index.end();p!=e;++p) {
wcout&lt;&lt;L<span class="stringliteral">&quot;|&quot;</span> &lt;&lt;*p ;
}
wcout&lt;&lt;<span class="stringliteral">&quot;|\n\n&quot;</span>;
}
<span class="comment">// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4</span>
<span class="comment">// boostinspect:noascii</span>
</pre></div> </div><!-- contents -->
</div><!-- contents -->
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="footer">
&copy; Copyright 2009-2012 Artyom Beilis, Distributed under the <a href="http://www.boost.org/LICENSE_1_0.txt">Boost Software License</a>, Version 1.0.
</li>
</ul>
</div>
</body>
</html>