blob: b86d3ba3cf496433ba982a93a467a5ce322c834a [file] [log] [blame]
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
#define BOOST_LOCALE_SOURCE
#include <boost/locale/encoding.hpp>
#include <boost/shared_ptr.hpp>
#include "../encoding/conv.hpp"
#include <boost/locale/util.hpp>
#include "all_generator.hpp"
#include <errno.h>
#include <algorithm>
#include <stdexcept>
#include <vector>
#include "codecvt.hpp"
#ifdef BOOST_LOCALE_WITH_ICONV
#include "../util/iconv.hpp"
#endif
namespace boost {
namespace locale {
namespace impl_posix {
#ifdef BOOST_LOCALE_WITH_ICONV
class mb2_iconv_converter : public util::base_converter {
public:
mb2_iconv_converter(std::string const &encoding) :
encoding_(encoding),
to_utf_((iconv_t)(-1)),
from_utf_((iconv_t)(-1))
{
iconv_t d = (iconv_t)(-1);
std::vector<uint32_t> first_byte_table;
try {
d = iconv_open(utf32_encoding(),encoding.c_str());
if(d == (iconv_t)(-1)) {
throw std::runtime_error("Unsupported encoding" + encoding);
}
for(unsigned c=0;c<256;c++) {
char ibuf[2] = { char(c) , 0 };
char *in = ibuf;
size_t insize =2;
uint32_t obuf[2] = {illegal,illegal};
char *out = reinterpret_cast<char *>(obuf);
size_t outsize = 8;
// Basic sigle codepoint conversion
call_iconv(d,&in,&insize,&out,&outsize);
if(insize == 0 && outsize == 0 && obuf[1] == 0) {
first_byte_table.push_back(obuf[0]);
continue;
}
// Test if this is illegal first byte or incomplete
in = ibuf;
insize = 1;
out = reinterpret_cast<char *>(obuf);
outsize = 8;
call_iconv(d,0,0,0,0);
size_t res = call_iconv(d,&in,&insize,&out,&outsize);
// Now if this single byte starts a sequence we add incomplete
// to know to ask that we need two bytes, othewise it may only be
// illegal
uint32_t point;
if(res == (size_t)(-1) && errno == EINVAL)
point = incomplete;
else
point = illegal;
first_byte_table.push_back(point);
}
}
catch(...) {
if(d!=(iconv_t)(-1))
iconv_close(d);
throw;
}
iconv_close(d);
first_byte_table_.reset(new std::vector<uint32_t>());
first_byte_table_->swap(first_byte_table);
}
mb2_iconv_converter(mb2_iconv_converter const &other) :
first_byte_table_(other.first_byte_table_),
encoding_(other.encoding_),
to_utf_((iconv_t)(-1)),
from_utf_((iconv_t)(-1))
{
}
virtual ~mb2_iconv_converter()
{
if(to_utf_ != (iconv_t)(-1))
iconv_close(to_utf_);
if(from_utf_ != (iconv_t)(-1))
iconv_close(from_utf_);
}
virtual bool is_thread_safe() const
{
return false;
}
virtual mb2_iconv_converter *clone() const
{
return new mb2_iconv_converter(*this);
}
uint32_t to_unicode(char const *&begin,char const *end)
{
if(begin == end)
return incomplete;
unsigned char seq0 = *begin;
uint32_t index = (*first_byte_table_)[seq0];
if(index == illegal)
return illegal;
if(index != incomplete) {
begin++;
return index;
}
else if(begin+1 == end)
return incomplete;
open(to_utf_,utf32_encoding(),encoding_.c_str());
// maybe illegal or may be double byte
char inseq[3] = { static_cast<char>(seq0) , begin[1], 0};
char *inbuf = inseq;
size_t insize = 3;
uint32_t result[2] = { illegal, illegal };
size_t outsize = 8;
char *outbuf = reinterpret_cast<char*>(result);
call_iconv(to_utf_,&inbuf,&insize,&outbuf,&outsize);
if(outsize == 0 && insize == 0 && result[1]==0 ) {
begin+=2;
return result[0];
}
return illegal;
}
uint32_t from_unicode(uint32_t cp,char *begin,char const *end)
{
if(cp == 0) {
if(begin!=end) {
*begin = 0;
return 1;
}
else {
return incomplete;
}
}
open(from_utf_,encoding_.c_str(),utf32_encoding());
uint32_t codepoints[2] = {cp,0};
char *inbuf = reinterpret_cast<char *>(codepoints);
size_t insize = sizeof(codepoints);
char outseq[3] = {0};
char *outbuf = outseq;
size_t outsize = 3;
call_iconv(from_utf_,&inbuf,&insize,&outbuf,&outsize);
if(insize != 0 || outsize > 1)
return illegal;
size_t len = 2 - outsize ;
size_t reminder = end - begin;
if(reminder < len)
return incomplete;
for(unsigned i=0;i<len;i++)
*begin++ = outseq[i];
return len;
}
void open(iconv_t &d,char const *to,char const *from)
{
if(d!=(iconv_t)(-1))
return;
d=iconv_open(to,from);
}
static char const *utf32_encoding()
{
union { char one; uint32_t value; } test;
test.value = 1;
if(test.one == 1)
return "UTF-32LE";
else
return "UTF-32BE";
}
virtual int max_len() const
{
return 2;
}
private:
boost::shared_ptr<std::vector<uint32_t> > first_byte_table_;
std::string encoding_;
iconv_t to_utf_;
iconv_t from_utf_;
};
std::auto_ptr<util::base_converter> create_iconv_converter(std::string const &encoding)
{
std::auto_ptr<util::base_converter> cvt;
try {
cvt.reset(new mb2_iconv_converter(encoding));
}
catch(std::exception const &e) {
// Nothing to do, just retrun empty cvt
}
return cvt;
}
#else // no iconv
std::auto_ptr<util::base_converter> create_iconv_converter(std::string const &/*encoding*/)
{
std::auto_ptr<util::base_converter> cvt;
return cvt;
}
#endif
std::locale create_codecvt(std::locale const &in,std::string const &encoding,character_facet_type type)
{
std::auto_ptr<util::base_converter> cvt;
if(conv::impl::normalize_encoding(encoding.c_str())=="utf8")
cvt = util::create_utf8_converter();
else {
cvt = util::create_simple_converter(encoding);
if(!cvt.get())
cvt = create_iconv_converter(encoding);
}
return util::create_codecvt(in,cvt,type);
}
} // impl_posix
} // locale
} // boost
// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4