9 #ifndef ADOBE_UNICODE_HPP 10 #define ADOBE_UNICODE_HPP 18 #include <boost/cstdint.hpp> 19 #include <boost/utility/enable_if.hpp> 31 #if !defined(ADOBE_NO_DOCUMENTATION) 37 {
enum { value =
sizeof(T) == 1 }; };
43 {
enum { value =
sizeof(T) == 2 }; };
49 {
enum { value =
sizeof(T) == 4 }; };
// Trait over an iterator type I: value mirrors is_utf8_type applied to
// std::iterator_traits<I>::value_type.
// NOTE(review): the template header that declares I is not present in this listing.
54 struct is_utf8_iterator_type
55 {
enum { value = is_utf8_type<typename std::iterator_traits<I>::value_type>::value }; };
// Trait over an iterator type I: value mirrors is_utf16_type applied to
// std::iterator_traits<I>::value_type.
// NOTE(review): the template header that declares I is not present in this listing.
60 struct is_utf16_iterator_type
61 {
enum { value = is_utf16_type<typename std::iterator_traits<I>::value_type>::value }; };
// Trait over an iterator type I: value mirrors is_utf32_type applied to
// std::iterator_traits<I>::value_type.
// NOTE(review): the template header that declares I is not present in this listing.
66 struct is_utf32_iterator_type
67 {
enum { value = is_utf32_type<typename std::iterator_traits<I>::value_type>::value }; };
71 namespace implementation {
// Thresholds compared, in ascending order, against the first byte of a UTF-8
// sequence by the UTF-8 to_utf32 overload:
//   below pivot_1 (128)        -> the byte is copied through as the code point
//   below pivot_2 (192)        -> rejected with std::runtime_error
//   below pivot_3 .. pivot_7   -> decoded as a 2, 3, 4, 5 or 6 byte sequence
//   pivot_7 (254) and above    -> rejected with std::runtime_error
77 const unsigned char to_utf32_pivot_1_k(128);
78 const unsigned char to_utf32_pivot_2_k(192);
79 const unsigned char to_utf32_pivot_3_k(224);
80 const unsigned char to_utf32_pivot_4_k(240);
81 const unsigned char to_utf32_pivot_5_k(248);
82 const unsigned char to_utf32_pivot_6_k(252);
83 const unsigned char to_utf32_pivot_7_k(254);
// Thresholds compared, in ascending order, against a 32-bit code value by
// value_to_utf8: below pivot_1 one byte is written; below pivot_2 .. pivot_5
// a 2, 3, 4 or 5 byte sequence is written; anything larger uses 6 bytes.
85 const boost::uint32_t to_utf8_pivot_1_k(1UL << 7);
86 const boost::uint32_t to_utf8_pivot_2_k(1UL << 11);
87 const boost::uint32_t to_utf8_pivot_3_k(1UL << 16);
88 const boost::uint32_t to_utf8_pivot_4_k(1UL << 21);
89 const boost::uint32_t to_utf8_pivot_5_k(1UL << 26);
// Largest code value that value_to_utf16 writes as a single 16-bit unit;
// larger values are written as a high/low pair.
91 const boost::uint16_t to_utf16_surrogate_pivot_k(65535);
// Inclusive bounds of the two 16-bit ranges used for the first (high) and
// second (low) unit of such a pair.
92 const boost::uint16_t utf16_high_surrogate_front_k(0xd800);
93 const boost::uint16_t utf16_high_surrogate_back_k(0xdbff);
94 const boost::uint16_t utf16_low_surrogate_front_k(0xdc00);
95 const boost::uint16_t utf16_low_surrogate_back_k(0xdfff);
/*
    utf8_header_t<N>::value is the marker bit pattern that is OR-ed into (or
    stripped from) one UTF-8 code unit:

        N == 0      marker for a unit that is not a sequence header (10xxxxxx)
        N == 2..6   marker for the header unit of an N-byte sequence

    The primary template is intentionally empty, so asking for any other N
    (there is no specialization for 1 here) fails to compile as soon as
    ::value is named.
*/
template <std::size_t NumBytes>
struct utf8_header_t
{
};

template <>
struct utf8_header_t<0>
{
    static const char value = '\x80'; // 1000 0000
};

template <>
struct utf8_header_t<2>
{
    static const char value = '\xC0'; // 1100 0000
};

template <>
struct utf8_header_t<3>
{
    static const char value = '\xE0'; // 1110 0000
};

template <>
struct utf8_header_t<4>
{
    static const char value = '\xF0'; // 1111 0000
};

template <>
struct utf8_header_t<5>
{
    static const char value = '\xF8'; // 1111 1000
};

template <>
struct utf8_header_t<6>
{
    static const char value = '\xFC'; // 1111 1100
};
/*
    Returns code with every bit of the compile-time constant Mask switched on,
    narrowed to a single char.

    Mask            bit pattern to OR in
    BinaryInteger   any integral type that supports operator|
*/
template <char Mask, typename BinaryInteger>
inline char add_mask(BinaryInteger code)
{
    return static_cast<char>(code | Mask);
}
119 template <std::
size_t NumBytes,
bool Header,
typename BinaryInteger>
120 inline char utf8_add_mask(BinaryInteger code)
121 {
return add_mask<utf8_header_t<Header ? NumBytes : 0>::value>(code); }
// Non-template shorthand for utf8_add_mask<0, false>(code); used by the
// demotion_engine_t<1, false> specialization.
// NOTE(review): the braces of this function are not present in this listing.
125 inline char utf8_add_mask_0_false(boost::uint32_t code)
127 return utf8_add_mask<0,false>(code);
/*
    Returns code with every bit of the compile-time constant Mask cleared,
    narrowed to a single char.

    Mask            bit pattern to remove
    BinaryInteger   any integral type that supports operator&
*/
template <char Mask, typename BinaryInteger>
inline char strip_mask(BinaryInteger code)
{
    return static_cast<char>(code & ~Mask);
}
136 template <std::
size_t NumBytes,
bool Header,
typename BinaryInteger>
137 inline char utf8_strip_mask(BinaryInteger code)
138 {
return strip_mask<utf8_header_t<Header ? NumBytes : 0>::value>(code); }
142 template <std::
size_t Position>
143 inline boost::uint32_t promote_fragment(
char fragment)
144 {
return boost::uint32_t(fragment << ((Position - 1) * 6)); }
// Position 1 is the lowest fragment: it is converted to 32 bits without a shift.
// NOTE(review): the `template <>` line for this specialization is not present in this listing.
147 inline boost::uint32_t promote_fragment<1>(
char fragment)
148 {
return boost::uint32_t(fragment); }
// Position 0 is only declared here; no body is visible in this listing
// (presumably so that any use of it is rejected at build time — confirm).
151 inline boost::uint32_t promote_fragment<0>(char);
155 template <std::
size_t Position>
156 inline char demote_fragment(boost::uint32_t fragment)
157 {
return char((fragment >> ((Position - 1) * 6)) & 0x0000003F); }
// Position 1 is the lowest fragment: the low six bits are returned without a shift.
// NOTE(review): the `template <>` line for this specialization is not present in this listing.
160 inline char demote_fragment<1>(boost::uint32_t fragment)
161 {
return char(fragment & 0x0000003F); }
// Position 0 is only declared here; no body is visible in this listing
// (presumably so that any use of it is rejected at build time — confirm).
164 inline char demote_fragment<0>(boost::uint32_t);
// Non-template shorthand for demote_fragment<1>(fragment); used by the
// demotion_engine_t<1, false> specialization.
// NOTE(review): the braces of this function are not present in this listing.
167 inline char demote_fragment_1(boost::uint32_t fragment)
169 return demote_fragment<1>(fragment);
// Function object that writes a 32-bit code value as UTF-8 units, one unit
// per recursion level.
//   ByteCount  position of the unit written by this level
//   Header     true (the default) only for the outermost level, which gets the
//              header marker for a ByteCount-byte sequence
// Each level writes its unit through i and then hands off to
// demotion_engine_t<ByteCount - 1, false>.
// NOTE(review): braces and the lines between the write and the recursive call
// (where the iterator is presumably advanced) are not present in this listing.
175 template <std::
size_t ByteCount,
bool Header = true>
176 struct demotion_engine_t
178 template <
typename OutputIterator>
179 inline OutputIterator operator () (boost::uint32_t code, OutputIterator i)
181 *i = utf8_add_mask<ByteCount, Header>(demote_fragment<ByteCount>(code));
185 return demotion_engine_t<ByteCount - 1,
false>()(code, i);
// Final level of the demotion recursion: writes the last unit (lowest six bits
// of code, with the non-header marker) through i.
// NOTE(review): the `template <>` line, braces and the return statement are not
// present in this listing.
191 struct demotion_engine_t<1, false>
193 template <
typename OutputIterator>
194 inline OutputIterator operator () (boost::uint32_t code, OutputIterator i)
196 *i = utf8_add_mask_0_false(demote_fragment_1(code));
// Function object that reads UTF-8 units and assembles a 32-bit code value,
// one unit per recursion level.
//   ByteCount  position of the unit consumed by this level
//   Header     true (the default) only for the outermost level, whose unit
//              carries the header marker for a ByteCount-byte sequence
// Each level strips the marker from its unit, shifts the payload into place and
// ORs it with the result of promotion_engine_t<ByteCount - 1, false>.
// Throws std::runtime_error when the input ends before the sequence is complete.
// first is taken by reference, so the caller's iterator is the one being read from.
// NOTE(review): braces, the definition of n, and the lines guarding the throw
// are not present in this listing.
204 template <std::
size_t ByteCount,
bool Header = true>
205 struct promotion_engine_t
207 template <
typename InputIterator>
208 inline boost::uint32_t operator () (InputIterator& first, InputIterator last)
216 char stripped(utf8_strip_mask<ByteCount, Header>(n));
217 boost::uint32_t shifted(promote_fragment<ByteCount>(stripped));
222 throw std::runtime_error(
"unicode: utf32 conversion ran out of input");
224 return shifted | promotion_engine_t<ByteCount - 1,
false>()(first, last);
// Final level of the promotion recursion: strips the non-header marker from
// *first and uses the remaining bits, unshifted, as the lowest fragment.
// The second (end) iterator parameter is unnamed and unused.
// NOTE(review): the `template <>` line, braces and everything after the first
// statement are not present in this listing.
229 struct promotion_engine_t<1, false>
231 template <
typename InputIterator>
232 inline boost::uint32_t operator () (InputIterator& first, InputIterator)
234 boost::uint32_t result(promote_fragment<1>(utf8_strip_mask<0, false>(*first)));
// to_utf32, UTF-16 input: decodes one code value from [first, last) into result.
// Enabled only for iterators accepted by is_utf16_iterator_type.
//   - empty range: returns first unchanged
//   - unit inside the high-surrogate range: combined with the next unit as a pair;
//     throws std::runtime_error when no further unit is available
//   - unit inside the low-surrogate range: throws std::runtime_error
//   - any other unit: stored in result unchanged (cast to DestInteger)
// NOTE(review): braces, the iterator advances and the final return are not
// present in this listing.
244 template <
typename InputIterator,
typename DestInteger>
245 typename boost::enable_if<is_utf16_iterator_type<InputIterator>, InputIterator>::type
246 to_utf32 (InputIterator first, InputIterator last, DestInteger& result)
248 if (first == last)
return first;
250 boost::uint16_t code(static_cast<boost::uint16_t>(*first));
254 if (code >= implementation::utf16_high_surrogate_front_k &&
255 code <= implementation::utf16_high_surrogate_back_k)
260 throw std::runtime_error(
"unicode: utf16 high surrogate found without low surrogate");
262 boost::uint16_t low(static_cast<boost::uint16_t>(*first));
// NOTE(review): the second unit is range-checked with assert only, so the
// check is compiled out when NDEBUG is defined, whereas the other malformed
// cases throw — confirm this asymmetry is intended.
264 assert (low >= implementation::utf16_low_surrogate_front_k &&
265 low <= implementation::utf16_low_surrogate_back_k);
// (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000 rebuilds the code value
// from the pair.
269 result = (code - implementation::utf16_high_surrogate_front_k) * 0x400 +
270 (low - implementation::utf16_low_surrogate_front_k) + 0x10000;
272 else if (code >= implementation::utf16_low_surrogate_front_k &&
273 code <= implementation::utf16_low_surrogate_back_k)
274 {
throw std::runtime_error(
"unicode: utf16 low surrogate found without high surrogate"); }
276 { result =
static_cast<DestInteger
>(code); }
// to_utf32, UTF-8 input: decodes one code value from [first, last) into result.
// Enabled only for iterators accepted by is_utf8_iterator_type.
// The first byte is compared against the to_utf32_pivot_*_k thresholds:
//   - below 128: the byte itself is the result and first is advanced by one
//   - 128..191: throws std::runtime_error (not a valid first byte)
//   - 192..253: handed to promotion_engine_t<2> .. <6>, chosen by range
//   - 254 and above: throws std::runtime_error
// NOTE(review): braces, the leading lines of the body, the final `else` and the
// return are not present in this listing.
283 template <
typename InputIterator,
typename DestInteger>
284 typename boost::enable_if<is_utf8_iterator_type<InputIterator>, InputIterator>::type
285 to_utf32 (InputIterator first, InputIterator last, DestInteger& result)
290 unsigned char n(static_cast<unsigned char>(*first));
292 if (n < implementation::to_utf32_pivot_1_k)
293 { result =
static_cast<DestInteger
>(n); ++first; }
294 else if (n < implementation::to_utf32_pivot_2_k)
295 {
throw std::runtime_error(
"unicode: ill-defined utf8 (< 192)"); }
296 else if (n < implementation::to_utf32_pivot_3_k)
297 result = implementation::promotion_engine_t<2>()(first, last);
298 else if (n < implementation::to_utf32_pivot_4_k)
299 result = implementation::promotion_engine_t<3>()(first, last);
300 else if (n < implementation::to_utf32_pivot_5_k)
301 result = implementation::promotion_engine_t<4>()(first, last);
302 else if (n < implementation::to_utf32_pivot_6_k)
303 result = implementation::promotion_engine_t<5>()(first, last);
304 else if (n < implementation::to_utf32_pivot_7_k)
305 result = implementation::promotion_engine_t<6>()(first, last);
307 {
throw std::runtime_error(
"unicode: ill-defined utf8 (>= 254)"); }
314 template <
typename InputIterator,
typename DestInteger>
315 typename boost::enable_if<is_utf32_iterator_type<InputIterator>, InputIterator>::type
316 to_utf32 (InputIterator first, InputIterator last, DestInteger& result)
// value_to_utf8, 32-bit input: writes one code value to output as UTF-8.
// Enabled only for T accepted by is_utf32_type.
// The value is compared against the to_utf8_pivot_*_k thresholds: below 1 << 7
// it is written as a single char; otherwise demotion_engine_t<2> .. <6> is
// chosen by range, with <6> as the fallback for everything at or above 1 << 26.
// NOTE(review): the second template parameter, the function name line, braces,
// the final `else` and the return are not present in this listing.
341 template <
typename T,
343 typename boost::enable_if<is_utf32_type<T>, O>::type
346 if (code < implementation::to_utf8_pivot_1_k)
347 { *output =
static_cast<char>(code); ++output; }
348 else if (code < implementation::to_utf8_pivot_2_k)
349 output = implementation::demotion_engine_t<2>()(code, output);
350 else if (code < implementation::to_utf8_pivot_3_k)
351 output = implementation::demotion_engine_t<3>()(code, output);
352 else if (code < implementation::to_utf8_pivot_4_k)
353 output = implementation::demotion_engine_t<4>()(code, output);
354 else if (code < implementation::to_utf8_pivot_5_k)
355 output = implementation::demotion_engine_t<5>()(code, output);
357 output = implementation::demotion_engine_t<6>()(code, output);
// value_to_utf8, 16-bit input: enabled only for T accepted by is_utf16_type.
// Widens code to boost::uint32_t and forwards to the 32-bit overload; no other
// processing of the unit is visible here.
// NOTE(review): the second template parameter, the function name line and the
// braces are not present in this listing.
369 template <
typename T,
371 typename boost::enable_if<is_utf16_type<T>, O>::type
374 return value_to_utf8(static_cast<boost::uint32_t>(code), output);
384 template <
typename T,
386 typename boost::enable_if<is_utf8_type<T>, O>::type
// to_utf8, UTF-16 range input: enabled only for iterators accepted by
// is_utf16_iterator_type. Loops until first reaches last, using a 32-bit
// temporary per iteration.
// NOTE(review): the loop body beyond the temporary, the function name line and
// the return are not present in this listing; presumably each iteration decodes
// one code value with to_utf32 and re-encodes it — confirm.
401 template <
typename I,
403 typename boost::enable_if<is_utf16_iterator_type<I>, O>::type
406 while (first != last)
408 boost::uint32_t result;
// to_utf8, UTF-32 range input: enabled only for iterators accepted by
// is_utf32_iterator_type. Returns output untouched for an empty range;
// otherwise applies value_to_utf8 to every element via adobe::for_each.
// NOTE(review): the function name line, braces and the return are not present
// in this listing.
425 template <
typename I,
427 typename boost::enable_if<is_utf32_iterator_type<I>, O>::type
430 if (first == last)
return output;
432 typedef typename std::iterator_traits<I>::value_type value_type;
// NOTE(review): output is bound with boost::ref, but the iterator returned by
// each value_to_utf8 call is not captured here. If value_to_utf8 takes its
// iterator by value, `output` is never advanced by this loop — harmless for
// insert iterators, suspicious for pointer-like output iterators. Confirm.
434 adobe::for_each(first, last, boost::bind(&value_to_utf8<value_type, O>, _1, boost::ref(output)));
446 template <
typename I,
448 typename boost::enable_if<is_utf8_iterator_type<I>, O>::type
// value_to_utf16: writes one 32-bit code value to output as UTF-16.
// Enabled only for T accepted by is_utf32_type.
//   - code <= 65535: written as a single 16-bit unit
//   - otherwise: written as two units, (code - 0x10000) / 0x400 + 0xD800
//     followed by (code - 0x10000) % 0x400 + 0xDC00
// NOTE(review): the second template parameter, the function name line, braces,
// the `else`, the iterator advances and the return are not present in this
// listing. No range check on code is visible here.
461 template <
typename T,
463 typename boost::enable_if<is_utf32_type<T>, O>::type
466 if (code <= implementation::to_utf16_surrogate_pivot_k)
468 *output =
static_cast<boost::uint16_t
>(code);
472 *output =
static_cast<boost::uint16_t
>((code - 0x10000) / 0x400 + implementation::utf16_high_surrogate_front_k);
476 *output =
static_cast<boost::uint16_t
>((code - 0x10000) % 0x400 + implementation::utf16_low_surrogate_front_k);
// to_utf16, UTF-8 range input: enabled only for iterators accepted by
// is_utf8_iterator_type. Loops until first reaches last, using a 32-bit
// temporary per iteration.
// NOTE(review): the loop body beyond the temporary, the function name line and
// the return are not present in this listing; presumably each iteration decodes
// one code value with to_utf32 and re-encodes it with value_to_utf16 — confirm.
488 template <
typename I,
490 typename boost::enable_if<is_utf8_iterator_type<I>, O>::type
493 while (first != last)
495 boost::uint32_t result;
511 template <
typename I,
513 typename boost::enable_if<is_utf16_iterator_type<I>, O>::type
524 template <
typename I>
525 inline typename boost::enable_if<is_utf8_iterator_type<I>, boost::uint16_t>::type
528 boost::uint32_t result;
532 return static_cast<boost::uint16_t
>(result);
546 template <
typename I,
550 boost::uint32_t result;
552 while (first != last)
569 template <
typename I>
572 boost::uint32_t result;
boost::enable_if< is_utf32_type< T >, O >::type value_to_utf16(T code, O output)
boost::enable_if< is_utf16_iterator_type< I >, O >::type to_utf8(I first, I last, O output)
boost::uint32_t to_utf32(I first, I last)
OutputIterator copy(const InputRange &range, OutputIterator result)
copy implementation
O to_utf32(I first, I last, O output)
void for_each(InputIterator first, InputIterator last, UnaryFunction f)
for_each implementation
boost::enable_if< is_utf8_iterator_type< I >, O >::type to_utf16(I first, I last, O output)
boost::enable_if< is_utf32_type< T >, O >::type value_to_utf8(T code, O output)