/******************************************************************************
UTF-16 to UTF-8 converter
By Ahmed habib (Prmigo)
*******************************************************************************/
#include <iostream>
using namespace std;
/**
 * Converts a NUL-terminated UTF-16 string to UTF-8 and prints the result.
 *
 * For every decoded code point one line is written to std::cout:
 *     (1Byte) XX
 *     (2Bytes) XX-XX
 *     (3Bytes) XX-XX-XX
 *     (4Bytes) XX-XX-XX-XX
 * where each XX is one UTF-8 byte in uppercase hexadecimal (no zero padding).
 *
 * Ill-formed input is handled instead of being trusted: an unpaired
 * surrogate (a high surrogate not followed by a low one, or a low surrogate
 * on its own) is reported as U+FFFD REPLACEMENT CHARACTER and consumes
 * exactly one code unit, so decoding always makes progress and never reads
 * past the terminator.
 *
 * The formatting flags of std::cout are restored before returning.
 *
 * @param str NUL-terminated UTF-16 text; a null pointer prints nothing.
 */
void ToUTF8(const char16_t *str) {
    if (str == nullptr) {
        return;
    }

    // U+FFFD REPLACEMENT CHARACTER, substituted for unpaired surrogates.
    const unsigned int kReplacementChar = 0xFFFD;
    // Line prefix indexed by (UTF-8 length - 1).
    static const char *const kLabels[4] = {"(1Byte) ", "(2Bytes) ", "(3Bytes) ", "(4Bytes) "};

    // hex/uppercase are sticky stream flags; remember the caller's settings.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    std::cout << std::hex << std::uppercase;

    while (*str) {
        //-------(1) UTF-16 to codepoint -------
        unsigned int codepoint = kReplacementChar;
        const unsigned int unit = *str;
        if (unit < 0xD800 || unit > 0xDFFF) {
            // Any non-surrogate unit is a BMP code point (including U+E000..U+FFFF).
            codepoint = unit;
            ++str;
        } else if (unit <= 0xDBFF && str[1] >= 0xDC00 && str[1] <= 0xDFFF) {
            // Well-formed pair. The high half needs up to 20 bits, so it is
            // computed in unsigned int rather than a 16-bit type.
            const unsigned int highBits = (unit - 0xD800) << 10;
            const unsigned int lowBits = static_cast<unsigned int>(str[1]) - 0xDC00;
            codepoint = (highBits | lowBits) + 0x10000;
            str += 2;
        } else {
            // Unpaired surrogate: keep the replacement character and skip one
            // unit only. str[1] above is at worst the terminator, never beyond.
            ++str;
        }

        //-------(2) Codepoint to UTF-8 -------
        unsigned char utf8[4] = {0};
        int length = 0;
        if (codepoint <= 0x007F) {
            utf8[0] = static_cast<unsigned char>(codepoint);
            length = 1;
        } else if (codepoint <= 0x07FF) {
            utf8[0] = static_cast<unsigned char>(((codepoint >> 6) & 0x1F) | 0xC0);
            utf8[1] = static_cast<unsigned char>((codepoint & 0x3F) | 0x80);
            length = 2;
        } else if (codepoint <= 0xFFFF) {
            utf8[0] = static_cast<unsigned char>(((codepoint >> 12) & 0x0F) | 0xE0);
            utf8[1] = static_cast<unsigned char>(((codepoint >> 6) & 0x3F) | 0x80);
            utf8[2] = static_cast<unsigned char>((codepoint & 0x3F) | 0x80);
            length = 3;
        } else {
            // Surrogate pairs decode to at most U+10FFFF, so four bytes suffice.
            utf8[0] = static_cast<unsigned char>(((codepoint >> 18) & 0x07) | 0xF0);
            utf8[1] = static_cast<unsigned char>(((codepoint >> 12) & 0x3F) | 0x80);
            utf8[2] = static_cast<unsigned char>(((codepoint >> 6) & 0x3F) | 0x80);
            utf8[3] = static_cast<unsigned char>((codepoint & 0x3F) | 0x80);
            length = 4;
        }

        std::cout << kLabels[length - 1];
        for (int i = 0; i < length; ++i) {
            if (i != 0) {
                std::cout << '-';
            }
            // Widen so the byte is printed as a number, not as a character.
            std::cout << static_cast<unsigned int>(utf8[i]);
        }
        std::cout << '\n';
    }

    std::cout.flags(savedFlags);
}
/**
 * Demo entry point: prints the UTF-8 encoding of a short mixed-script sample.
 */
int main() {
    // An Arabic, English and Chinese character.
    // A u"..." literal has type const char16_t[], which cannot be passed to a
    // non-const pointer parameter. Copying it into a local mutable array keeps
    // the call valid for both `char16_t *` and `const char16_t *` parameters.
    char16_t sample[] = u"شa中";
    ToUTF8(sample);
    return 0;
}