转码整理, 资料来源于网络
charset.h
#pragma once
#include <iostream>
#include <string>
// Character-set conversion helpers. "Unicode" in these names means a
// std::wstring of wide characters; "Ansi" means a multibyte std::string in
// an ANSI code page.

// Converts a wide (Unicode) string to a multibyte ANSI string.
std::string UnicodeToAnsi(const std::wstring& unicode);
// Converts a multibyte ANSI string to a wide (Unicode) string.
std::wstring AnsiToUnicode(const std::string& ansi);
// Converts a multibyte ANSI string to UTF-8.
std::string AnsiToUtf8(const std::string& strSrc);
// Converts a UTF-8 string to a multibyte ANSI string.
std::string Utf8ToAnsi(const std::string& strSrc);
// Converts a wide (Unicode) string to UTF-8.
std::string UnicodeToUtf8(const std::wstring& wstrSrc);
// Converts a UTF-8 string to a wide (Unicode) string.
std::wstring Utf8ToUnicode(const std::string& strSrc);
// Converts GBK-encoded text to UTF-8.
std::string GBKToUtf8(const std::string& gbk);
// Converts UTF-8 text to GBK.
std::string Utf8ToGBK(const std::string& utf8);
// Converts GB2312 text (Windows code page 936) to a wide (Unicode) string.
std::wstring GB2312ToUnicode(const std::string& gb2312);
// Converts a wide (Unicode) string to GB2312 (Windows code page 936).
std::string UnicodeToGB2312(const std::wstring& unicode);
// Converts BIG5 text (Windows code page 950) to a wide (Unicode) string.
std::wstring BIG5ToUnicode(const std::string& big5);
// Converts a wide (Unicode) string to BIG5 (Windows code page 950).
std::string UnicodeToBIG5(const std::wstring& unicode);
// Converts Traditional Chinese BIG5 text to Simplified Chinese GB2312 text.
std::string FBIG5ToGB2312(const std::string& big5);
// Converts Simplified Chinese GB2312 text to Traditional Chinese BIG5 text.
// Note: unlike its siblings, this one takes its argument by value.
std::string GB2312ToFBIG5(const std::string gb2312);
// Reports whether the `size` bytes starting at pBuffer pass the UTF-8
// lead-byte / continuation-byte check implemented in charset.cpp.
bool IsUTF8(const void* pBuffer, long size);
main.cpp
#include "charset.h"
// Writes `len` bytes starting at `bytes` to stdout as two-digit lowercase
// hex values, each followed by one space. No newline is emitted, and a
// non-positive `len` prints nothing.
void showHex(const char* bytes, int len) {
    const char* cursor = bytes;
    int remaining = len;
    while (remaining > 0) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
        const unsigned char value = (unsigned char)*cursor;
        printf("%02x ", value);
        ++cursor;
        --remaining;
    }
}
void showHex(std::string charset, std::string str) {
printf("%10s: ", charset.data());
showHex(str.data(), str.size());
printf("\n");
}
void showHex(std::string charset, std::wstring str) {
printf("%10s: ", charset.data());
showHex((char*)str.data(), 2 * str.size());
printf("\n");
}
// Demo driver: dumps a sample string as hex after each conversion so the
// encodings can be compared by eye, then waits for a key press.
int main(int argc, char* argv[])
{
    // The same text as a wide literal and as a narrow literal.
    // NOTE(review): the bytes of the narrow literal presumably depend on the
    // compiler's source/execution character set - confirm for your build.
    const std::wstring wideSample(L"中abc国");
    const std::string narrowSample("中abc国");

    // Raw inputs.
    showHex("unicode", wideSample);
    showHex("ansi", narrowSample);

    // Wide -> ANSI -> wide round trip.
    std::string ansiBytes = UnicodeToAnsi(wideSample);
    showHex("ansi", ansiBytes);
    std::wstring wideBytes = AnsiToUnicode(ansiBytes);
    showHex("unicode", wideBytes);

    // ANSI -> UTF-8 -> ANSI round trip.
    std::string utf8Bytes = AnsiToUtf8(narrowSample);
    showHex("utf8", utf8Bytes);
    ansiBytes = Utf8ToAnsi(utf8Bytes);
    showHex("ansi", ansiBytes);

    // Wide -> UTF-8 -> wide round trip.
    utf8Bytes = UnicodeToUtf8(wideSample);
    showHex("utf8", utf8Bytes);
    wideBytes = Utf8ToUnicode(utf8Bytes);
    showHex("unicode", wideBytes);

    // UTF-8 -> GBK -> UTF-8 round trip.
    std::string gbkBytes = Utf8ToGBK(utf8Bytes);
    showHex("gbk", gbkBytes);
    utf8Bytes = GBKToUtf8(gbkBytes);
    showHex("utf8", utf8Bytes);

    // Keep the console window open until a key is pressed.
    getchar();
    return 0;
}
charset.cpp
#include "charset.h"
#include <Windows.h>
// Converts a wide (UTF-16) string to a multibyte string in the calling
// thread's ANSI code page (CP_THREAD_ACP).
//
// The conversion is driven by unicode.size() rather than by a terminating
// NUL, so the result holds exactly the converted bytes: no trailing '\0' is
// counted in size(), and embedded NUL characters do not cut the input short.
//
// unicode: text to convert.
// Returns the converted bytes, or an empty string when the input is empty,
// too long for the API's int length, or the conversion fails.
//
// NOTE(review): AnsiToUnicode in this file uses CP_ACP, not CP_THREAD_ACP;
// the two can differ if the thread locale was changed - confirm the intent.
std::string UnicodeToAnsi(const std::wstring& unicode)
{
    const int srcLen = static_cast<int>(unicode.size());
    // The API rejects a zero length; the second test catches sizes that do
    // not survive the narrowing to int.
    if (srcLen <= 0 || static_cast<std::wstring::size_type>(srcLen) != unicode.size()) {
        return std::string();
    }

    // First call: ask how many bytes the converted text needs.
    const int dstLen = WideCharToMultiByte(CP_THREAD_ACP, 0, unicode.data(), srcLen, NULL, 0, NULL, NULL);
    if (dstLen <= 0) {
        return std::string();
    }

    // Second call: convert straight into the result's own storage.
    std::string strRet(static_cast<std::string::size_type>(dstLen), '\0');
    const int written = WideCharToMultiByte(CP_THREAD_ACP, 0, unicode.data(), srcLen, &strRet[0], dstLen, NULL, NULL);
    strRet.resize(written > 0 ? static_cast<std::string::size_type>(written) : 0);
    return strRet;
}
// Converts a multibyte string in the system ANSI code page (CP_ACP) to a
// wide (UTF-16) string.
//
// The conversion is driven by ansi.size() rather than by a terminating NUL,
// so the result holds exactly the converted characters: no trailing L'\0' is
// counted in size(), and embedded NUL bytes do not cut the input short.
//
// ansi: bytes to convert.
// Returns the converted text, or an empty string when the input is empty,
// too long for the API's int length, or the conversion fails.
std::wstring AnsiToUnicode(const std::string& ansi)
{
    const int srcLen = static_cast<int>(ansi.size());
    // The API rejects a zero length; the second test catches sizes that do
    // not survive the narrowing to int.
    if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != ansi.size()) {
        return std::wstring();
    }

    // First call: ask how many wide characters are needed.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, ansi.data(), srcLen, NULL, 0);
    if (wideLen <= 0) {
        return std::wstring();
    }

    // Second call: convert straight into the result's own storage.
    std::wstring wstrRet(static_cast<std::wstring::size_type>(wideLen), L'\0');
    const int written = MultiByteToWideChar(CP_ACP, 0, ansi.data(), srcLen, &wstrRet[0], wideLen);
    wstrRet.resize(written > 0 ? static_cast<std::wstring::size_type>(written) : 0);
    return wstrRet;
}
std::string AnsiToUtf8(const std::string& ansi)
{
LPCCH ptr = ansi.c_str();
/* 分配目标空间, 长度为 Ansi 编码的两倍 */
int size = MultiByteToWideChar(CP_ACP, 0, ptr, -1, NULL, NULL);
std::wstring wstrTemp(size, 0);
int len = MultiByteToWideChar(CP_ACP, 0, ptr, -1, (LPWSTR)wstrTemp.c_str(), size);
return UnicodeToUtf8(wstrTemp);
}
std::string Utf8ToAnsi(const std::string& utf8)
{
std::wstring wstrTemp = Utf8ToUnicode(utf8);
LPCWCH ptr = wstrTemp.c_str();
int size = WideCharToMultiByte(CP_ACP, 0, ptr, -1, NULL, 0, NULL, NULL);
std::string strRet(size, 0);
int len = WideCharToMultiByte(CP_ACP, 0, ptr, -1, (LPSTR)strRet.c_str(), size, NULL, NULL);
return strRet;
}
// Converts a wide (UTF-16) string to UTF-8.
//
// The conversion is driven by unicode.size() rather than by a terminating
// NUL, so the result holds exactly the converted bytes: no trailing '\0' is
// counted in size(), and embedded NUL characters do not cut the input short.
//
// unicode: text to convert.
// Returns the UTF-8 bytes, or an empty string when the input is empty, too
// long for the API's int length, or the conversion fails.
std::string UnicodeToUtf8(const std::wstring& unicode)
{
    const int srcLen = static_cast<int>(unicode.size());
    // The API rejects a zero length; the second test catches sizes that do
    // not survive the narrowing to int.
    if (srcLen <= 0 || static_cast<std::wstring::size_type>(srcLen) != unicode.size()) {
        return std::string();
    }

    // First call: ask how many bytes the UTF-8 form needs.
    const int dstLen = WideCharToMultiByte(CP_UTF8, 0, unicode.data(), srcLen, NULL, 0, NULL, NULL);
    if (dstLen <= 0) {
        return std::string();
    }

    // Second call: convert straight into the result's own storage.
    std::string strRet(static_cast<std::string::size_type>(dstLen), '\0');
    const int written = WideCharToMultiByte(CP_UTF8, 0, unicode.data(), srcLen, &strRet[0], dstLen, NULL, NULL);
    strRet.resize(written > 0 ? static_cast<std::string::size_type>(written) : 0);
    return strRet;
}
// Converts a UTF-8 string to a wide (UTF-16) string.
//
// The conversion is driven by utf8.size() rather than by a terminating NUL,
// so the result holds exactly the converted characters: no trailing L'\0' is
// counted in size(), and embedded NUL bytes do not cut the input short.
//
// utf8: bytes to convert.
// Returns the converted text, or an empty string when the input is empty,
// too long for the API's int length, or the conversion fails.
std::wstring Utf8ToUnicode(const std::string& utf8)
{
    const int srcLen = static_cast<int>(utf8.size());
    // The API rejects a zero length; the second test catches sizes that do
    // not survive the narrowing to int.
    if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != utf8.size()) {
        return std::wstring();
    }

    // First call: ask how many wide characters are needed.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8.data(), srcLen, NULL, 0);
    if (wideLen <= 0) {
        return std::wstring();
    }

    // Second call: convert straight into the result's own storage.
    std::wstring wstrRet(static_cast<std::wstring::size_type>(wideLen), L'\0');
    const int written = MultiByteToWideChar(CP_UTF8, 0, utf8.data(), srcLen, &wstrRet[0], wideLen);
    wstrRet.resize(written > 0 ? static_cast<std::wstring::size_type>(written) : 0);
    return wstrRet;
}
std::string GBKToUtf8(const std::string& gbk)
{
return AnsiToUtf8(gbk);
}
std::string Utf8ToGBK(const std::string& utf8)
{
return Utf8ToAnsi(utf8);
}
// Reports whether a byte buffer is structurally valid UTF-8.
//
// Each lead byte must be followed by the right number of continuation bytes
// (10xxxxxx). One- to four-byte sequences are accepted (lead bytes up to
// 0xF4). A multi-byte sequence that is cut off by the end of the buffer is
// tolerated and does not make the buffer invalid.
// Overlong encodings and surrogate code points are not rejected.
//
// pBuffer: start of the bytes to inspect.
// size:    number of bytes to inspect.
// Returns true when no structural violation is found (including for an
// empty buffer); false on the first violation, or when pBuffer is null
// while size is positive.
bool IsUTF8(const void* pBuffer, long size)
{
    if (size <= 0) {
        return true;    // nothing to inspect
    }
    if (!pBuffer) {
        return false;   // no data behind a positive size
    }

    const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
    const unsigned char* const end = cur + size;

    while (cur < end) {
        const unsigned char lead = *cur;
        long trailing = 0;  // continuation bytes this lead byte announces

        if (lead < 0x80) {          // 0xxxxxxx: ASCII
            trailing = 0;
        } else if (lead < 0xC0) {   // 10xxxxxx: continuation byte without a lead
            return false;
        } else if (lead < 0xE0) {   // 110xxxxx: 2-byte sequence
            trailing = 1;
        } else if (lead < 0xF0) {   // 1110xxxx: 3-byte sequence
            trailing = 2;
        } else if (lead < 0xF5) {   // 11110xxx up to 0xF4: 4-byte sequence
            trailing = 3;
        } else {                    // 0xF5..0xFF never appear in UTF-8
            return false;
        }

        // The buffer ends inside this sequence: accept what was seen so far.
        if (end - cur <= trailing) {
            return true;
        }

        for (long i = 1; i <= trailing; ++i) {
            if ((cur[i] & 0xC0) != 0x80) {
                return false;
            }
        }
        cur += trailing + 1;
    }
    return true;
}
//GB2312 转换成 Unicode
std::wstring GB2312ToUnicode(const std::string& gb2312)
{
UINT nCodePage = 936; //GB2312
int size = MultiByteToWideChar(nCodePage, 0, gb2312.c_str(), -1, NULL, 0);
std::wstring wstrRet(size, 0);
MultiByteToWideChar(nCodePage, 0, gb2312.c_str(), -1, (LPWSTR)wstrRet.c_str(), size);
return wstrRet;
}
//BIG5 转换成 Unicode
std::wstring BIG5ToUnicode(const std::string& big5)
{
UINT nCodePage = 950; //BIG5
int size = MultiByteToWideChar(nCodePage, 0, big5.c_str(), -1, NULL, 0);
std::wstring wstrRet(size, 0);
MultiByteToWideChar(nCodePage, 0, big5.c_str(), -1, (LPWSTR)wstrRet.c_str(), size);
return wstrRet;
}
//Unicode 转换成 GB2312
std::string UnicodeToGB2312(const std::wstring& unicode)
{
UINT nCodePage = 936; //GB2312
int size = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, NULL, 0, NULL, NULL);
std::string strRet(size, 0);
WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, (LPSTR)strRet.c_str(), size, NULL, NULL);
return strRet;
}
//Unicode 转换成 BIG5
std::string UnicodeToBIG5(const std::wstring& unicode)
{
UINT nCodePage = 950; //BIG5
int size = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, NULL, 0, NULL, NULL);
std::string strRet(size, 0);
WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, (LPSTR)strRet.c_str(), size, NULL, NULL);
return strRet;
}
//繁体中文BIG5 转换成 简体中文 GB2312
std::string FBIG5ToGB2312(const std::string& big5)
{
LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
std::wstring unicode = BIG5ToUnicode(big5);
std::string gb2312 = UnicodeToGB2312(unicode);
int size = LCMapStringA(lcid, LCMAP_SIMPLIFIED_CHINESE, gb2312.c_str(), -1, NULL, 0);
std::string strRet(size, 0);
LCMapStringA(0x0804, LCMAP_SIMPLIFIED_CHINESE, gb2312.c_str(), -1, (LPSTR)strRet.c_str(), size);
return strRet;
}
//简体中文 GB2312 转换成 繁体中文BIG5
std::string GB2312ToFBIG5(const std::string gb2312)
{
LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
int size = LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, gb2312.c_str(), -1, NULL, 0);
std::string strRet(size, 0);
LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, gb2312.c_str(), -1, (LPSTR)strRet.c_str(), size);
std::wstring unicode = GB2312ToUnicode(strRet);
std::string big5 = UnicodeToBIG5(unicode);
return big5;
}
原文链接: https://www.cnblogs.com/baigoogledu/p/7098135.html
欢迎关注
微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍
原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/256120
非原创文章文中已经注明原地址,如有侵权,联系删除
关注公众号【高性能架构探索】,第一时间获取最新文章
转载文章受原作者版权保护。转载请注明原作者出处!