C++ 字符编码转换类

记录一下C++ 编码转换的函数:

1 #pragma once
 2 #include "afx.h"
 3 
 4 
 5 #define DEFAULT_CODE 0
 6 #define CHINESE_SIMPLIFIED 1
 7 #define   CHINESE_TRADITIONAL 2
 8 
 9 class CChineseConvertor:
10 //public CObject
11 {
12 public:
13     CChineseConvertor(void);
14     ~CChineseConvertor(void);
15     LPSTR Big52GBKSimplified(char * szText);
16     LPSTR Big52GBKTraditional(char * szText);
17     LPSTR GBK2Big5(char * szText);
18     LPSTR GBKSimplified2GBKTraditional(char * szSimplified);
19     LPSTR GBKTraditional2GBKSimplified(char * szTraditional);
20     LPWSTR UTF82UNICODE(char*   utf8str);
21     LPSTR UNICODE2UTF8(LPCWSTR  strText);
22 
23     char *m_pszUnknown;
24     // 转换到Unicode
25     LPWSTR ToUnicode(char * szSource, int nEncoding);
26     LPSTR ToMultiByte(LPCWSTR szSource, int nEncoding);
27 };
1 #include "stdafx.h"
  2 #include "Coding.h"
  3 
  4 
  5  
  6 CChineseConvertor::CChineseConvertor(void)
  7 {
  8   m_pszUnknown = new char[2];
  9   m_pszUnknown[0]=' ';
 10   m_pszUnknown[1]=0;
 11 }
 12 
 13 CChineseConvertor::~CChineseConvertor(void)
 14 {
 15   delete[] m_pszUnknown;
 16   m_pszUnknown = NULL;
 17 }
 18 
 19 //big5 to GBK_简体
 20 LPSTR CChineseConvertor::Big52GBKSimplified(char * szText)
 21 {
 22   int nLength;
 23   wchar_t *pBuffer;
 24   LPSTR pResult;
 25   int nResultLength;
 26 
 27   nLength=MultiByteToWideChar(950,0,szText,strlen(szText),NULL,0);
 28   pBuffer=new wchar_t[nLength+1];
 29   MultiByteToWideChar(950,0,(LPCSTR)szText,strlen(szText),(LPWSTR)pBuffer,nLength);
 30   pBuffer[nLength]=0;
 31 
 32   nResultLength=WideCharToMultiByte(936,0,pBuffer,nLength,NULL,0,m_pszUnknown,FALSE);
 33   pResult=new char[nResultLength+1];
 34   WideCharToMultiByte(936,0,(LPWSTR)pBuffer,nLength,(LPSTR)pResult,nResultLength,"  ",FALSE);
 35   pResult[nResultLength]=0;
 36 
 37   return GBKTraditional2GBKSimplified(pResult);
 38   
 39 }
 40 
 41 //big5 to GBK_繁体
 42 LPSTR CChineseConvertor::Big52GBKTraditional(char * szText)
 43 {
 44   int nLength;
 45   wchar_t *pBuffer;
 46   LPSTR pResult;
 47   int nResultLength;
 48 
 49   nLength=MultiByteToWideChar(950,0,szText,strlen(szText),NULL,0);
 50   pBuffer=new wchar_t[nLength+1];
 51   MultiByteToWideChar(950,0,(LPCSTR)szText,strlen(szText),(LPWSTR)pBuffer,nLength);
 52   pBuffer[nLength]=0;
 53 
 54   nResultLength=WideCharToMultiByte(936,0,pBuffer,nLength,NULL,0,m_pszUnknown,FALSE);
 55   pResult=new char[nResultLength+1];
 56   WideCharToMultiByte(936,0,(LPWSTR)pBuffer,nLength,(LPSTR)pResult,nResultLength,"  ",FALSE);
 57   pResult[nResultLength]=0;
 58 
 59   return pResult;
 60 }
 61 
 62 //GBK_简体 to GBK_繁体
 63 LPSTR CChineseConvertor::GBKTraditional2GBKSimplified(char * szTraditional)
 64 {
 65   LCID dwLocale;
 66   WORD wLangID;
 67   wLangID=MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED);
 68   dwLocale=MAKELCID(wLangID,SORT_CHINESE_PRC);
 69 
 70   int nLength;
 71   char *pBuffer;
 72   nLength=LCMapStringA(dwLocale,LCMAP_SIMPLIFIED_CHINESE,(LPCSTR)szTraditional,strlen(szTraditional),NULL,0);
 73   pBuffer=new char[nLength+1];
 74   pBuffer[nLength]=0;
 75   LCMapStringA(dwLocale,LCMAP_SIMPLIFIED_CHINESE,(LPCSTR)szTraditional,strlen(szTraditional),pBuffer,nLength);
 76   return pBuffer;
 77 }
 78 
 79 //GBK_简体 to big5
 80 LPSTR CChineseConvertor::GBK2Big5(char * szText)
 81 {
 82   LPSTR szGBKTraditional;
 83   int nLength;
 84   wchar_t *pBuffer;
 85   LPSTR pResult;
 86   int nResultLength;
 87 
 88   szGBKTraditional=GBKSimplified2GBKTraditional(szText);
 89   nLength=MultiByteToWideChar(936,0,szGBKTraditional,strlen(szGBKTraditional),NULL,0);
 90   pBuffer=new wchar_t[nLength+1];
 91   MultiByteToWideChar(936,0,(LPCSTR)szGBKTraditional,strlen(szGBKTraditional),(LPWSTR)pBuffer,nLength);
 92   pBuffer[nLength]=0;
 93 
 94   nResultLength=WideCharToMultiByte(950,0,pBuffer,nLength,NULL,0,m_pszUnknown,FALSE);
 95   pResult=new char[nResultLength+1];
 96   WideCharToMultiByte(950,0,(LPWSTR)pBuffer,nLength,(LPSTR)pResult,nResultLength,"  ",FALSE);
 97   pResult[nResultLength]=0;
 98 
 99   return pResult;
100 }
101 
102 //将GBK的简体转换到GBK繁体
103 LPSTR CChineseConvertor::GBKSimplified2GBKTraditional(char * szSimplified)
104 {
105   LCID dwLocale;
106   WORD wLangID;
107   wLangID=MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED);
108   dwLocale=MAKELCID(wLangID,SORT_CHINESE_PRC);
109 
110   int nLength;
111   char *pBuffer;
112   nLength=LCMapStringA(dwLocale,LCMAP_TRADITIONAL_CHINESE,(LPCSTR)szSimplified,strlen(szSimplified),NULL,0);
113   pBuffer=new char[nLength+1];
114   pBuffer[nLength]=0;
115   LCMapStringA(dwLocale,LCMAP_TRADITIONAL_CHINESE,(LPCSTR)szSimplified,strlen(szSimplified),pBuffer,nLength);
116   return pBuffer;
117 }
118 
119 // 转换到Unicode
120 LPWSTR CChineseConvertor::ToUnicode(char * szSource, int nEncoding)
121 {
122   int nLength;
123   wchar_t *pBuffer;
124   int nLanguage;
125 
126   if(nEncoding==CHINESE_SIMPLIFIED)
127     nLanguage=936;
128   else
129     if(nEncoding==CHINESE_TRADITIONAL)
130       nLanguage=950;
131     else
132       nLanguage= CP_ACP;
133 
134   nLength=MultiByteToWideChar(nLanguage,0,szSource,strlen(szSource),NULL,0);
135   pBuffer=new wchar_t[nLength+1];
136   MultiByteToWideChar(nLanguage,0,(LPCSTR)szSource,strlen(szSource),(LPWSTR)pBuffer,nLength);
137   pBuffer[nLength]=0;
138 
139   return pBuffer;
140 }
141 
142 //转换到多字节
143 LPSTR CChineseConvertor::ToMultiByte(LPCWSTR szSource, int nEncoding)
144 {
145   int nLength;
146   char *pBuffer;
147   int nLanguage;
148 
149   if(nEncoding==CHINESE_SIMPLIFIED)
150     nLanguage=936;
151   else
152     if(nEncoding==CHINESE_TRADITIONAL)
153       nLanguage=950;
154     else
155       nLanguage= CP_ACP;
156 
157   nLength=WideCharToMultiByte(nLanguage,0,szSource,wcslen(szSource),NULL,0,m_pszUnknown,FALSE);
158 
159   pBuffer=new char[nLength+1];
160   WideCharToMultiByte(nLanguage,0,szSource,wcslen(szSource),pBuffer,nLength,m_pszUnknown,FALSE);
161   pBuffer[nLength]=0;
162 
163   return pBuffer;
164 
165 }
166 
167 //UTF8转换到UNICODE
168 LPWSTR CChineseConvertor::UTF82UNICODE(char*   utf8str) 
169 {
170     int nLength;
171     wchar_t *pBuffer;
172     
173     nLength=MultiByteToWideChar(CP_UTF8,0,utf8str,strlen(utf8str),NULL,0);
174     pBuffer=new wchar_t[nLength+1];
175     MultiByteToWideChar(CP_UTF8,0,(LPCSTR)utf8str,strlen(utf8str),(LPWSTR)pBuffer,nLength);
176     pBuffer[nLength]=0;
177     
178     return pBuffer;
179 }
180 
181 //UNICODE转换到UTF8
182 LPSTR CChineseConvertor::UNICODE2UTF8(LPCWSTR  strText) 
183 {
184     int len;  
185     len = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)strText, -1, NULL, 0, NULL, NULL);  
186     char *szUtf8=new char[2*(len + 1)];
187     memset(szUtf8, 0, len * 2 + 2);  //UTF8最多的字节数最多是一个UINICODE字符所占字节数的两倍
188     WideCharToMultiByte (CP_UTF8, 0, (LPCWSTR)strText, -1, szUtf8, len, NULL,NULL);
189     return szUtf8;
190     
191 }

原文链接: https://www.cnblogs.com/george-cw/p/5742713.html

欢迎关注

微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍

原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/238158

非原创文章文中已经注明原地址,如有侵权,联系删除

关注公众号【高性能架构探索】,第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处!

(0)
上一篇 2023年2月13日 下午5:41
下一篇 2023年2月13日 下午5:41

相关推荐