前言
1.C++的string对中文的查找、替换等基本操作并不友好。如果要对中文进行操作,最好先把中文转成宽字符串(wstring)来处理:在string中每个字符所占的字节数并不固定,常用汉字在GBK编码中占2个字节(在UTF-8中通常占3个字节),而字符串中还可能夹杂英文、数字等只占1个字节的字符,因此按字节查找时,返回的是字节偏移而不是字符的位置,甚至可能匹配到跨越两个字符的字节。
2.如果要操作中文字符串,比较好的办法是先把string转成wstring,完成查找、匹配等操作之后,再转回string。
3.这里我定义了一个类,把它们之间的互相转换都封装成了成员函数。
代码
Chinese.h
#pragma once
#include <string>
#include <iostream>
// Conversion helper between narrow multi-byte text (char* / std::string) and
// wide text (wchar_t* / std::wstring). Working on the wide form lets callers
// search and replace per character instead of per byte.
class Chinese
{
public:
    Chinese();
    ~Chinese();

    // ---- std::string <-> std::wstring -------------------------------------

    // Converts the narrow string `input` and returns the result as a wide
    // string.
    std::wstring strToWstr(std::string &input);

    // Converts the wide string `wstr` and returns the result as a narrow
    // string.
    std::string wstrToStr(std::wstring &wstr);

    // ---- raw, caller-supplied buffers -------------------------------------

    // char* -> wchar_t*: converts the NUL-terminated `str` into `buff` and
    // returns `buff`. `buff` needs room for one wchar_t per byte of `str`
    // plus the terminator.
    wchar_t* MBCSToUnicode(wchar_t * buff, const char * str);

    // wchar_t* -> char*: converts the NUL-terminated `str` into `buff` and
    // returns `buff`. `buff` needs room for up to two bytes per wchar_t of
    // `str` plus the terminator.
    char* unicodeToMBCS(char* buff, const wchar_t* str);

    // ---- wide text -> char* -----------------------------------------------

    // Both overloads hand back a char* holding the narrow form of the wide
    // input; the lifetime and ownership of that pointer are decided by the
    // out-of-line definitions, so check them before using the result.
    char* wstrToChar(std::wstring &wstr);
    char* wstrToChar(const wchar_t* wstr);
};
Chinese.cpp
#include "Chinese.h"
#include <cstdlib>
#include <cwchar>
// Default constructor: performs no initialisation work.
Chinese::Chinese() {}
// Destructor: performs no cleanup work.
Chinese::~Chinese() {}
// Converts the NUL-terminated multi-byte string `str` into wide characters.
//
// A byte below 0x80 becomes one wchar_t with the same value. A byte with the
// high bit set is treated as the lead byte of a two-byte character and is
// packed together with the byte that follows it into a single wchar_t: lead
// byte in bits 0-7, trail byte in bits 8-15 (the value a little-endian host
// with a 2-byte wchar_t gets from reading the two bytes as one wchar_t).
// The packed value is a per-character key, not a looked-up Unicode code point.
//
// Each high-bit byte is paired with exactly one following byte, so encodings
// whose characters take three or more bytes (e.g. UTF-8 Chinese text) are not
// kept at one character per wchar_t.
//
// buff: destination; must hold at least strlen(str) + 1 wchar_t.
// str:  NUL-terminated source bytes.
// Returns buff.
wchar_t* Chinese::MBCSToUnicode(wchar_t* buff, const char* str)
{
    wchar_t* out = buff;
    // Work on unsigned bytes so values >= 0x80 are not sign-extended.
    const unsigned char* in = reinterpret_cast<const unsigned char*>(str);

    while (*in)
    {
        if ((*in & 0x80) && in[1] != 0)
        {
            // Assemble the value arithmetically rather than reading the bytes
            // through a wchar_t*: that read is misaligned, and where wchar_t
            // is wider than two bytes it would also pull in the following
            // characters and could run past the terminator.
            *out = static_cast<wchar_t>(in[0] | (in[1] << 8));
            in += 2;
        }
        else
        {
            // ASCII byte, or a lead byte cut off by the end of the string.
            // Consuming only one byte here keeps the loop from stepping over
            // the terminating NUL.
            *out = static_cast<wchar_t>(*in);
            ++in;
        }
        ++out;
    }

    *out = L'\0';
    return buff;
}
// Converts the NUL-terminated wide string `str` back into bytes.
//
// For every wchar_t the low byte (bits 0-7) is written first. If any of bits
// 8-15 are set, the second byte (bits 8-15) is written as well, so such a
// wide character produces two output bytes and every other one produces one.
// Bits above 15 are ignored.
//
// Note: a value whose low byte is zero but whose bits 8-15 are set (for
// example 0x4E00) produces an embedded zero byte in the output.
//
// buff: destination; must hold at least 2 * wcslen(str) + 1 bytes.
// str:  NUL-terminated wide source.
// Returns buff.
char* Chinese::unicodeToMBCS(char* buff, const wchar_t* str)
{
    char* out = buff;

    for (const wchar_t* in = str; *in; ++in)
    {
        // Extract the bytes with shifts and masks instead of aliasing the
        // wchar_t through a char*: the result then no longer depends on the
        // host byte order and matches what the pointer read yields on
        // little-endian machines.
        const unsigned long value = static_cast<unsigned long>(*in);

        *out++ = static_cast<char>(value & 0xFF);
        if (value & 0xFF00)
        {
            *out++ = static_cast<char>((value >> 8) & 0xFF);
        }
    }

    *out = '\0';
    return buff;
}
// Returns the wide-character form of `input`, produced by MBCSToUnicode().
//
// The scratch buffer is owned by the std::wstring itself, so there is no
// unchecked malloc() result to dereference and nothing to leak if an
// allocation throws.
std::wstring Chinese::strToWstr(std::string &input)
{
    // MBCSToUnicode() emits at most one wchar_t per input byte plus the
    // terminator, so size() + 1 elements are always enough.
    std::wstring wide(input.size() + 1, L'\0');
    MBCSToUnicode(&wide[0], input.c_str());

    // Drop the terminator and the unused tail of the scratch space.
    const std::wstring::size_type end = wide.find(L'\0');
    if (end != std::wstring::npos)
    {
        wide.erase(end);
    }
    return wide;
}
// Convenience overload for std::wstring: forwards the string's NUL-terminated
// contents to wstrToChar(const wchar_t*) and returns that call's result
// unchanged, so the returned pointer has whatever lifetime that overload gives it.
char* Chinese::wstrToChar(std::wstring &wstr)
{
    return wstrToChar(wstr.c_str());
}
// Converts the NUL-terminated wide string `wstr` into a newly allocated,
// NUL-terminated byte string using unicodeToMBCS().
//
// Returns a buffer obtained from malloc(); the caller owns it and must
// release it with free(). Returns a null pointer if the allocation fails.
//
// The buffer must not be freed here: the returned pointer IS that buffer, so
// freeing it before returning would hand the caller a dangling pointer.
char* Chinese::wstrToChar(const wchar_t* wstr)
{
    // size_t avoids narrowing the length returned by wcslen().
    const size_t len = wcslen(wstr);

    // unicodeToMBCS() writes at most two bytes per wide character, plus the
    // terminator.
    char* buff = static_cast<char*>(malloc((len * 2 + 1) * sizeof(char)));
    if (!buff)
    {
        return buff; // allocation failed: propagate the null pointer
    }

    return unicodeToMBCS(buff, wstr);
}
// Returns the narrow (byte) form of `wstr`, produced by unicodeToMBCS().
//
// The scratch buffer is owned by the std::string itself, so there is no
// unchecked malloc() result to dereference and nothing to leak if an
// allocation throws.
std::string Chinese::wstrToStr(std::wstring &wstr)
{
    // unicodeToMBCS() emits at most two bytes per wide character plus the
    // terminator, so 2 * size() + 1 bytes are always enough.
    std::string narrow(2 * wstr.size() + 1, '\0');
    unicodeToMBCS(&narrow[0], wstr.c_str());

    // Keep everything up to the first NUL, as constructing a std::string
    // from the C string would.
    const std::string::size_type end = narrow.find('\0');
    if (end != std::string::npos)
    {
        narrow.erase(end);
    }
    return narrow;
}
main.cpp
#include <iostream>
#include <string>
#include "Chinese.h"
int main()
{
//输入层:接收char*输入,并将其转换为wchar*
std::string input = "于老师的k父亲王老爷子是蒙古的海军司令!yes";
std::string temp = "王";
Chinese ch;
std::wstring w_str = ch.strToWstr(input);
std::wstring w_tem = ch.strToWstr(temp);
int index = w_str.find(w_tem);
std::cout << index << std::endl;
return 0;
}