【动态规划】【字符串】C++算法：10正则表达式匹配

作者推荐

涉及知识点

LeetCode10:正则表达式匹配

给你一个字符串 s 和一个字符规律 p，请你来实现一个支持 ‘.’ 和 ‘’ 的正则表达式匹配。
‘.’ 匹配任意单个字符
'’ 匹配零个或多个前面的那一个元素
所谓匹配，是要涵盖整个字符串 s的，而不是部分字符串。
示例 1：
输入：s = “aa”, p = “a”
输出：false
解释：“a” 无法匹配 “aa” 整个字符串。
示例 2:
输入：s = “aa”, p = “a*”
输出：true
解释：因为 ‘’ 代表可以匹配零个或多个前面的那一个元素, 在这里前面的元素就是 ‘a’。因此，字符串 “aa” 可被视为 ‘a’ 重复了一次。
示例 3：
输入：s = “ab”, p = "."
输出：true
解释："." 表示可匹配零个或多个（'’）任意字符（‘.’）。
提示：
1 <= s.length <= 20
1 <= p.length <= 20
s 只包含从 a-z 的小写字母。
p 只包含从 a-z 的小写字母，以及字符 . 和 *。
保证每次出现字符 * 时，前面都匹配到有效的字符

动态规划

时间复杂度😮(nnm) n=s.length m = p.length


动态规划的状态表示	p[0,i)和s[0,j)能完全匹配，记录所有(i,j)
动态规划的状态转移方程	如果p[i+1]是*，则p[i,i+2)能否匹配s[j,x)；否则p[i]能否匹配s[j]
动态规划的的初始化	{0,0}
动态规划的填表顺序	从小到枚举i
动态规划的返回值	是否存在状态(p.length,s.lenght)

滚动哈希集合

转移状态时：只需要读取j1的相关状态，写人j1+1的状态。我们用两个哈希来表示状态：pre表示j1 相关状态,dp 表示j2的相关状态，然后swap。

分类讨论


.*	[min(pre),s.length)
字母x*	iPre, 如果s[iPre,pPre+y]都是x ,则[iPre+1,iPre+1+y]都是合法状态 iPre取自pre
字母x	s[j]==x，则j+1也是合法状态
.	s[j]存在，j+1就是合法状态

代码

核心代码

class Solution {
public:
	bool isMatch(string s, string p) {
		m_c = s.length();
		unordered_set<int> pre = { 0 };
		for (int i = 0 ; i < p.length(); i++ )
		{	
			const auto& ch = p[i];
			if ('*' == ch)
			{
				continue;
			}
			unordered_set<int> dp;
			if ((i + 1 < p.length()) && ('*' == p[i + 1]))
			{
				if ('.' == ch)
				{
					int iMin = INT_MAX;
					for (const auto& iPre : pre)
					{
						iMin = min(iMin, iPre);
					}
					for (; iMin <= m_c; iMin++)
					{
						dp.insert(iMin);
					}
				}
				else
				{
					dp = pre;
					for (const auto& iPre : pre)
					{
						int j = iPre;
						while (j < m_c)
						{
							if (s[j] == ch)
							{
								dp.insert(++j);
							}
							else
							{
								break;
							}
						}
					}
				}
			}
			else
			{
				for (const auto& iPre : pre)
				{
					if (iPre  < m_c)
					{
						if (('.' == ch) || (s[iPre] == ch))
						{
							dp.insert(iPre + 1);
						}
					}
				}
			}			
			pre.swap(dp);
		}
		return pre.count(m_c);
	}
	int m_c;
};

测试用例

template<class T>
void Assert(const T& t1, const T& t2)
{
	assert(t1 == t2);
}

template<class T>
void Assert(const vector<T>& v1, const vector<T>& v2)
{
	if (v1.size() != v2.size())
	{
		assert(false);
		return;
	}
	for (int i = 0; i < v1.size(); i++)
	{
		Assert(v1[i], v2[i]);
	}
}


int main()
{
	string s, p;

	{
		Solution sln;
		s = "aa", p = "a";
		auto res = sln.isMatch(s, p);
		Assert(false, res);
	}
	{
		Solution sln;
		s = "aa", p = "aa";
		auto res = sln.isMatch(s, p);
		Assert(true, res);
	}
	{
		Solution sln;
		s = "a", p = "a*";
		auto res = sln.isMatch(s, p);
		Assert(true, res);
	}
	{
		Solution sln;
		s = "aa", p = "a*";
		auto res = sln.isMatch(s, p);
			Assert(true, res);
	}
	{
		Solution sln;
		s = "aaa", p = "a*";
		auto res = sln.isMatch(s, p);
		Assert(true, res);
	}
	{
		Solution sln;
		s = "ab", p = ".*";
		auto res = sln.isMatch(s, p);
		Assert(true, res);
	}
	{
		Solution sln;
		s = "aab", p = "c*a*b";
		auto res = sln.isMatch(s, p);
		Assert(true, res);
	}
	{
		Solution sln;
		s = "aaaaaaaaaaaaab", p = "a*a*a*a*a*a*a*a*a*a*";
		auto res = sln.isMatch(s, p);
		Assert(false, res);
	}
}

动态规划的优化

时间复杂度😮(nm)
优化动态规划的转移方程，改成枚举s。也要处理匹配多个的问题。比如：连续多个不匹配任何字符。
不用滚动哈希集合了。


动态规划的状态表示	p[0,i)和s[0,j)能完全匹配，dp[i][j]为true;否则为false
动态规划的状态转移方程	比较复杂下文讨论
动态规划的的初始化	dp[0][0]=ture，其它false dp[x][0]也要计算
动态规划的填表顺序	从小到枚举i
动态规划的返回值	dp[p.length][s.length]

如果p[i-1]是星号，只需要考虑两种情况：

匹配0个字符,dp[i][j] = dp[i-2][j]。
匹配n个字符，n>0。 dp[i][j] = dp[i][j-1]

注意
dp[0][x] x>0，无意义全部为false。
dp[x][0] x>0 如果p[0,x)全部是yyyy… ，则为true。 y表示.或字母，两个y可能不相同。
y* 必须处理号，不能处理y，否则如果以号结束的时候，会出错。

动态规划的无后效性

计算dp[i][j]的时候，用到了i,i-1,i-2,j,j-1。第一层循环从小到大枚举i，第二层循环从小到大枚举j。i小的先处理，i相等的,j小的先处理。

代码

class Solution {
public:
	bool isMatch(string s, string p) {
		m_r = p.length();
		m_c = s.length();
		vector<vector<bool>> dp(m_r+1, vector<bool>(m_c+1));
		dp[0][0] = true; 
		for (int i = 1; (i < m_r)&&('*'== p[i]); i+=2 )
		{
			dp[i + 1][0] = dp[i - 1][0];
		}
		for (int i = 1; i <= m_r; i++)
		{
			auto Match = [&p, &s](int i,int j) {return ('.' == p[i]) || (s[j] == p[i]); };
			if ((i < m_r) && ('*' == p[i]))
			{
				continue;//x* 在*号那处理
			}
			for (int j = 1; j <= m_c; j++)
			{	
				if ('*' == p[i-1])
				{
					if (i >= 2)
					{//匹配0个字符
						dp[i][j] = dp[i][j] | dp[i - 2][j];
					}
					if (!Match(i - 2, j-1))
					{
						continue;
					}
					dp[i][j] = dp[i][j] | dp[i][j-1];//dp[i][j-1] 的*号，可能匹配了0次，1次,2次...
				}
				else
				{
					if (!Match(i-1, j-1))
					{
						continue;
					}
					dp[i][j] = dp[i - 1][j - 1];
				}
			}

		}
		return dp[m_r][m_c];
	}
	int m_r, m_c;
};

2022年12月旧版

class Solution {
public:
bool isMatch(string s, string p) {
const int lenS = s.size();
const int lenP = p.size();
//dp[i][j]表示 p的前i个字符能否和s的前j个字符匹配
vector<vector> dp;
dp.assign(lenP + 1, vector(lenS + 1));
dp[0][0] = true;
for (int i = 1; i <= lenP; i++)
{
for (int j = 0; j <= lenS; j++)
{
if (‘’ == p[i-1])
{
if (dp[i -2][j ])
{//匹配0个字符
dp[i ][j ] = true;
}
if (0 == j)
{
continue;
}
if (IsSame(p[i - 2], s[j-1]))
{
//匹配一次和匹配多次
if (dp[i - 2][j] || dp[i ][j-1])
{
dp[i][j] = true;
}
}
}
if (0 == j)
{
continue;
}
if ((i < lenP) && ('’ == p[i ]))
{
//dp[i + 1 + 1][j + 1] != dp[i][j];
}
else
{
if (IsSame(p[i-1], s[j-1]) && dp[i-1][j-1] )
{
dp[i][j] = true;
}
}
}
}
return dp[lenP][lenS];
}
bool IsSame(const char& ch1, const char& ch2)
{
return (‘.’ == ch1) || (‘.’ == ch2) | (ch1 == ch2);
}
};

扩展阅读

视频课程

有效学习：明确的目标及时的反馈拉伸区（难度合适），可以先学简单的课程，请移步CSDN学院，听白银讲师（也就是鄙人）的讲解。
https://edu.csdn.net/course/detail/38771

如何你想快

速形成战斗了，为老板分忧，请学习C#入职培训、C++入职培训等课程
https://edu.csdn.net/lecturer/6176

我想对大家说的话
闻缺陷则喜是一个美好的愿望，早发现问题，早修改问题，给老板节约钱。
子墨子言之：事无终始，无务多业。也就是我们常说的专业的人做专业的事。
如果程序是一条龙，那算法就是他的是睛

测试环境

操作系统：win7 开发环境： VS2019 C++17
或者操作系统：win10 开发环境： VS2022 C++17
如无特殊说明，本算法用**C++**实现。