【数据结构】二叉搜索树

什么是二叉搜索树

二叉搜索树又称二叉排序树，它或者是一棵空树，或者是具有以下性质的二叉树。

若它的左子树不为空，则左子树上所有节点的值都小于根节点的值
若它的右子树不为空，则右子树上所有节点的值都大于根节点的值
它的左右子树也分别为二叉搜索树

假如有一个数组int a [] = {5,3,4,1,7,8,2,6,0,9};，我们将它放入二叉搜索树中，则树的结果如下图所示。
并且当我们对二叉搜索树进行中序遍历后，得到的是升序序列。
在这里插入图片描述

二叉搜索树的数据结构

树的结点

主要包含三个属性

结点的左指针
结点的右指针
结点的值

// A single node of the binary search tree: one key plus links to the two
// child subtrees.
template<class K>
struct BSTreeNode
{
	BSTreeNode* _left;   // left child, nullptr when absent
	BSTreeNode* _right;  // right child, nullptr when absent
	K _key;              // key stored in this node

	// Builds a leaf: the key is copied in and both child links start empty.
	BSTreeNode(const K& key) : _left(nullptr), _right(nullptr), _key(key) {}
};

树

主要包含树的根，和一些使用方法。

// Binary search tree keyed by K. Only the interface is declared here.
template<class K>
class BSTree
{
	using Node = BSTreeNode<K>;

public:
	// Adds key to the tree; reports whether an insertion took place.
	bool insert(const K& key);

	// Reports whether key is stored in the tree.
	bool find(const K& key);

	// Removes key from the tree; reports whether a removal took place.
	bool erase(const K& key);

	// Walks the tree in in-order sequence.
	void inOrder();

private:
	Node* _root = nullptr;  // root node; nullptr while the tree is empty
};

插入操作

如果树根为空，则创建新结点，并让它成为根结点。
如果根不为空，则先找到应该存在的位置
如果已经存在该值，则返回false，表示无法插入，已经存在该值。
如果不存在该值，找到该值应该存在的位置，创建新结点，让父母指向自己，返回true。

bool insert(const K& key)
{
	if (_root == nullptr)
	{
		_root = new Node(key);
		return true;
	}
	
	Node* parent = nullptr;
	Node* cur = _root;
	while (cur)
	{
		if (cur->_key < key)
		{
			parent = cur;
			cur = cur->_right;
		}
		else if (cur->_key > key)
		{
			parent = cur;
			cur = cur->_left;
		}
		else
		{
			return false;
		}
	}

	cur = new Node(key);
	if (parent->_key < key)
	{
		parent->_right = cur;
	}
	else
	{
		parent->_left = cur;
	}
	return true;
}

遍历操作

递归中序遍历。

// Public entry point of the in-order traversal: hands the root to the
// recursive helper _inOrder, which prints the keys.
void inOrder()
{
	_inOrder(_root);
}

// Prints the keys of the subtree rooted at root in in-order sequence
// (left subtree, node, right subtree), each followed by a single space.
void _inOrder(Node* root)
{
	// The left side is handled recursively; the right side is handled by
	// advancing the loop variable instead of a second recursive call.
	for (Node* node = root; node != nullptr; node = node->_right)
	{
		_inOrder(node->_left);
		cout << node->_key << " ";
	}
}

查找操作

需要传参关键字key。
在这里插入图片描述

bool find(const K& key)
{
	Node* cur = _root;
	while (cur)
	{
		if (cur->_key < key)
		{
			cur = cur->_right;
		}
		else if (cur->_key > key)
		{
			cur = cur->_left;
		}
		else
		{
			return true;
		}
	}

	return false;
}

删除操作

首先查找元素是否在二叉搜索树中，如果不存在，则直接返回；否则要删除的结点可能分下面四种情况：

a. 要删除的结点无孩子结点
b. 要删除的结点只有左孩子结点
c. 要删除的结点只有右孩子结点
d. 要删除的结点有左、右孩子结点

看起来待删除节点有4种情况，实际上情况a可以与情况b或者c合并起来，因此真正的删除过程如下：

情况b：删除该结点且使被删除节点的双亲结点指向被删除节点的左孩子结点
情况c：删除该结点且使被删除节点的双亲结点指向被删除结点的右孩子结点
情况d：在它的右子树中寻找中序下的第一个结点(关键码最小)，用它的值填补到被删除节点中，再来处理该结点的删除问题。

bool erase(const K& key)
{
	Node* parent = nullptr;
	Node* cur = _root;

	while (cur)
	{
		if (cur->_key < key)
		{
			parent = cur;
			cur = cur->_right;
		}
		else if (cur->_key > key)
		{
			parent = cur;
			cur = cur->_left;
		}
		else
		{
			//找到了开始删除

			//如果该结点左儿子为空
			if (cur->_left == nullptr)
			{
				if (parent->_right == cur)
				{
					parent->_right = cur->_right;
				}
				else
				{
					parent->_left = cur->_right;
				}
				delete cur;
			}
			//如果结点的右儿子为空
			else if (cur->_right == nullptr)
			{
				if (parent->_left == cur)
				{
					parent->_left = cur->_left;
				}
				else
				{
					parent->_right = cur->_left;
				}
				delete cur;
			}
			//左右都不为空
			else
			{
				//找要删除的结点的右子树的最小结点进行替代删除
				Node* rightMinParent = cur;
				Node* rightMin = cur->_right;
				while (rightMin->_left)
				{
					rightMinParent = rightMin;
					rightMin = rightMin->_left;
				}

				//替代删除
				cur->_key = rightMin->_key;
				
				if (rightMin == rightMinParent->_left)
					rightMinParent->_left = rightMin->_right;
				else
					rightMinParent->_right = rightMin->_right;

				delete rightMin;
			}
			return true;
		}
	}
	//如果找不到该结点
	return false;
}

搜索二叉树的应用

分为K模型和KV模型

Key模型

K模型即只有key作为关键码，结构中只需要存储Key即可，关键码即为需要搜索到的值。比如：给一个单词word，判断该单词是否拼写正确，
具体方式如下：

以单词集合中的每个单词作为key，构建一棵二叉搜索树
在二叉搜索树中检索该单词是否存在，存在则拼写正确，不存在则拼写错误。

Key/Value模型

KV模型：每一个关键码key，都有与之对应的值Value，即<Key, Value>的键值对。该种方式在现实生活中非常常见：
比如英汉词典就是英文与中文的对应关系，通过英文可以快速找到与其对应的中文，英文单词与其对应的中文<word, chinese>就构成一种键值对；
再比如统计单词次数，统计成功后，给定单词就可快速找到其出现的次数，单词与其出现次数就是<word, count>就构成一种键值对。
比如：实现一个简单的英汉词典dict，可以通过英文找到与其对应的中文，具体实现方式如下：

<单词，中文含义>为键值对构造二叉搜索树，注意：二叉搜索树需要比较，键值对比较时只比较Key
查询英文单词时，只需给出英文单词（key），就可快速找到与其对应的中文含义（value）

Key模型整体代码实现

#pragma once
//二叉搜索树 K模型
#include <iostream>
using namespace std;

// Node of the key-only (K model) binary search tree.
template<class K>
struct BSTreeNode
{
	// Creates a leaf holding a copy of key; both child links start as nullptr.
	BSTreeNode(const K& key):_left(nullptr),_right(nullptr),_key(key){}
	BSTreeNode<K>* _left;   // left child, or nullptr
	BSTreeNode<K>* _right;  // right child, or nullptr

	K _key;                 // key stored in this node
};

// Key-only binary search tree.
//
// Keys are ordered with operator< and operator>; duplicates are rejected.
// The tree owns its nodes: they are freed on destruction, and copying a
// tree makes an independent deep copy.
template<class K>
class BSTree
{
	typedef BSTreeNode<K> Node;

public:
	BSTree() = default;

	// Deep copy: the new tree gets its own nodes.
	BSTree(const BSTree& other) : _root(_copy(other._root)) {}

	// Move: takes over the nodes of other and leaves it empty.
	BSTree(BSTree&& other) noexcept : _root(other._root)
	{
		other._root = nullptr;
	}

	// Copy/move assignment. The argument is already a private copy (or a
	// moved-from temporary), so exchanging roots with it hands the old
	// nodes to its destructor.
	BSTree& operator=(BSTree other) noexcept
	{
		Node* previous = _root;
		_root = other._root;
		other._root = previous;
		return *this;
	}

	~BSTree()
	{
		_destroy(_root);
	}

	// Inserts key. Returns true when a node was added, false when the key
	// was already present.
	bool insert(const K& key)
	{
		if (_root == nullptr)
		{
			_root = new Node(key);
			return true;
		}

		// Walk down to the empty slot, remembering the node above it.
		Node* parent = nullptr;
		Node* cur = _root;
		while (cur)
		{
			if (cur->_key < key)
			{
				parent = cur;
				cur = cur->_right;
			}
			else if (cur->_key > key)
			{
				parent = cur;
				cur = cur->_left;
			}
			else
			{
				return false;
			}
		}

		cur = new Node(key);
		if (parent->_key < key)
		{
			parent->_right = cur;
		}
		else
		{
			parent->_left = cur;
		}
		return true;
	}

	// Prints all keys in in-order sequence to std::cout, each followed by
	// a single space.
	void inOrder() const
	{
		_inOrder(_root);
	}

	// Returns true when key is stored in the tree.
	bool find(const K& key) const
	{
		const Node* cur = _root;
		while (cur)
		{
			if (cur->_key < key)
			{
				cur = cur->_right;
			}
			else if (cur->_key > key)
			{
				cur = cur->_left;
			}
			else
			{
				return true;
			}
		}

		return false;
	}

	// Removes key. Returns true when a node was removed, false when key
	// is not present.
	bool erase(const K& key)
	{
		Node* parent = nullptr;
		Node* cur = _root;

		while (cur)
		{
			if (cur->_key < key)
			{
				parent = cur;
				cur = cur->_right;
			}
			else if (cur->_key > key)
			{
				parent = cur;
				cur = cur->_left;
			}
			else
			{
				// Found the node to delete.
				if (cur->_left == nullptr)
				{
					// No left child: the right child (maybe null) moves up.
					_relink(parent, cur, cur->_right);
					delete cur;
				}
				else if (cur->_right == nullptr)
				{
					// No right child: the left child moves up.
					_relink(parent, cur, cur->_left);
					delete cur;
				}
				else
				{
					// Two children: find the leftmost node of the right
					// subtree and delete it in place of cur.
					Node* rightMinParent = cur;
					Node* rightMin = cur->_right;
					while (rightMin->_left)
					{
						rightMinParent = rightMin;
						rightMin = rightMin->_left;
					}

					// Replace-then-delete: move the successor's key up.
					cur->_key = rightMin->_key;

					// The successor has no left child; splice in its right
					// child. It is a right child only when the loop above
					// never advanced (rightMinParent == cur).
					if (rightMin == rightMinParent->_left)
						rightMinParent->_left = rightMin->_right;
					else
						rightMinParent->_right = rightMin->_right;

					delete rightMin;
				}
				return true;
			}
		}
		// Key not found.
		return false;
	}


private:
	Node* _root = nullptr;

	// Makes replacement occupy the position victim currently holds.
	// parent is nullptr exactly when victim is the root, in which case the
	// root pointer is what has to change.
	void _relink(Node* parent, Node* victim, Node* replacement)
	{
		if (parent == nullptr)
			_root = replacement;
		else if (parent->_left == victim)
			parent->_left = replacement;
		else
			parent->_right = replacement;
	}

	// Recursive in-order walk: left subtree, node, right subtree.
	static void _inOrder(const Node* root)
	{
		if (root == nullptr)
			return;

		_inOrder(root->_left);
		std::cout << root->_key << " ";
		_inOrder(root->_right);
	}

	// Frees every node of the subtree rooted at root (children first).
	static void _destroy(Node* root)
	{
		if (root == nullptr)
			return;

		_destroy(root->_left);
		_destroy(root->_right);
		delete root;
	}

	// Returns a newly allocated deep copy of the subtree rooted at src.
	static Node* _copy(const Node* src)
	{
		if (src == nullptr)
			return nullptr;

		Node* dup = new Node(src->_key);
		try
		{
			dup->_left = _copy(src->_left);
			dup->_right = _copy(src->_right);
		}
		catch (...)
		{
			// Do not leak the partially built copy if an allocation or a
			// key copy throws further down.
			_destroy(dup);
			throw;
		}
		return dup;
	}
};


void TestBSTree()
{
	BSTree<int> tree;
	int a[] = { 5,3,4,1,7,8,2,6,0,9 };
	for (auto x : a)
	{
		tree.insert(x);
	}
	tree.inOrder();
	tree.erase(7);
	cout << endl;
	tree.inOrder();
}

Key/Value模型整体代码实现

#pragma once
#pragma once

//二叉搜索树		KV模型

#include <iostream>
#include <string>
using namespace std;


// Node of the key/value (KV model) binary search tree.
template<class K,class V>
struct BSTreeNode
{
	// Creates a leaf holding copies of key and value; both child links
	// start as nullptr.
	BSTreeNode(const K& key,const V& value) :_left(nullptr)
		, _right(nullptr), _key(key),_value(value) {}
	
	BSTreeNode<K,V>* _left;   // left child, or nullptr
	BSTreeNode<K,V>* _right;  // right child, or nullptr

	K _key;                   // key the node is ordered by
	V _value;                 // value associated with _key
};

// Key/value binary search tree.
//
// Nodes are ordered by key only (operator< and operator> on K); duplicate
// keys are rejected. The tree owns its nodes: they are freed on
// destruction, and copying a tree makes an independent deep copy.
template<class K,class V>
class BSTree
{
	typedef BSTreeNode<K,V> Node;

public:
	BSTree() = default;

	// Deep copy: the new tree gets its own nodes.
	BSTree(const BSTree& other) : _root(_copy(other._root)) {}

	// Move: takes over the nodes of other and leaves it empty.
	BSTree(BSTree&& other) noexcept : _root(other._root)
	{
		other._root = nullptr;
	}

	// Copy/move assignment. The argument is already a private copy (or a
	// moved-from temporary), so exchanging roots with it hands the old
	// nodes to its destructor.
	BSTree& operator=(BSTree other) noexcept
	{
		Node* previous = _root;
		_root = other._root;
		other._root = previous;
		return *this;
	}

	~BSTree()
	{
		_destroy(_root);
	}

	// Inserts the pair (key, value). Returns true when a node was added,
	// false when key was already present (the stored value is not changed).
	bool insert(const K& key,const V& value)
	{
		if (_root == nullptr)
		{
			_root = new Node(key,value);
			return true;
		}

		// Walk down to the empty slot, remembering the node above it.
		Node* parent = nullptr;
		Node* cur = _root;
		while (cur)
		{
			if (cur->_key < key)
			{
				parent = cur;
				cur = cur->_right;
			}
			else if (cur->_key > key)
			{
				parent = cur;
				cur = cur->_left;
			}
			else
			{
				return false;
			}
		}

		cur = new Node(key,value);
		if (parent->_key < key)
		{
			parent->_right = cur;
		}
		else
		{
			parent->_left = cur;
		}
		return true;
	}

	// Prints one "key value" line per node to std::cout, in in-order
	// sequence.
	void inOrder() const
	{
		_inOrder(_root);
	}

	// Returns the node holding key, or nullptr when key is not present.
	// The returned pointer refers to a node owned by the tree.
	Node* find(const K& key)
	{
		Node* cur = _root;
		while (cur)
		{
			if (cur->_key < key)
			{
				cur = cur->_right;
			}
			else if (cur->_key > key)
			{
				cur = cur->_left;
			}
			else
			{
				return cur;
			}
		}

		return nullptr;
	}

	// Removes the entry for key. Returns true when a node was removed,
	// false when key is not present.
	bool erase(const K& key)
	{
		Node* parent = nullptr;
		Node* cur = _root;

		while (cur)
		{
			if (cur->_key < key)
			{
				parent = cur;
				cur = cur->_right;
			}
			else if (cur->_key > key)
			{
				parent = cur;
				cur = cur->_left;
			}
			else
			{
				// Found the node to delete.
				if (cur->_left == nullptr)
				{
					// No left child: the right child (maybe null) moves up.
					_relink(parent, cur, cur->_right);
					delete cur;
				}
				else if (cur->_right == nullptr)
				{
					// No right child: the left child moves up.
					_relink(parent, cur, cur->_left);
					delete cur;
				}
				else
				{
					// Two children: find the leftmost node of the right
					// subtree and delete it in place of cur.
					Node* rightMinParent = cur;
					Node* rightMin = cur->_right;
					while (rightMin->_left)
					{
						rightMinParent = rightMin;
						rightMin = rightMin->_left;
					}

					// Replace-then-delete: the successor's whole entry moves
					// up. The value has to travel with the key, otherwise
					// the surviving key would keep the erased key's value.
					cur->_key = rightMin->_key;
					cur->_value = rightMin->_value;

					// The successor has no left child; splice in its right
					// child. It is a right child only when the loop above
					// never advanced (rightMinParent == cur).
					if (rightMin == rightMinParent->_left)
						rightMinParent->_left = rightMin->_right;
					else
						rightMinParent->_right = rightMin->_right;

					delete rightMin;
				}
				return true;
			}
		}
		// Key not found.
		return false;
	}


private:
	Node* _root = nullptr;

	// Makes replacement occupy the position victim currently holds.
	// parent is nullptr exactly when victim is the root, in which case the
	// root pointer is what has to change.
	void _relink(Node* parent, Node* victim, Node* replacement)
	{
		if (parent == nullptr)
			_root = replacement;
		else if (parent->_left == victim)
			parent->_left = replacement;
		else
			parent->_right = replacement;
	}

	// Recursive in-order walk: left subtree, node, right subtree.
	static void _inOrder(const Node* root)
	{
		if (root == nullptr)
			return;

		_inOrder(root->_left);
		std::cout << root->_key << " " << root->_value << std::endl;
		_inOrder(root->_right);
	}

	// Frees every node of the subtree rooted at root (children first).
	static void _destroy(Node* root)
	{
		if (root == nullptr)
			return;

		_destroy(root->_left);
		_destroy(root->_right);
		delete root;
	}

	// Returns a newly allocated deep copy of the subtree rooted at src.
	static Node* _copy(const Node* src)
	{
		if (src == nullptr)
			return nullptr;

		Node* dup = new Node(src->_key, src->_value);
		try
		{
			dup->_left = _copy(src->_left);
			dup->_right = _copy(src->_right);
		}
		catch (...)
		{
			// Do not leak the partially built copy if an allocation or a
			// key/value copy throws further down.
			_destroy(dup);
			throw;
		}
		return dup;
	}
};


void TestBSTree()
{
	BSTree<string, string> dict;
	dict.insert("sort", "排序");
	dict.insert("string", "字符串");
	dict.insert("tree", "树");
	dict.insert("insert", "插入");

	string str;
	while (cin >> str)
	{
		BSTreeNode<string, string>* ret = dict.find(str);
		if (ret)
		{
			cout << ret->_value << endl;
		}
		else
		{
			cout << "没找到" << endl;
		}
	}
}

二叉搜索树的性能分析

最优情况下，二叉搜索树为完全二叉树，其平均比较次数为： $\log_2 N$ ，时间复杂度为 $O(\log N)$
最差情况下，二叉搜索树退化为单支树，其平均比较次数为： $\frac{N}{2}$ ，时间复杂度为 $O(N)$