Bootstrap

C++容器——集合(unordered_set)

1. 简介

unordered_set是一种关联式容器,包含的key值唯一,不会进行排序;增加、修改和查询具有线性的时间复杂度,其存储结构为哈希表;

头文件和定义

#include <unordered_set>

template<
    class Key,
    class Hash = std::hash<Key>,
    class KeyEqual = std::equal_to<Key>,
    class Allocator = std::allocator<Key>
> class unordered_set;

2. 初始化

unordered_set的初始化方法如下所示

/*
* @brief unordered_set 
* @g++ -g unordered_set_main.cc -o d -std=c++11
* @author 
* @date 2023/03/27
*/

#include <iostream>
#include <unordered_set>
#include <string>

template<typename T>
void showInfo(T& t)
{
    for(const auto& au : t)
    {
        std::cout<<au<<"  ";
    }
    std::cout<<std::endl;
}


int main(int argc, char* argv[])
{
    std::unordered_set<std::string> un_s1;
    un_s1.emplace("c++");
    un_s1.emplace("c");
    un_s1.insert("java");
    un_s1.insert("Rust");
    showInfo(un_s1);

    //直接初始化
    std::unordered_set<std::string> un_s2{"student", "teacher", "boy", "girl", "class1", "class2", "score"};
    showInfo(un_s2);

    //拷贝初始化
    std::unordered_set<std::string> un_s3 = un_s1;
    showInfo(un_s3);

    return 0;
}

输出

Rust  java  c++  c
class2  class1  teacher  score  girl  boy  student
Rust  java  c++  c

3. 使用

3.1 元素访问

方法说明
iterator集合元素的遍历

unordered_set不支持下标访问

示例

int main(int argc, char* argv[])
{
    std::unordered_set<std::string> un_s1{"student", "teacher", "boy", "girl", "class1", "class2", "score"};
    auto iter = un_s1.begin();
    while(iter != un_s1.end())
    {
        std::cout<<*iter<<" ";
        iter++;
    }
    std::cout<<std::endl;    

    return 0;
}

输出

class2 class1 teacher score girl boy student

3.2 集合大小

方法说明
empty集合判空
size集合大小
max_size最大容量
clear清空unordered_set的大小
swap交换两个unordered_set的内容
int main(int argc, char* argv[])
{
    //empty size max_size clear swap
    std::unordered_set<std::string> un_s1{"student", "teacher", "boy", "girl", "class1", "class2", "score"};
    showInfo(un_s1);
    std::cout<<"size() is: "<<un_s1.size()<<"\n";
    std::cout<<"max_size() is: "<<un_s1.max_size()<<"\n\n";

    std::unordered_set<std::string> un_s2;
    un_s1.swap(un_s2);
    std::cout<<"[un_s1]"<<std::endl;
    showInfo(un_s1);
    std::cout<<"[un_s2]"<<std::endl;
    showInfo(un_s2);
    std::cout<<std::endl;

    un_s2.clear();
    if(un_s2.empty())
    {
        std::cout<<"empty\n";
    }
    else{
        std::cout<<"not empty\n";
    }    
}

输出

class2  class1  teacher  score  girl  boy  student
size() is: 7
max_size() is: 384307168202282325

[un_s1]

[un_s2]
class2  class1  teacher  score  girl  boy  student

empty

3.3 元素的修改

方法说明
=直接赋值
get_allocator返回内存分配器
insert插入元素
emplace插入元素
emplace_hint插入元素,需要添加位置
erase删除元素
erase_if(C++20)条件删除,通常和lambda一起使用
extract(C++17)元素的删除,可以返回该值
merge(C++17)unordered_set的合并

示例

int main(int argc, char* argv[])
{
    //operator=   get_allocator   insert emplace emplace_hint erase extract merge  
    std::unordered_set<std::string> un_s1{"c++"};
    un_s1.insert("c");
    un_s1.emplace("Rust");
    un_s1.emplace_hint(un_s1.begin(), "golang");
    showInfo(un_s1);
    std::cout<<std::endl;

    un_s1.erase(un_s1.begin());
    std::cout<<"[erase]"<<std::endl;
    showInfo(un_s1);
    std::cout<<std::endl;

    //extract
    auto re = un_s1.extract(un_s1.begin());    
    showInfo(un_s1);
    std::cout<<std::endl;

    //merge
    std::unordered_set<std::string> un_s2{"student", "teacher", "boy", "girl"};
    un_s1.merge(un_s2);
    showInfo(un_s1);
    showInfo(un_s2);   //输出空

    return 0;    
}

输出

Rust  c  golang  c++

[erase]
c  golang  c++

golang  c++

c++  golang  student  boy  girl  teacher

3.5 元素的操作

桶是内部的存储结构

方法说明
count计算unordered_set中元素出现的次数
find查找元素
equal_range返回的是一个范围 [lower_bound, upper_bound]
bucket_count返回桶的数量
max_bucket_count返回桶可以达到的最大数量
bucket_size(n)返回第n个桶中元素的数量
bucket返回元素在第几个桶中
load_factor返回桶的平均元素数
max_load_factor返回最大负载系数
rehash(n)设定桶的数目为n
reserve(n)设定桶的数目最少为n
hash_function返回一个唯一的大小为size_t的值
key_eq返回unordered_set使用的键等效项比较谓词

示例1

int main(int argc, char* argv[])
{
    std::unordered_set<std::string> un_s1{"c", "Rust", "golang"};
    showInfo(un_s1);
    auto iter = un_s1.find("c");
    std::cout<<*iter<<std::endl;
    std::cout<<"count() test "<<un_s1.count("golang")<<std::endl;

    //equal_range
    auto equal_iter = un_s1.equal_range("Rust");
    std::cout<<*equal_iter.first<<std::endl;
    std::cout<<*equal_iter.second<<std::endl;

    //bucket_count  max_bucket_count  bucket_size  bucket
    //桶
    std::cout<<"un_s1 bucket_count() value is: "<<un_s1.bucket_count()<<std::endl;       //返回当前桶的数量,从0到n-1
    std::cout<<"un_s1 max_bucket_count() value is: "<<un_s1.max_bucket_count()<<"\n\n";   //返回最大桶的数量

    std::cout<<"un_s1 "<<un_s1.bucket_size(1)<<"\n\n";       //返回桶1中元素的个数
    for(int32_t i = 0; i<un_s1.bucket_count(); i++)
    {
        std::cout<<"size of "<<i<<" bucket is: "<<un_s1.bucket_size(i)<<std::endl;
    }
    std::cout<<std::endl;

    std::cout<<"the location of Rust is: "<<un_s1.bucket("Rust")<<std::endl;       //返回"Rust"在第几个桶中

    return 0;    
}

输出

golang  Rust  c
c
count() test 1
Rust
c
un_s1 bucket_count() value is: 5
un_s1 max_bucket_count() value is: 384307168202282325

un_s1 0

size of 0 bucket is: 0
size of 1 bucket is: 0
size of 2 bucket is: 1
size of 3 bucket is: 1
size of 4 bucket is: 1

the location of Rust is: 3

示例2

int main(int argc, char* argv[])
{    
    
    std::unordered_set<std::string> un_s1{"c++", "c", "Rust", "golang", "linux", "matlab"};        
    showInfo(un_s1);
    std::cout<<"bucket_count is: "<<un_s1.bucket_count()<<std::endl;    
    std::cout<<"un_s1.size is: "<<un_s1.size()<<std::endl;

    //load_factor,返回桶的平均元素数 size()/bucket_count()
    std::cout<<"load_factor value is: "<<un_s1.load_factor()<<std::endl;

    //max_load_factor,返回最大负载系数
    std::cout<<"max_load_factor value is: "<<un_s1.max_load_factor()<<std::endl;

    //rehash,设定桶的数目
    un_s1.rehash(10);    
    std::cout<<"bucket_count after rehash is: "<<un_s1.bucket_count()<<std::endl;
    
    //reserve,设置桶的数目(最少包含20个)
    un_s1.reserve(20);   
    std::cout<<"bucket_count after reserve is: "<<un_s1.bucket_count()<<std::endl;
    

    //hash_function,返回一个唯一的大小为size_t的值
    auto au_hash = un_s1.hash_function();
    std::cout<<au_hash("c")<<std::endl;

    //key_eq,返回unordered_set使用的键等效项比较谓词
    auto au_key = un_s1.key_eq()("c","c");
    
    if(au_key)
    {
        std::cout<<"equal"<<std::endl;
    }
    else{
        std::cout<<"not equal"<<std::endl;
    }        

    return 0;    
}

输出

matlab  Rust  linux  golang  c  c++
bucket_count is: 7
un_s1.size is: 6
load_factor value is: 0.857143
max_load_factor value is: 1
bucket_count after rehash is: 11
bucket_count after reserve is: 23
10959529184379665549
equal

3.6 迭代器

迭代器最好结合STL中的算法一起使用;迭代器的返回值最好用auto接收;

迭代器的类型包括:iterator和const_iterator

方法说明
begin返回unordered_set的头元素(迭代器),其值可修改
end返回unordered_set的尾元素(迭代器),其值可修改
cbegin返回set的头元素(迭代器),其值不可修改,const属性
cend返回set的尾元素(迭代器),其值不可修改,const属性

迭代器的辅助函数
辅助函数的使用需要包含头文件

#include <iterator>
方法说明
advance移动迭代器的位置
distance计算两个迭代器的距离
begin返回容器第一个元素的迭代器
end返回容器最后一个元素的迭代器
prev迭代器向前移动一个元素
next迭代器向后移动一个元素

示例

int main(int argc, char* argv[])
{        
    std::unordered_set<std::string> un_s1{"c++", "c", "Rust", "golang", "linux", "matlab"};        
    showInfo(un_s1);
    std::cout<<std::endl;

    auto au = un_s1.begin();
    while (au != un_s1.end())
    {
        std::cout<<*au<<" ";
        au++;
    }
    std::cout<<std::endl;

    auto au_begin = un_s1.begin();
    std::advance(au_begin,2);
    std::cout<<*au_begin<<std::endl;

    au_begin = un_s1.begin();
    auto au_end = un_s1.end();

    std::cout<<std::distance(au_begin,au_end)<<std::endl;   
    
    return 0;    
}

输出

matlab  Rust  linux  golang  c  c++

matlab Rust linux golang c c++
linux
6
;