C++--哈希表的实现及unorder_set和unorder

1.什么是哈希表

哈希表是一种数据结构，用来存放数据的，哈希表存放的数据是无序的，可以实现增删查，当时不能修改数据。可以不经过任何比较，一次直接从表中得到要搜索的元素。如果构造一种存储结构，通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。该方式即为哈希(散列)方法，哈希方法中使用的转换函数称为哈希(散列)函数，构造出来的结构称为哈希表(Hash Table)(或者称散列表)。
unordered_map是存储<key, value>键值对的关联式容器，其允许通过keys快速的索引到与其对应的value。
在unordered_map中，键值通常用于惟一地标识元素，而映射值是一个对象，其内容与此键关联。键和映射值的类型可能不同。
在内部,unordered_map没有对<kye, value>按照任何特定的顺序排序, 为了能在常数范围内找到key所对应的value，unordered_map将相同哈希值的键值对放在相同的桶中。
unordered_map容器通过key访问单个元素要比map快，但它通常在遍历元素子集的范围迭代方面效率较低。
unordered_maps实现了直接访问操作符(operator[])，它允许使用key作为参数直接访问value。
它的迭代器至少是前向迭代器。

unorder_set是用来存储<key>的关联式容器，可以用来快速的查找和去重，在内部是按照无序的方式排列的，它的迭代器和unorder_map的使用相同，其他的特性和unorder_map的特性差不多。

2.哈希冲突

对于两个数据元素的关键字$k_i$和 $k_j$(i != j)，有$k_i$ != $k_j$，但有：Hash($k_i$) ==
Hash($k_j$)，即：不同关键字通过相同哈希哈数计算出相同的哈希地址，该种现象称为哈希冲突或哈希碰撞。把具有不同关键码而具有相同哈希地址的数据元素称为“同义词”。

解决哈希冲突：

引起哈希冲突的一个原因可能是：哈希函数设计不够合理。
哈希函数设计原则：
哈希函数的定义域必须包括需要存储的全部关键码，而如果散列表允许有m个地址时，其值
域必须在0到m-1之间；哈希函数计算出来的地址能均匀分布在整个空间中；哈希函数应该比较简单。

解决哈希冲突两种常见的方法是：闭散列和开散列

闭散列：

线性探测：从发生冲突的位置开始，依次向后探测，直到寻找到下一个空位置为止。

通过哈希函数获取待插入元素在哈希表中的位置，如果该位置中没有元素则直接插入新元素，如果该位置中有元素发生哈希冲突，使用线性探测找到下一个空位置，插入新元素。

开散列：

开散列法又叫链地址法(开链法)，首先对关键码集合用散列函数计算散列地址，具有相同地
址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链
接起来，各链表的头结点存储在哈希表中。

开散列与闭散列比较：
应用链地址法处理溢出，需要增设链接指针，似乎增加了存储开销。事实上：
由于开地址法必须保持大量的空闲空间以确保搜索效率，如二次探查法要求装载因子a <=
0.7，而表项所占空间又比指针大的多，所以使用链地址法反而比开地址法节省存储空间。

3.哈希表的实现

闭散列的实现：

#include <iostream>
#include <vector>
#include <string>
//usng namespace std;
using std::cout;
using std:: endl;
using std::cin;
//闭散列
namespace open_address
{//处理不是整形的字符template<class T>struct DefaultHashFunc{size_t operator()(const T& t){return size_t(value(t));}};//特殊化处理字符串template<>struct DefaultHashFunc<string>{const size_t operator()(const string& t){//KeyofT value;size_t num=1;for (auto e : t){num *= 131;num +=e;}return num;}};//枚举三种状态enum State{Empty,Exist,Delete};//哈希节点template<class T>struct HashDateNode{private:T _kv;State _state=Empty;};
//哈希表template<class K, class T,class KeyofT,class DefaultHashFunc>class HashTable{typedef HashDateNode<T> Node;HashTable()//初始化，开辟10个空间{_Date.resize(10);}//插入数值bool insert(const T& Date){DefaultHashFunc Func;//处理字符串问题KeyofT value;//处理set和map的数值问题函数//扩容if (n * 10 > _Date.size() * 7)//负载因子{int newsize = _Date.size() * 2;vector<Node*> newHT;newHT.resize(newsize);//开辟空间for (int i = 0; i < _Date.size(); i++){if (_Date._state == Exist)//讲旧数据写入新空间中{newHT.insert(KetofT(_Date));}_Date.swap(newHT._Date);//交换数据}}//插入哈希表size_t hashi =Func(value(Date))% _Date.size();while (_Date[hashi]._state == Exist)//存在就++{++hashi;hashi %= _Date.size();}_Date[hashi]._kv = Date;_Date[hashi]._state = Exist;n++;//存放个数加一return true;}//查找函数vector<Node*> Find(const K& t){DefaultHashFunc Func;KeyofT value;size_t hashi = Func(value(t)) % _Date.size();while (_Date[hashi]._state != Empty){if (_Date[hashi]._state == Exist&& value(_Date[hashi]) == t){return   _Date[hashi];}++hashi;hashi %= _Date.size();}return nullptr;}//删除bool erash(const K& key){vector<Node*> ret = Find(key);//查找是否存在if (ret){ret._state= Exist;n--;return true;}return false;}private:vector<Node*> _Date;//数据size_t n;//存储有效个数};
}

开散列的实现

#include <iostream>
#include <string>
#include <vector>
using namespace std;
//处理字符
template<class T>
struct DefaultHashFunc
{size_t operator()(const T& t){return size_t(t);}
};template<>
struct DefaultHashFunc<string>
{size_t operator()(const string& t){size_t num = 0;for (auto e : t){num *= 131;num += e;}return num;}
};
//哈希桶，闭散列
namespace hash_bucket
{template<class T> struct HashDateNode{T _Date;HashDateNode<T>* _next;HashDateNode(const T& Date):_Date(Date),_next(nullptr){}};template<class K, class T, class KeyofT, class HashFunc>class HashDate;//前置声明//迭代器//template<class K,class T,class KeyofT ,class HashFunc>//旧版template<class K, class T, class Ptr, class Ref, class KeyofT, class HashFunc>//新版struct HashIterator{typedef HashDateNode<T> Node;typedef HashIterator<K,T, Ptr, Ref,KeyofT, HashFunc> Self;typedef HashIterator<K, T, T*, T&, KeyofT, HashFunc> iterator;Node* _node;const HashDate<K, T, KeyofT, HashFunc>* _pht; //设置位const ，防止普通类型无法转化为const类型//初始化/*const HashIterator(Node* node, HashDate<K, T, KeyofT, HashFunc>* pht):_node(node), _pht(pht){}*///const初始化HashIterator(Node* node, const HashDate<K, T, KeyofT, HashFunc>*  pht):_node(node), _pht(pht){}// 普通迭代器时，他是拷贝构造// const迭代器时，他是构造函数HashIterator(const iterator& it)//防止set类型的const类型与非const类型的转换（模板不同）:_node(it._node), _pht(it._pht){}Ref operator*(){return _node->_Date;}Ptr operator->(){return &_node->_Date;}bool operator!=(const Self& s){return _node != s._node;}bool operator==(const Self& s){return _node == s._node;}Self& operator++(){if (_node->_next){_node = _node->_next;}else{HashFunc Func;KeyofT value;size_t hashi = Func(value(_node->_Date)) % _pht->_table.size(); //该用仿函数//从下一个哈希桶查找hashi++;while (hashi < _pht->_table.size()){if (_pht->_table[hashi])//不为空{_node = _pht->_table[hashi];return *this;}else//为空{++hashi;}}_node = nullptr;}return *this;}};template<class K, class T, class KeyofT, class HashFunc = DefaultHashFunc<K>>class HashDate{typedef HashDateNode<T> Node;// //友元template<class K, class T, class Ptr, class Ref, class KeyofT, class HashFunc>friend struct HashIterator;//可以使用迭代器public:typedef HashIterator<K, T, T*, T&, KeyofT, HashFunc> iterator;typedef HashIterator<K, T, const T*, const T&, KeyofT, HashFunc> const_iterator;iterator begin(){for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];if (cur){return iterator(cur,this);}}return iterator(nullptr, this);}const_iterator begin() const{for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];if (cur){return iterator(cur, this);}}return iterator(nullptr, this);}iterator end() {return iterator(nullptr, this);}const_iterator end() const{return iterator(nullptr, this);}HashDate(){_table.resize(10, nullptr);}~HashDate(){for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_table[i] = nullptr;}}pair<iterator,bool> Insert(const T& date){KeyofT value;HashFunc Func;iterator it = Find(value(date));//it!=end()if(it != end()){return make_pair(it,false);}if (n == _table.size())//扩容{int newsize = _table.size() * 2;vector<Node*> newHash;newHash.resize(newsize);for (size_t i = 0; i < _table.size(); i++)//旧数据{Node* cur = _table[i];while (cur){Node* next = cur->_next;size_t hashi = Func(value(cur->_Date))% newsize;cur->_next = newHash[hashi];newHash[hashi] = cur;cur = next;}_table[i] = nullptr;}_table.swap(newHash);//交换数据}//插入size_t hashi = Func(value(date))% _table.size();//改变//Node* cur = _table[hashi];//尾插//Node* newNode = new Node(date);//while (cur)//{//	cur = cur->_next;//}//cur = newNode;//头插Node* newNode = new Node(date);//新节点newNode->_next = _table[hashi];//头结点_table[hashi] = newNode;//尾节点n++;return make_pair(iterator(newNode,this),true);}//查找iterator Find(const K& key)//Find为K类型的只用一个数值，T类型的在map中为pair<K,T>类型{HashFunc Func;KeyofT value;size_t hashi = Func(key) % _table.size();//size_t hashi = Func(value(key)) % _table.size();//不能使用这个Node* cur = _table[hashi];while (cur){if (value(cur->_Date) == key)//?{return iterator(cur,this);}cur = cur->_next;}return iterator(nullptr,this);}//删除bool Erase(const K& key){HashFunc Func;KeyofT value;iterator ret=Find(key);if (ret == end())return true;size_t x = Func(value(key))% _table.size();Node* cur = _table[x];Node* prev = nullptr;while (cur){if (value(cur->_Date) == key){if (prev == nullptr){_table[x] = cur->_next;}else{prev->_next = cur->_next;}}prev = cur;cur = cur->_next;delete cur;return true;}return false;}//打印void Print(){for (int i = 10; i < _table.size(); i++){Node* cur = _table[i];while (cur){std::cout << cur->_Date.first<<" " << cur->_Date.second <<" " << std::endl;cur = cur->_next;}}std::cout << std::endl;}private:vector<Node*> _table; //存储数据size_t n=0; //记录容器中的个数};
}

4.Unorderset和Unordermap的实现

4.1 Unorderset

#pragma once
#include "HashTable.h"
using namespace hash_bucket;
//using namespace open_address;template<class K>
class Unorderset
{struct SetKeyofT{const K& operator()(const K& k){return k;}};
public:typedef typename hash_bucket::HashDate<K, K, SetKeyofT>::const_iterator iterator;typedef typename hash_bucket::HashDate<K, K, SetKeyofT>::const_iterator const_iterator;//typedef typename hash_bucket::HashDate<K, K, SetKeyofT> Hash;
public:const_iterator begin() const{return _ht.begin();}const_iterator end() const{return _ht.end();}pair<iterator, bool> insert(const K& k){return _ht.Insert(k);}iterator find(const K& kk){return _ht.Find(kk);}bool erase(const K& kk){return _ht.Erase(kk);}
private:HashDate<K, K, SetKeyofT> _ht;
};

4.2 Unordermap

#pragma once
#include "HashTable.h"
using namespace hash_bucket;
template<class K,class T>
class Unordermap
{struct MapkeyofT{const K& operator()(const pair<const K, T>& kt){return kt.first;}};
public:typedef typename hash_bucket::HashDate<K, pair<const K, T>, MapkeyofT>::iterator iterator;//普通迭代器typedef typename hash_bucket::HashDate<K, pair<const K, T>, MapkeyofT>::const_iterator const_iterator;//const迭代器//typedef typename hash_bucket::HashDate<K, pair<K, T>, MapkeyofT> Hash;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}const_iterator begin() const{return _ht.begin();}const_iterator end() const{return _ht.end();}//插入pair<iterator, bool> insert(const pair<K,T>& kt){return _ht.Insert(kt);}//查找iterator find(const K& kk){return _ht.Find(kk);}//重载[]T& operator[](const K& kk){pair<iterator, bool> ret = insert(make_pair(kk, T()));return ret.first->second;}//删除bool eraser(const K& kk){return _ht.Erase(kk);}
private:HashDate<K, pair<const K, T>, MapkeyofT> _ht;
};

4.3 完整代码

Ps:具体实现中建议分开写

#pragma once
#include "HashTable.h"
using namespace hash_bucket;
using namespace open_address;//set实现
template<class K>
class Unorderset
{struct SetKeyofT{const K& operator()(const K& k){return k;}};
public:typedef typename hash_bucket::HashDate<K, K, SetKeyofT>::const_iterator iterator;typedef typename hash_bucket::HashDate<K, K, SetKeyofT>::const_iterator const_iterator;//typedef typename hash_bucket::HashDate<K, K, SetKeyofT> Hash;
public:const_iterator begin() const{return _ht.begin();}const_iterator end() const{return _ht.end();}pair<iterator, bool> insert(const K& k){return _ht.Insert(k);}iterator find(const K& kk){return _ht.Find(kk);}bool erase(const K& kk){return _ht.Erase(kk);}
private:HashDate<K, K, SetKeyofT> _ht;
};//map实现
template<class K,class T>
class Unordermap
{struct MapkeyofT{const K& operator()(const pair<const K, T>& kt){return kt.first;}};
public:typedef typename hash_bucket::HashDate<K, pair<const K, T>, MapkeyofT>::iterator iterator;//普通迭代器typedef typename hash_bucket::HashDate<K, pair<const K, T>, MapkeyofT>::const_iterator const_iterator;//const迭代器//typedef typename hash_bucket::HashDate<K, pair<K, T>, MapkeyofT> Hash;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}const_iterator begin() const{return _ht.begin();}const_iterator end() const{return _ht.end();}//插入pair<iterator, bool> insert(const pair<K,T>& kt){return _ht.Insert(kt);}//查找iterator find(const K& kk){return _ht.Find(kk);}//重载[]T& operator[](const K& kk){pair<iterator, bool> ret = insert(make_pair(kk, T()));return ret.first->second;}//删除bool eraser(const K& kk){return _ht.Erase(kk);}
private:HashDate<K, pair<const K, T>, MapkeyofT> _ht;
};
//底层实现
#pragma once//防止头文件重复引用
#include <iostream>
#include <vector>
#include <string>
using namespace std;//处理不是整形的字符template<class T>struct DefaultHashFunc{size_t operator()(const T& t){return size_t(value(t));}};//特殊化处理字符串template<>struct DefaultHashFunc<string>{const size_t operator()(const string& t){//KeyofT value;size_t num=1;for (auto e : t){num *= 131;num +=e;}return num;}};
//开散列
namespace open_address
{//枚举三种状态enum State{Empty,Exist,Delete};//哈希节点template<class T>struct HashDateNode{private:T _kv;State _state=Empty;};
//哈希表template<class K, class T,class KeyofT,class DefaultHashFunc>class HashTable{typedef HashDateNode<T> Node;HashTable()//初始化，开辟10个空间{_Date.resize(10);}//插入数值bool insert(const T& Date){DefaultHashFunc Func;//处理字符串问题KeyofT value;//处理set和map的数值问题函数//扩容if (n * 10 > _Date.size() * 7)//负载因子{int newsize = _Date.size() * 2;vector<Node*> newHT;newHT.resize(newsize);//开辟空间for (int i = 0; i < _Date.size(); i++){if (_Date._state == Exist)//讲旧数据写入新空间中{newHT.insert(KetofT(_Date));}_Date.swap(newHT._Date);//交换数据}}//插入哈希表size_t hashi =Func(value(Date))% _Date.size();while (_Date[hashi]._state == Exist)//存在就++{++hashi;hashi %= _Date.size();}_Date[hashi]._kv = Date;_Date[hashi]._state = Exist;n++;//存放个数加一return true;}//查找函数vector<Node*> Find(const K& t){DefaultHashFunc Func;KeyofT value;size_t hashi = Func(value(t)) % _Date.size();while (_Date[hashi]._state != Empty){if (_Date[hashi]._state == Exist&& value(_Date[hashi]) == t){return   _Date[hashi];}++hashi;hashi %= _Date.size();}return nullptr;}//删除bool erash(const K& key){vector<Node*> ret = Find(key);//查找是否存在if (ret){ret._state= Exist;n--;return true;}return false;}private:vector<Node*> _Date;//数据size_t n;//存储有效个数};
}//哈希桶，闭散列
namespace hash_bucket
{template<class T> struct HashDateNode{T _Date;HashDateNode<T>* _next;HashDateNode(const T& Date):_Date(Date),_next(nullptr){}};template<class K, class T, class KeyofT, class HashFunc>class HashDate;//前置声明//迭代器//template<class K,class T,class KeyofT ,class HashFunc>//旧版template<class K, class T, class Ptr, class Ref, class KeyofT, class HashFunc>//新版struct HashIterator{typedef HashDateNode<T> Node;typedef HashIterator<K,T, Ptr, Ref,KeyofT, HashFunc> Self;typedef HashIterator<K, T, T*, T&, KeyofT, HashFunc> iterator;Node* _node;const HashDate<K, T, KeyofT, HashFunc>* _pht; //设置位const ，防止普通类型无法转化为const类型//初始化/*const HashIterator(Node* node, HashDate<K, T, KeyofT, HashFunc>* pht):_node(node), _pht(pht){}*///const初始化HashIterator(Node* node, const HashDate<K, T, KeyofT, HashFunc>*  pht):_node(node), _pht(pht){}// 普通迭代器时，他是拷贝构造// const迭代器时，他是构造函数HashIterator(const iterator& it)//防止set类型的const类型与非const类型的转换（模板不同）:_node(it._node), _pht(it._pht){}Ref operator*(){return _node->_Date;}Ptr operator->(){return &_node->_Date;}bool operator!=(const Self& s){return _node != s._node;}bool operator==(const Self& s){return _node == s._node;}Self& operator++(){if (_node->_next){_node = _node->_next;}else{HashFunc Func;KeyofT value;size_t hashi = Func(value(_node->_Date)) % _pht->_table.size(); //该用仿函数//从下一个哈希桶查找hashi++;while (hashi < _pht->_table.size()){if (_pht->_table[hashi])//不为空{_node = _pht->_table[hashi];return *this;}else//为空{++hashi;}}_node = nullptr;}return *this;}};template<class K, class T, class KeyofT, class HashFunc = DefaultHashFunc<K>>class HashDate{typedef HashDateNode<T> Node;// //友元template<class K, class T, class Ptr, class Ref, class KeyofT, class HashFunc>friend struct HashIterator;//可以使用迭代器public:typedef HashIterator<K, T, T*, T&, KeyofT, HashFunc> iterator;typedef HashIterator<K, T, const T*, const T&, KeyofT, HashFunc> const_iterator;iterator begin(){for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];if (cur){return iterator(cur,this);}}return iterator(nullptr, this);}const_iterator begin() const{for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];if (cur){return iterator(cur, this);}}return iterator(nullptr, this);}iterator end() {return iterator(nullptr, this);}const_iterator end() const{return iterator(nullptr, this);}HashDate(){_table.resize(10, nullptr);}~HashDate(){for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_table[i] = nullptr;}}pair<iterator,bool> Insert(const T& date){KeyofT value;HashFunc Func;iterator it = Find(value(date));//it!=end()if(it != end()){return make_pair(it,false);}if (n == _table.size())//扩容{int newsize = _table.size() * 2;vector<Node*> newHash;newHash.resize(newsize);for (size_t i = 0; i < _table.size(); i++)//旧数据{Node* cur = _table[i];while (cur){Node* next = cur->_next;size_t hashi = Func(value(cur->_Date))% newsize;cur->_next = newHash[hashi];newHash[hashi] = cur;cur = next;}_table[i] = nullptr;}_table.swap(newHash);//交换数据}//插入size_t hashi = Func(value(date))% _table.size();//改变//Node* cur = _table[hashi];//尾插//Node* newNode = new Node(date);//while (cur)//{//	cur = cur->_next;//}//cur = newNode;//头插Node* newNode = new Node(date);//新节点newNode->_next = _table[hashi];//头结点_table[hashi] = newNode;//尾节点n++;return make_pair(iterator(newNode,this),true);}//查找iterator Find(const K& key)//Find为K类型的只用一个数值，T类型的在map中为pair<K,T>类型{HashFunc Func;KeyofT value;size_t hashi = Func(key) % _table.size();//size_t hashi = Func(value(key)) % _table.size();//不能使用这个Node* cur = _table[hashi];while (cur){if (value(cur->_Date) == key)//?{return iterator(cur,this);}cur = cur->_next;}return iterator(nullptr,this);}//删除bool Erase(const K& key){HashFunc Func;KeyofT value;iterator ret=Find(key);if (ret == end())return true;size_t x = Func(value(key))% _table.size();Node* cur = _table[x];Node* prev = nullptr;while (cur){if (value(cur->_Date) == key){if (prev == nullptr){_table[x] = cur->_next;}else{prev->_next = cur->_next;}}prev = cur;cur = cur->_next;delete cur;return true;}return false;}//打印void Print(){for (int i = 10; i < _table.size(); i++){Node* cur = _table[i];while (cur){std::cout << cur->_Date.first<<" " << cur->_Date.second <<" " << std::endl;cur = cur->_next;}}std::cout << std::endl;}private:vector<Node*> _table; //存储数据size_t n=0; //记录容器中的个数};
}

C++--哈希表的实现及unorder_set和unorder_map的封装

相关文章

【DETR】End-to-End Object Detection with Transformers

golang 自动生成文件头

5.14 Get Log Page Command

Postgresql并行框架随手记

Linux 在线解压jar包

Windows10下的GTSAM因子图安装与使用

大数据分析（Python）学习笔记1（python基础快速过）

【考研数学】线性代数第五章 —— 特征值和特征向量（2，特征值与特征向量的性质）