C/C++ 查找算法

一.二分算法

调整:

如果arr[mid] < x, min = mid + 1;

如果arr[mid] > x, max = mid - 1;

如果arr[mid] == x, 找到结果

终止条件:min >= max

二分查找--泛型情况

找第一个1:

找最后一个1:

注意:mid = (min + max + 1) /2

代码演示:

#define _CRT_SECURE_NO_WARNINGS
#include<iostream>
#include<algorithm>using namespace std;void output(int* arr, int n, int ind = -1) {int len = 0;for (int i = 0; i < n; i++) {len += printf("%4d", i);}printf("\n");for (int i = 0; i < len; i++) {printf("-");}printf("\n");for (int i = 0; i < n; i++) {if (i == ind) printf("\033[1;32m");printf("%4d", arr[i]);if (i == ind) printf("\033[0m");}printf("\n");return;
}int binary_search(int* arr, int n, int x) {int head = 0, tail = n - 1, mid;while (head <= tail) {mid = (head + tail) / 2;if (arr[mid] == x) return mid;if (arr[mid] < x) head = mid + 1;else tail = mid - 1;}return -1;
}void test_binary_search(int n) {int* arr = (int*)malloc(sizeof(int) * n);arr[0] = rand() % 10;for (int i = 1; i < n; i++) arr[i] = arr[i - 1] + rand() % 10;output(arr, n);int x;while (~scanf("%d", &x)) {if (x == -1) break;int ind = binary_search(arr, n, x);output(arr, n, ind);}
}#define EXP 1e-4double f(double x) {if (x >= 0) x -= min(x, 3000.0) * 0.03;if (x >= 3000) x -= (min(x, 12000.0) - 3000) * 0.1;if (x >= 12000) x -= (min(x, 25000.0) - 12000) * 0.2;if (x >= 25000) x -= (min(x, 35000.0) - 25000) * 0.25;if (x >= 35000) x -= (min(x, 55000.0) - 35000) * 0.3;if (x >= 55000) x -= (min(x, 80000.0) - 55000) * 0.35;if (x >= 80000) x -= (x - 80000) * 0.45;return x;
}double binary_algorithm(double y) {double head = 0, tail = 1000000, mid;while (tail - head >= EXP) {mid = (head + tail) / 2;if (f(mid) < y) head = mid;else tail = mid;}return head;
}void test_binary_algorithm() {double y;while (~scanf("%lf", &y)) {if (y < 0) break;double x = binary_algorithm(y);printf("f(%.2lf) = %.2lf\n", x, y);}
}int main() {
#define MAX_N 10test_binary_search(MAX_N);test_binary_algorithm();return 0;
}

二.跳跃表

对于原有链表中,对每个节点都设置了一个层高;

头节点的值为一个极小值,尾节点的值为一个极大值.

跳跃表本身,会维护节点中值的有序性

查找操作:

从左上角的节点开始查找:

若下一个节点的值比要查找的值大,则向下走一层;

若下一个节点的值比要查找的值小,则走向下一个节点;

插入操作:

跳跃表的每一个节点会随机一个层高

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<string.h>
#include<inttypes.h>typedef struct Node {int key, level;struct Node* next, * down, * up;
}Node;#define min(a,b) ((a) < (b) ? (a) : (b))
#define MAX_LEVEL 32
typedef struct Skiplist {Node* head, * tail;int max_level;
}Skiplist;Node* getNewNode(int key, int n) {Node* nodes = (Node*)malloc(sizeof(Node) * n);for (int i = 0; i < n; i++) {nodes[i].key = key;nodes[i].level = i;nodes[i].next = NULL;nodes[i].down = ((i != 0)? nodes + i - 1 : NULL);nodes[i].up = ((i + 1 < n)? nodes + i + 1 : NULL);}return nodes + n - 1;
}Skiplist* getNewSkiplist(int n) {Skiplist* s = (Skiplist*)malloc(sizeof(Skiplist));s->head = getNewNode(INT32_MIN, n);s->tail = getNewNode(INT32_MAX, n);s->max_level = n;Node* p = s->head, *q = s->tail;while (p) {p->next = q;p = p->down;q = q->down;}while (s->head->level != 0) s->head = s->head->down;return s;
}Node* find(Skiplist* s, int x) {Node* p = s->head;while (p && p->key != x) {if (p->next->key <= x) p = p->next;else p = p->down;}return p;
}#define MAX_RAND_N 10000double randDouble() {int n = (rand() % MAX_RAND_N);return (n * 1.0 / (double)MAX_RAND_N);
}int randLevel(Skiplist* s) {int level = 1;double p = 1.0 / 2.0;double temp = randDouble();while (temp < p) {level += 1;temp = randDouble();}return min(s->max_level, level);
}void insert(Skiplist* s, int x) {int level = randLevel(s);while (s->head->level + 1 < level) {s->head = s->head->up;}Node* node = getNewNode(x, level);Node* p = s->head;printf("insert begin\n");fflush(stdout);while(p->level != node->level) p = p->down;while (p) {while (p->next->key < node->key) p = p->next;node->next = p->next;p->next = node;p = p->down;node = node->down;}return;}void clearNode(Node* p) {if (p == NULL) return;free(p);return;
}void clearSkiplist(Skiplist* s) {Node* p = s->head, *q;while (p->level != 0) p = p->down;while (p) {q = p->next;clearNode(p);p = q;}free(s);
}void output(Skiplist* s) {Node* p = s->head;int len = 0;for (int i = 0; i < s->head->level; i++) {len += printf("%4d", i);}printf("\n");for (int i = 0; i < len; i++) printf("-");printf("\n");while (p->level > 0) p = p->down;while (p) {bool flag = (p->key != INT32_MIN && p->key != INT32_MAX);for (Node* q = p; q && flag; q = q->up) {printf("%4d", q->key);}if(flag) printf("\n");p = p->next;}
}int main() {srand(time(0));int x;Skiplist* s = getNewSkiplist(MAX_LEVEL);while (~scanf("%d", &x)) {if (x == -1)break;insert(s, x);}output(s);while (~scanf("%d", &x)) {Node* p = find(s, x);if (p) {printf("key: %d, level: %d", p->key, p->level);}else {printf("NULL\n");}}clearSkiplist(s);return 0;
}

三.哈希表与布隆过滤器

哈希表:

哈希冲突:不同元素经过哈希函数被映射到了同一个位置.

哈希冲突的解决办法:

1.开放定址法

如果发生冲突,将新的元素按照一定规则,去重新计算得出一个新的下标;

2.再哈希法

如果发生冲突,用其他的哈希函数进行计算得出新的下标;

3.建立公共溢出区法

用另一种数据结构来存储多余的元素;

4.链式地址法(推荐)

每个下标中维护一个链表;

布隆过滤器:

传统哈希表,储存空间和元素数量强相关;

布隆过滤器,储存空间和元素数量弱相关;

若经过多个哈希函数运算,出现的结果相同,则布隆过滤器就认为此元素出现过;

注意,并不是一定出现过,而是大概率出现过;

如何减少误判:增加布隆过滤器的底层存储空间,降低重复的概率;

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<time.h>
#include<stdlib.h>
#include<string.h>#include<iostream>using namespace std;typedef struct Node {char* s;struct Node* next;
}Node;typedef struct HashTable {Node* data;int cnt, size;
}HashTable;Node* getNewNode(const char* s) {Node* p = (Node*)malloc(sizeof(Node));p->s = _strdup(s);p->next = NULL;return p;
}HashTable* getNewHashTable(int n) {HashTable* h = (HashTable*)malloc(sizeof(HashTable));h->data = (Node*)malloc(sizeof(Node) * n);for (int i = 0; i < n; i++) {h->data[i].next = NULL;}h->size = n;h->cnt = 0;return h;
}int hash_func(const char* s) {int seed = 131, h = 0;for (int i = 0; s[i]; i++) {h = h * seed + s[i];}return h & 0x7fffffff;
}bool find(HashTable* h, const char* s) {int hcode = hash_func(s), ind = hcode % h->size;Node* p = h->data[ind].next;while (p) {if (strcmp(p->s, s) == 0)return true;p = p->next;}return false;
}void swapHashTable(HashTable* h1, HashTable* h2) {swap(h1->data, h2->data);swap(h1->cnt, h2->cnt);swap(h1->size, h2->size);return;
}void expand(HashTable* h);bool insert(HashTable* h, const char* s) {if (h->cnt >= h->size * 2) {expand(h);}int hcode = hash_func(s), ind = hcode% h->size;Node* p = getNewNode(s);p->next = h->data[ind].next;h->data[ind].next = p;h->cnt += 1;return true;
}void clearNode(Node* p) {if (p == NULL) return;if (p->s) free(p->s);free(p);return;
}void clearHashTable(HashTable* h) {if (h == NULL) return;for (int i = 0; i < h->size; i++) {Node* p = h->data[i].next, * q;while (p) {q = p->next;clearNode(p);p = q;}}free(h->data);free(h);return;
}void  expand(HashTable* h) {HashTable* new_h = getNewHashTable(h->size * 2);for (int i = 0; i < h->size; i++) {Node* p = h->data[i].next;while (p) {insert(new_h, p->s);p = p->next;}}swapHashTable(h, new_h);clearHashTable(new_h);
}
void output(HashTable* h) {printf("\n\nHash Table(%d/%d): \n", h->cnt, h->size);for (int i = 0; i < h->size; i++) {printf("%d:", i);Node* p = h->data[i].next;while (p) {printf("%s -> ", p->s);p = p->next;}printf("\n");}return;
}int main() {srand(time(0));char s[100];
#define MAX_N 2HashTable* h = getNewHashTable(MAX_N);while (~scanf("%s", s)) {if (strcmp(s, "end") == 0) break;insert(h, s);}output(h);while (~scanf("%s", s)) {printf("find(%s) = %d\n", s, find(h, s));}return 0;
}

代码练习

1. 两数之和 - 力扣（LeetCode）

哈希表做法

class Solution {
public:vector<int> twoSum(vector<int>& nums, int target) {unordered_map<int, int> h;vector<int> ret(2);for(int i = 0; i < nums.size();i++) {if(h.find(target -nums[i]) != h.end()) {ret[0] = h[target - nums[i]];ret[1] = i;break;}h[nums[i]] = i;}return ret;}
};

二分查找做法

class Solution {
public:int binary_search(vector<int>&nums, vector<int>&ind, int b, int x) {int head = b, tail = nums.size() - 1, mid;while(head <= tail) {mid = (head + tail) /2;if(nums[ind[mid]] == x) return mid;if(nums[ind[mid]] < x) head = mid + 1;else tail = mid - 1;}return -1;}vector<int> twoSum(vector<int>& nums, int target) {int n = nums.size();vector<int> ind(n, 0);for(int i = 0; i < n; i++) ind[i] = i;sort(ind.begin(), ind.end(), [&](int i , int j) {return nums[i] < nums[j];});vector<int> ret(2);for(int i = 0; i < n; i++) {int j = binary_search(nums, ind, i + 1, target - nums[ind[i]]);if(j == -1) continue;ret[0] = ind[j];ret[1] = ind[i];}//if(ret[0] >ret[1]) swap(ret[0], ret[1]);return ret;}
};

35. 搜索插入位置 - 力扣（LeetCode）

class Solution {
public:int searchInsert(vector<int>& nums, int target) {int head = 0, tail = nums.size(), mid;while(head < tail) {mid = (head + tail) /2;if(nums[mid] < target) head = mid + 1;else tail = mid;}return head;}
};

217. 存在重复元素 - 力扣（LeetCode）

class Solution {
public:bool containsDuplicate(vector<int>& nums) {unordered_set<int> h;for(auto x: nums) {if(h.find(x) != h.end()) return true;h.insert(x);}return false;}
};

349. 两个数组的交集 - 力扣（LeetCode）

class Solution {
public:vector<int> intersection(vector<int>& nums1, vector<int>& nums2) {unordered_set<int> h;vector<int> ret;for(auto x: nums1) h.insert(x);for(auto x: nums2) {if(h.find(x) == h.end()) continue;ret.push_back(x);h.erase(h.find(x));}return ret;}
};

3. 无重复字符的最长子串 - 力扣（LeetCode）

class Solution {
public:bool check(string &s, int l) {int cnt[256] = {0}, k = 0;for(int i = 0; s[i]; i++) {cnt[s[i]] += 1;if(cnt[s[i]] == 1) k += 1;if(i >= l) {cnt[s[i - l]] -= 1;if(cnt[s[i - l]] == 0) k -= 1;}if (l == k) return true;}return false;;}int lengthOfLongestSubstring(string s) {int head = 1, tail = s.size(), mid;//111100000while(head < tail) {mid = (head +tail + 1) /2;if(check(s,mid)) head = mid;else tail = mid - 1;} return head;}
};

4. 寻找两个正序数组的中位数 - 力扣（LeetCode）

class Solution {
public:int findK(vector<int> &n1, int ind1, vector<int> &n2, int ind2, int k) {int n = n1.size(), m = n2.size();if(k == 1) {int a = (n1.size() == ind1 ? INT32_MAX: n1[ind1]);int b = (n2.size() == ind2 ? INT32_MAX: n2[ind2]);return min(a,b);}if(n == ind1) return n2[k - 1];if(m == ind2) return n1[k - 1];int cnt1 = min(k / 2 , n - ind1);int cnt2 = min(k - cnt1, m - ind2);cnt1 = k - cnt2;if(n1[ind1 + cnt1 - 1] <= n2[ind2 + cnt2 - 1]) {return findK(n1,ind1 + cnt1, n2, ind2, k - cnt1);}return findK(n1, ind1, n2 ,ind2 + cnt2, k - cnt2);}double findMedianSortedArrays(vector<int>& nums1, vector<int>& nums2) {int n = nums1.size(), m = nums2.size();if((n + m) % 2 == 1) return findK(nums1, 0, nums2, 0, (n + m) / 2 + 1);double a = findK(nums1, 0, nums2, 0, (n + m) / 2);double b = findK(nums1, 0, nums2, 0, (n + m) / 2 + 1);return (a + b) /2.0;}
};