数据结构入门

1, 哈希表的c语言与c++实现

1.1 哈希表介绍

先说数组，我们知道数组的查找时间复杂度是O(1)，但是只支持连续整数索引，不支持非连续索引。并且在索引过大时，数组扩容的成本很高。空间利用率很低。

所以就有了哈希表，用于查找非连续索引的数据结构。也就是说，任何唯一的键(可哈希)都可以作为哈希表的索引。

哈希表是一种数据结构，它通过将键映射到存储桶（bucket）的索引来实现快速的查找、插入和删除操作。哈希表通常使用哈希函数来计算键的哈希值，然后将哈希值映射到存储桶的索引。

在哈希表中，每个存储桶都可以存储多个键值对。当需要查找一个键时，哈希表首先计算该键的*哈希值，然后将哈希值映射到存储桶的索引。如果该存储桶中存在该键，则返回对应的值；否则，返回空值。

哈希表的查找、插入和删除操作的时间复杂度通常为O(1)，即常数时间。这是因为哈希表的查找、插入和删除操作只需要计算哈希值和访问存储桶，而不需要遍历整个哈希表。(准确来说是O(1)的平均时间复杂度，在最坏情况下是O(n)，即当哈希表装载因子过高时，会发生碰撞，导致查找、插入和删除操作的时间复杂度退化为线性时间。也就是我们说的哈希冲突)

哈希表的一个重要特性是哈希冲突。当两个不同的键映射到同一个存储桶时，就会发生哈希冲突。为了解决哈希冲突，可以使用开放地址法和链地址法等技术。

1.2 哈希表的c语言实现

下面各个成员的含义

size->哈希表的大小(存储桶的数量)
data->哈希表的数组(存储桶本身)

get_id()->哈希函数，用于计算键的哈希值这个是哈希表的核心函数
get()->获取键对应的值
set()->设置键值对

对set()函数的分析

首先，我们需要计算键的哈希值。这里使用了一个简单的哈希函数，将键值与哈希表的大小取模，得到存储桶的索引。
然后，我们需要遍历存储桶中的链表，查找是否已经存在该键。如果存在，则更新对应的值；如果不存在，则将新的键值对插入到链表的末尾。

对get()函数的分析

首先，我们需要计算键的哈希值。这里使用了一个简单的哈希函数，将键值与哈希表的大小取模，得到存储桶的索引。
然后，我们需要遍历存储桶中的链表，查找是否存在该键。如果存在，则返回对应的值；如果不存在，则返回空值。

//哈希表
typedef struct MapNode {
    int key;
    int val;
    struct MapNode* next;
} MapNode;
//这个数据结构，这里作为一个子节点，用来存数据，存键值对，一个键代表一个值

typedef struct{
    int size;
    MapNode** data;
}Map; //作为哈希表的使用的入口，或者说这个就是哈希表。
//创建节点的函数
MapNode* buildNode(){
    MapNode* obj=(MapNode*)malloc(sizeof(MapNode));
    obj->key=-1;
    obj->val=0;
    obj->next=NULL;
    return obj;
}
MapNode* BuildNode(int key,int val){
    MapNode* obj=(MapNode*)malloc(sizeof(MapNode));
    obj->key=key;
    obj->val=val;
    obj->next=NULL;
    return obj;
}

//初始化哈希表
Map* init(){
    Map* obj=(Map*)malloc(sizeof(Map));
    obj->size=1000; //这个是哈希表中data数组的大小，取一个合适的大小，不能太大，大概500~10000就可以了
    obj->data=(MapNode**)malloc((obj->size)*sizeof(MapNode*)); //分配一个size大小节点数组，用于存储每个键值对
    for(int i=0;i<obj->size;i++){
        obj->data[i]=buildNode();
    }
    return obj;
}

// 哈希函数，这里用的是FNV算法
unsigned int get_id(int key, int size) {
    const unsigned int FNV_OFFSET_BASIS = 2166136261U;
    const unsigned int FNV_PRIME = 16777619U;
    unsigned int hash = (FNV_OFFSET_BASIS ^ key) * FNV_PRIME;
    return hash % size;
}

// 设置键值对函数
void set(Map* obj,int key,int val){
    unsigned int id=get_id(key,obj->size);
    MapNode* node=obj->data[id];
    while(node->next){
        if(node->key==key){
            node->val=val;
            return;
        }
        node=node->next;
    }
    node->next=BuildNode(key,val);
}

// 获取键值函数
int get(Map* obj,int key){
    unsigned int id=get_id(key,obj->size);
    MapNode* node=obj->data[id];
    while(node->next){
        if(node->key==key){
            return node->val;
        }
        node=node->next;
    }
    return 0;
}

// 释放哈希表函数
void freeMap(Map* obj){
    if (obj == NULL) return;
    
    for (int i = 0; i < obj->size; i++) {
        MapNode* curr = obj->data[i];
        while (curr != NULL) {
            MapNode* temp = curr;
            curr = curr->next;
            free(temp);
        }
    }
    
    free(obj->data);
    free(obj);
}

1.3 c++实现

c++语法较为复杂，需要先学习c++基础语法，原理与c语言实现相同。


// 哈希函数模板
template<typename T>
int get_digit(T key) {
    return key;
}

// 特化模板，针对char和string的哈希函数
template<>
inline int get_digit(char key) {
    return static_cast<int>(key);
}

// 特化模板，针对char*的哈希函数
template<>
inline int get_digit(const char *key) {
    if(key==nullptr) return 0;
    int hash=0;
    // 使用简单的DJB2哈希算法
    while(*key) {
        hash=((hash << 5)+hash)+*key++;
    }
    return hash;
}

// 特化模板，针对std::string的哈希函数
template<>
inline int get_digit(std::string key) {
    return get_digit(key.c_str());
}

// 特化模板，针对std::string_view的哈希函数
template<>
inline int get_digit(std::string_view key) {
    int hash=0;
    for(char c:key) {
        hash=((hash << 5)+hash)+c;
    }
    return hash;
}

// 哈希表模板
template <typename K, typename V>
class Unordered_dict {
    // 哈希函数
    int get_hash(const K& k) const{
        return get_digit(k) % _size;
    }

    // 构建哈希表的辅助函数
    void build() {
        for (int i=0;i<_size;i++) {
            tables[i] = new Node();
        }
    }
    // 清除哈希表的辅助函数
    void clear_nodes() {
        for (int i = 0; i < _size; ++i) {
            Node* current = tables[i];
            while (current) {
                Node* next = current->next;
                delete current;
                current = next;
            }
        }
        delete[] tables;
    }
    // 哈希表节点结构体
    struct Node {
        K key;
        V value;
        Node *next;
        Node() {
            key = K();
            value = V();
            next = nullptr;
        }
        Node(K k, V v) {
            key = k;
            value = v;
            next = nullptr;
        }
    };
    int _size;
    Node **tables; // 哈希表数组，每个元素指向一个链表的头节点
    int total_size;
public:
    // 构造函数
    Unordered_dict() {
        _size = 1<<16;
        tables = new Node*[_size];
        total_size = 0;
        build();
    }
    // 构造函数，允许指定哈希表大小
    explicit Unordered_dict(int size): _size (size) {
        tables = new Node*[size];
        total_size = 0;
        build();
    }

    // 拷贝构造函数
    Unordered_dict(const Unordered_dict<K,V> &other) {
        _size = other._size;
        tables = new Node*[_size];
        total_size = other.total_size;
        build();
        for(int i=0;i<_size;i++) {
            Node *p = other.tables[i] -> next;
            Node *cur = tables[i];
            while(p) {
                cur -> next = new Node(p->key, p->value);
                p = p->next;
                cur = cur->next;
            }
        }
    }

    ~Unordered_dict() {
        clear_nodes();
    }

    // 插入键值对, 如果键已经存在，则更新值允许[] 访问，重载[]运算符，注意返回的是引用，允许修改值
    V &operator[](K k) {
        int hash = get_hash(k);
        Node *p = tables[hash];
        while(p->next) {
            if(p->next->key == k) {
                return p->next->value;
            }
            p = p->next;
        }
        p->next = new Node(k, V());
        total_size ++;
        return p->next->value;
    }
    // 拷贝赋值运算符
    Unordered_dict<K,V>& operator=(const Unordered_dict<K,V> &other) {
        if(this == &other) return *this;
        if (this -> tables) {
            clear_nodes();
        }
        _size = other._size;
        tables = new Node*[_size];
        total_size = other.total_size;
        build();
        for(int i=0;i<_size;i++) {
            Node *p = other.tables[i] -> next;
            Node *cur = tables[i];
            while(p) {
                cur -> next = new Node(p->key, p->value);
                p = p->next;
                cur = cur->next;
            }
        }
        return *this;
    }

    Unordered_dict<K,V>& operator=(Unordered_dict<K,V> &&other) noexcept {
        if (this == &other) return *this;
        if (this -> tables) {
            clear_nodes();
        }
        _size = other._size;
        tables = other.tables;
        total_size = other.total_size;
        other.tables = nullptr;
        return *this;
    }

    // 重载<<运算符，用于输出哈希表的内容
    friend  std::ostream& operator<<(std::ostream &os, Unordered_dict<K,V>& dict) {
        os << "--------------------------------------------------" << std::endl;
        for(int i=0;i<dict._size;i++) {
            Node *p = dict.tables[i] -> next;
            while(p) {
                os << p->key << " : " << p->value << std::endl;
                p = p->next;
            }
        }
        os << "--------------------------------------------------" << std::endl;
        return os;
    }

    // 查找键对应的值，如果键不存在，则抛出异常
    V get(K k) {
        int hash = get_hash(k);
        Node *p = tables[hash]->next;
        while(p) {
            if(p->key == k) {
                return p->value;
            }
            p = p->next;
        }
        throw std::runtime_error("key not found");
    }

    // 插入键值对，如果键已经存在，则更新值
    void insert(K k, V v) {
        int hash = get_hash(k);
        Node *p = tables[hash];
        while(p->next) {
            if(p->next->key == k) {
                p->next->value = v;
                return;
            }
            p = p->next;
        }
        p->next = new Node(k, v);
        total_size ++;
    }

    // 获得哈希表的当前大小
    [[nodiscard]] int size() const {
        return total_size;
    }

    // 查找键在哈希表中出现的次数
    int count(K k) const{
        int hash = get_hash(k);
        Node *p = tables[hash] -> next;
        while(p) {
            if(p->key == k) {
                return 1;
            }
            p = p->next;
        }
        return 0;
    }

    // 删除键值对，如果键不存在，则返回0
    int erase(K k) {
        int hash = get_hash(k);
        Node *p = tables[hash];
        while(p->next) {
            if(p->next->key == k) {
                Node *q = p->next;
                p->next = p->next->next;
                delete q;
                total_size --;
                return 1;
            }
            p = p->next;
        }
        return 0;
    }
    // 判断哈希表是否为空
    bool empty() const {
        return total_size==0;
    }
};

2，优先队列

原理

优先队列是一种特殊的队列，它的特点是队列中的元素按照一定的优先级进行排序，优先级最高的元素最先出队。优先队列通常使用堆来实现，堆是一种完全二叉树，它的每个节点的值都大于或等于它的左右子节点的值（最大堆），或者小于或等于它的左右子节点的值（最小堆）。

这样，堆顶的元素就是优先级最高的元素。

实现

template<typename T>
class PriorityQueue {
    vector<T> data;
    void up(int i) {
        while(data[i] > data[(i-1)/2]) {
            swap(data[i], data[(i-1)/2]);
            i=(i-1)/2;
        }
    }

    void down(int i) {
        int j = 2*i+1;
        while(j<data.size()) {
            int maxidx = j+1 < data.size() && data[j+1] > data[j] ? j+1 : j;
            if(data[i] >= data[maxidx]) {
                break;
            }
            swap(data[i], data[maxidx]);
            i = maxidx;
            j = 2*i+1;
        }
    }
public:

    PriorityQueue() {}

    PriorityQueue(vector<T> v) {
        for(auto &x: v) {
            data.push_back(x);
            up(data.size()-1);
        }
    }

    void push(T x) {
        data.push_back(x);
        up(data.size()-1);
    }

    void pop() {
        if(data.empty()) return;
        swap(data[0], data[data.size()-1]);
        data.pop_back();
        down(0);
    }

    T top() {
        if(data.empty()) return T();
        return data[0];
    }

    bool empty() {
        return data.empty();
    }
}

注：c语言不好写

3，栈

原理

栈是一种后进先出（LIFO）的数据结构，它只允许在表的一端进行插入和删除操作。栈的实现通常使用数组或链表。

实现

template<typename T>
class Stack {
    vector<T> data;
public:
    Stack() {}

    Stack(const Stack& other) = delete;

    void push(T x) {
        data.push_back(x);
    }

    void pop() {
        if(data.empty()) return;
        data.pop_back();
    }

    T top() {
        if(data.empty()) return T();
        return data[data.size()-1];
    }

    bool empty() {
        return data.empty();
    }
}

4，队列

原理

队列是一种先进先出（FIFO）的数据结构，它只允许在表的一端进行插入操作，在另一端进行删除操作。队列的实现通常使用数组或链表。

实现

template<typename T>
class Queue {
    int* data;
    int f;
    int b;
public:
    Queue() {
        data = new int[10];
        f=b=0;
    }

    Queue(int n) {
        data = new int[n];
        f=b=0;
    }

    Queue(const Queue& other) = delete;

    ~Queue() {
        delete[] data;
    }

    void push(T x) {
        data.push_back(x);
        b++;
    }

    void pop() {
        if(f == b) return;
        f++;
    }

    T front() {
        if(f == b) return T();
        return data[f];
    }

    bool empty() {
        return f == b;
    }
}

5，图

原理

图是一种由节点和边组成的抽象数据结构，它表示了一组对象及其之间的关系。图可以是有向的，也可以是无向的。图可以是有权重的，也可以是无权重的。

实现

template<typename T>
class Graph {
    vector<vector<T>> data;
public:
    Graph() {}

    Graph(int n) {
        data.resize(n);
    }

    Graph(const Graph& other) = delete;

    void add_edge(int u, int v) {
        data[u].push_back(v);
    }

    void add_edge(int u, int v, int w) {
        data[u].push_back(make_pair(v, w));
    }

    vector<T> get_neighbors(int u) {
        return data[u];
    }
}

6，树

原理

树是一种由节点和边组成的抽象数据结构，它表示了一组对象及其之间的关系。树是一种特殊的图，它的特点是每个节点最多只有一个父节点，没有父节点的节点称为根节点。

实现

template<typename T>
class Tree {
    struct Node {
        T value;
        vector<Node*> children;
        Node(T v) : value(v) {}
    };
    Node* root;
public:
    Tree() {}

    Tree(T v) {
        root = new Node(v);
    }

    Tree(const Tree& other) = delete;


    void add_child(Node* parent, Node* child) {
        parent->children.push_back(child);
    }

    Node* get_root() {
        return root;
    }
}

7，线段树

跳转：线段树

8，双端队列

原理

双端队列是一种允许在两端进行插入和删除操作的数据结构。它既可以像栈一样后进先出，也可以像队列一样先进先出。

实现

template<typename T>
class Deque {
    vector<T> data;
public:
    Deque() {}

    void push_front(T x) {
        data.insert(data.begin(), x);
    }

    void push_back(T x) {
        data.push_back(x);
    }

    void pop_front() {
        if(data.empty()) return;
        data.erase(data.begin());
    }

    void pop_back() {
        if(data.empty()) return;
        data.pop_back();
    }

    T front() {
        if(data.empty()) return T();
        return data[0];
    }

    T back() {
        if(data.empty()) return T();
        return data[data.size()-1];
    }

    bool empty() {
        return data.empty();
    }
}

9，链表

原理

链表是一种由节点组成的抽象数据结构，每个节点包含一个或多个元素以及指向下一个节点的指针。与数组相比，链表的优点是可以在常数时间内插入和删除元素，但访问特定元素的效率较低。

实现

template<typename T>
class LinkedList {
    struct Node {
        T value;
        Node* next;
        Node(T v) : value(v), next(nullptr) {}
    };
    Node* head;
public:
    LinkedList() {
        head = nullptr;
    }


    void push_back(T x) {
        if(head == nullptr) {
            head = new Node(x);
            return;
        }
        Node* p = head;
        while(p->next != nullptr) {
            p = p->next;
        }
        p->next = new Node(x);
    }

    void push_front(T x) {
        Node* p = new Node(x);
        p->next = head;
        head = p;
    }

    void pop_back() {
        if(head == nullptr) return;
        if(head->next == nullptr) {
            delete head;
            head = nullptr;
            return;
        }
        Node* p = head;
        while(p->next->next != nullptr) {
            p = p->next;
        }
        delete p->next;
        p->next = nullptr;
    }

    void pop_front() {
        if(head == nullptr) return;
        Node* p = head;
        head = head->next;
        delete p;
    }
    T front() {
        if(head == nullptr) return T();
        return head->value;
    }

    T back() {
        if(head == nullptr) return T();
        Node* p = head;
        while(p->next != nullptr) {
            p = p->next;
        }
        return p->value;
    }

    bool empty() {
        return head == nullptr;
    }
}

10，并查集

原理

并查集是一种用于处理不相交集合的数据结构，它支持两种操作：查找和合并。查找操作用于确定一个元素属于哪个集合，合并操作用于将两个集合合并为一个集合。

方法：路径压缩，合并。

并查思路：是否有共同的祖先。

路径压缩：在查找过程中，将路径上的所有节点都指向根节点，从而减少查找的时间复杂度。

合并：将两个集合合并为一个集合，通常是将一个集合的根节点指向另一个集合的根节点。

实现

class UnionSet{
unordered_map<int,int> father;
public:
    UnionSet(int n) {
        for (int i=0; i<n; i++) {
            father[i] = i;
        }
    }
    int find(int x) {
        if (father[x] == x) {
            return x;
        }
        father[x] = find(father[x]);
        return father[x];
    }

    void join(int x, int y) {
        int fa_x = find(x);
        int fa_y = find(y);
        if (fa_x != fa_y) {
            father[fa_x] = fa_y;
        }
    }

    bool is_Connect(int x, int y) {
        int fa_x = find(x);
        int fa_y = find(y);
        return fa_x == fa_y;
    }
};