diff --git "a/content/post/Redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200.md" "b/content/post/Redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200.md" index 2fb8298..5461094 100644 --- "a/content/post/Redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200.md" +++ "b/content/post/Redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200.md" @@ -12,6 +12,13 @@ toc: true --- # 基础数据结构部分 ## 动态字符串 SDS + +实现在 sds.h/sds.c。 + +### 设计原则 + +为什么不使用c语言原生的字符串操作库? c字符串用'\0'作为终止符,不能满足二进制安全,而且求字符串长度,拼接等操作都要遍历到'\0'来实现,需要自己控制内存使用,操作复杂度高。 + ### 前置知识 由于我对C语言没有深入了解,有很多知识点会在前面补充。 @@ -59,6 +66,9 @@ struct __attribute__ ((__packed__)) sdshdr8 { }; ``` +记录了已经使用的空间和分配的空间,比C字符串操作效率更高。和 C 语言中的字符串操作相比,SDS 通过记录字符数组的使用长度和分配空间大小,避免了对字符串的遍历操作,降低了操作开销,进一步就可以帮助诸多字符串操作更加高效地完成,比如创建、追加、复制、比较等。 + + #### 二进制安全 @@ -476,7 +486,7 @@ int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) { 具体的实现在ziplist.h和ziplist.c -压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是O(1)。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。 +压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是 **O(1)** 。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。 ```c // ziplist 结构 @@ -573,8 +583,7 @@ typedef struct zlentry { ``` 对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。 - - +解码操作,主要用宏实现: ```c static inline void zipEntry(unsigned char *p, zlentry *e) { ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen); @@ -585,6 +594,27 @@ static inline void zipEntry(unsigned char *p, zlentry *e) { e->p = p; } ``` +这里主要就是对字节的读取,可以去看源代码。 + +### 操作 +#### 创建 +```c +/* Create a new empty ziplist. 
*/ +// 先申请初始的空间(4+4+2+1),再对zlbytes,zltail,zllen,zlend逐个初始化 +unsigned char *ziplistNew(void) { + unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE; + unsigned char *zl = zmalloc(bytes); + ZIPLIST_BYTES(zl) = intrev32ifbe(bytes); + ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE); + ZIPLIST_LENGTH(zl) = 0; + zl[bytes-1] = ZIP_END; + return zl; +} +``` +#### 插入元素 + + + ## 字典 ### 结构 @@ -613,14 +643,447 @@ typedef struct dictEntry { ```c struct dict { dictType *type; // 对应特定类型操作函数 - - dictEntry **ht_table[2]; - unsigned long ht_used[2]; + + dictEntry **ht_table[2]; // 哈希表。有两个,一个正常使用,另外一个在rehash时使用 + unsigned long ht_used[2]; // 记录每个哈希表被使用的数目。 long rehashidx; /* rehashing not in progress if rehashidx == -1 */ /* Keep small vars at end for optimal (minimal) struct padding */ int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */ + // size 的 系数,size 是2 的N次幂 signed char ht_size_exp[2]; /* exponent of size. (size = 1<ht_table[htidx] = NULL; + d->ht_size_exp[htidx] = -1; + d->ht_used[htidx] = 0; +} + +/* Create a new hash table */ +dict *dictCreate(dictType *type) +{ + dict *d = zmalloc(sizeof(*d)); + + _dictInit(d,type); + return d; +} + +/* Initialize the hash table */ +int _dictInit(dict *d, dictType *type) +{ + _dictReset(d, 0); + _dictReset(d, 1); + d->type = type; + d->rehashidx = -1; + d->pauserehash = 0; + return DICT_OK; // 使用一些宏来反馈结果 +} +``` + +### 增加与扩容 + +这里先提前讲一下Rehash的概念,便于理解增加扩容中的一些操作: + +扩容后,字典容量及掩码值会发生改变,同一个键与掩码经位运算后得到的索引值就会发生改变,从而导致根据键查找不到值的情况。解决这个问题的方法是,**新扩容的内存放到一个全新的Hash表中(ht[1]),并给字典打上在进行rehash操作中的标识(即rehashidx! 
=-1)**。此后,新添加的键值对都往新的Hash表中存储;而修改、删除、查找操作需要在ht[0]、ht[1]中进行检查,然后再决定去对哪个Hash表操作。除此之外,还需要把老Hash表(ht[0])中的数据重新计算索引值后全部迁移插入到新的Hash表(ht[1])中,此迁移过程称作rehash。 + +先看增加单个entry的操作: + +```c +/* Add an element to the target hash table */ +int dictAdd(dict *d, void *key, void *val) +{ + dictEntry *entry = dictAddRaw(d,key,NULL); + if (!entry) return DICT_ERR; + dictSetVal(d, entry, val); + return DICT_OK; +} + +dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) +{ + long index; + dictEntry *entry; + int htidx; + + // 如果正在rehash,在add时进行一步rehash,这里是将大范围的rehash分散来减小资源集中消耗 + if (dictIsRehashing(d)) _dictRehashStep(d); + + /* Get the index of the new element, or -1 if + * the element already exists. */ + if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1) + return NULL; + + /* Allocate the memory and store the new entry. + * Insert the element in top, with the assumption that in a database + * system it is more likely that recently added entries are accessed + * more frequently. */ + htidx = dictIsRehashing(d) ? 1 : 0; + size_t metasize = dictMetadataSize(d); + entry = zmalloc(sizeof(*entry) + metasize); + if (metasize > 0) { + memset(dictMetadata(entry), 0, metasize); + } + // 插入在顶部:根据时空局限性 + entry->next = d->ht_table[htidx][index]; + d->ht_table[htidx][index] = entry; + d->ht_used[htidx]++; + + /* Set the hash entry fields. 
*/ + dictSetKey(d, entry, key); + return entry; +} +``` +可以看出,add调用了一个底层的addraw函数。addraw首先使用dictkeyindex来查找一个合适的插入位置,如果这个key已经存在就退出add操作。然后确定是否在rehash,上面我们讲过如果在rehash那么 **新添加的键值对都往新的Hash表中存储**。后面就申请空间在相应位置顶部插入,这是数据库时空局限性的体现。 + +这里看一下dictSetKey和dictSetVal: + +```c +#define dictSetKey(d, entry, _key_) do { \ + if ((d)->type->keyDup) \ + (entry)->key = (d)->type->keyDup((d), _key_); \ + else \ + (entry)->key = (_key_); \ +} while(0) + +#define dictSetVal(d, entry, _val_) do { \ + if ((d)->type->valDup) \ + (entry)->v.val = (d)->type->valDup((d), _val_); \ + else \ + (entry)->v.val = (_val_); \ +} while(0) +``` +可以看出是用宏的形式调用dict的dicttype函数,也就是说这些操作是可以调整的。 + +扩容操作: + +```c +// 将d扩容到2^size的大小 +int _dictExpand(dict *d, unsigned long size, int* malloc_failed) +{ + if (malloc_failed) *malloc_failed = 0; + + /* the size is invalid if it is smaller than the number of + * elements already inside the hash table */ + if (dictIsRehashing(d) || d->ht_used[0] > size) + return DICT_ERR; + + /* the new hash table */ + dictEntry **new_ht_table; + unsigned long new_ht_used; + signed char new_ht_size_exp = _dictNextExp(size); + + /* Detect overflows */ + size_t newsize = 1ul<ht_size_exp[0]) return DICT_ERR; + + /* Allocate the new hash table and initialize all pointers to NULL */ + if (malloc_failed) { + new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*)); + *malloc_failed = new_ht_table == NULL; + if (*malloc_failed) + return DICT_ERR; + } else + new_ht_table = zcalloc(newsize*sizeof(dictEntry*)); + + // 新的hash表被使用的数量 + new_ht_used = 0; + + /* Is this the first initialization? If so it's not really a rehashing + * we just set the first hash table so that it can accept keys. 
*/ + if (d->ht_table[0] == NULL) { + d->ht_size_exp[0] = new_ht_size_exp; + d->ht_used[0] = new_ht_used; + d->ht_table[0] = new_ht_table; + return DICT_OK; + } + + /* Prepare a second hash table for incremental rehashing */ + d->ht_size_exp[1] = new_ht_size_exp; + d->ht_used[1] = new_ht_used; + d->ht_table[1] = new_ht_table; + d->rehashidx = 0; + return DICT_OK; +} +``` +首先判断是否在rehash,在rehash中不能扩容。然后创建一个新的hash table,这个newsize是2的n次幂。expand操作在刚开始初始化时会使用,也会在这里做一个判断。更常用的是在扩容后进行rehash操作。 + +获得size的函数: + +```c +// 确保hash cap 为2的N次幂 +static signed char _dictNextExp(unsigned long size) +{ + unsigned char e = DICT_HT_INITIAL_EXP; + + if (size >= LONG_MAX) return (8*sizeof(long)-1); + // 1 << e == 1 * 2^e + // 找到一个大于size 的2^e + while(1) { + if (((unsigned long)1<= size) + return e; + e++; + } +} + +``` +### 渐进式Rehash + +直接看函数: + +```c +int dictRehash(dict *d, int n) { + int empty_visits = n*10; /* Max number of empty buckets to visit. */ + if (!dictIsRehashing(d)) return 0; + + while(n-- && d->ht_used[0] != 0) { + dictEntry *de, *nextde; + + /* Note that rehashidx can't overflow as we are sure there are more + * elements because ht[0].used != 0 */ + assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx); + while(d->ht_table[0][d->rehashidx] == NULL) { + d->rehashidx++; + if (--empty_visits == 0) return 1; + } + de = d->ht_table[0][d->rehashidx]; + /* Move all the keys in this bucket from the old to the new hash HT */ + while(de) { + uint64_t h; + + nextde = de->next; + /* Get the index in the new hash table */ + h = dictHashKey(d, de->key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]); + de->next = d->ht_table[1][h]; + d->ht_table[1][h] = de; + d->ht_used[0]--; + d->ht_used[1]++; + de = nextde; + } + d->ht_table[0][d->rehashidx] = NULL; + d->rehashidx++; + } + + /* Check if we already rehashed the whole table... 
*/ + if (d->ht_used[0] == 0) { + zfree(d->ht_table[0]); + /* Copy the new ht onto the old one */ + d->ht_table[0] = d->ht_table[1]; + d->ht_used[0] = d->ht_used[1]; + d->ht_size_exp[0] = d->ht_size_exp[1]; + _dictReset(d, 1); + d->rehashidx = -1; + return 0; + } + + /* More to rehash... */ + return 1; +} +``` +rehash除了扩容时会触发,缩容时也会触发。Redis整个rehash的实现,主要分为如下几步完成。 + +1. 给Hash表ht[1]申请足够的空间;扩容时空间大小为当前容量*2,即d->ht[0]. used*2;当使用量不到总空间10%时,则进行缩容。缩容时空间大小则为能恰好包含d->ht[0].used个节点的2^N次方幂整数,并把字典中字段rehashidx标识为0 +2. 进行rehash操作调用的是dictRehash函数,重新计算ht[0]中每个键的Hash值与索引值(重新计算就叫rehash),依次添加到新的Hash表ht[1],并把老Hash表中该键值对删除。把字典中字段rehashidx字段修改为Hash表ht[0]中正在进行rehash操作节点的索引值. +3. rehash操作后,清空ht[0],然后对调一下ht[1]与ht[0]的值,并把字典中rehashidx字段标识为-1。 + +我们知道,Redis可以提供高性能的线上服务,而且是单进程模式,当数据库中键值对数量达到了百万、千万、亿级别时,整个rehash过程将非常缓慢,如果不优化rehash过程,可能会造成很严重的服务不可用现象。Redis优化的思想很巧妙,利用分而治之的思想了进行rehash操作,大致的步骤如下。 + +执行插入、删除、查找、修改等操作前,都先判断当前字典rehash操作是否在进行中,进行中则调用dictRehashStep函数进行rehash操作(每次只对1个节点进行rehash操作,共执行1次)。除这些操作之外,当服务空闲时,如果当前字典也需要进行rehsh操作,则会调用incrementallyRehash函数进行批量rehash操作(每次对100个节点进行rehash操作,共执行1毫秒)。在经历N次rehash操作后,整个ht[0]的数据都会迁移到ht[1]中,这样做的好处就把是本应集中处理的时间分散到了上百万、千万、亿次操作中,所以其耗时可忽略不计。 + +```c +/* This function performs just a step of rehashing, and only if hashing has + * not been paused for our hash table. When we have iterators in the + * middle of a rehashing we can't mess with the two hash tables otherwise + * some elements can be missed or duplicated. + * + * This function is called by common lookup or update operations in the + * dictionary so that the hash table automatically migrates from H1 to H2 + * while it is actively used. 
*/ +static void _dictRehashStep(dict *d) { + if (d->pauserehash == 0) dictRehash(d,1); +} +``` +### 删除 + +```c +static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { + uint64_t h, idx; + dictEntry *he, *prevHe; + int table; + + /* dict is empty */ + if (dictSize(d) == 0) return NULL; + + if (dictIsRehashing(d)) _dictRehashStep(d); + h = dictHashKey(d, key); + + for (table = 0; table <= 1; table++) { + idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); + he = d->ht_table[table][idx]; + prevHe = NULL; + // 查找 + while(he) { + if (key==he->key || dictCompareKeys(d, key, he->key)) { + /* Unlink the element from the list */ + if (prevHe) + prevHe->next = he->next; + else // 在bucket顶部,直接略过 + d->ht_table[table][idx] = he->next; + if (!nofree) { + dictFreeUnlinkedEntry(d, he); + } + d->ht_used[table]--; + return he; + } + prevHe = he; + he = he->next; + } + if (!dictIsRehashing(d)) break; + } + return NULL; /* not found */ +} +``` +### 遍历 + +遍历Redis整个数据库主要有两种方式:全遍历(例如keys命令)、间断遍历(hscan命令): +- 全遍历: 一次命令执行就遍历完整个数据库。 +- 间断遍历: 每次命令执行只取部分数据,分多次遍历。 + +迭代器——可在容器(容器可为字典、链表等数据结构)上遍访的接口,设计人员无须关心容器的内容,调用迭代器固定的接口就可遍历数据,在很多高级语言中都有实现。 + +字典迭代器主要用于迭代字典这个数据结构中的数据,既然是迭代字典中的数据,必然会出现一个问题,迭代过程中,如果发生了数据增删,则可能导致字典触发rehash操作,或迭代开始时字典正在进行rehash操作,从而导致一条数据可能多次遍历到。 + +```c +typedef struct dictIterator { + dict *d; + long index; // 迭代hash中的索引值 + // safe 为1表示是安全迭代器,可以在add,find等rehash场景中使用 + int table, safe; + // entry 当前读取节点,nextEntry entry 节点的next字段 + dictEntry *entry, *nextEntry; + /* unsafe iterator fingerprint for misuse detection. */ + unsigned long long fingerprint;// 字典指纹,字典发生改变随之改变 +} dictIterator; +``` +fingerprint字段是一个64位的整数,表示在给定时间内字典的状态。在这里称其为字典的指纹,因为该字段的值为字典(dict结构体)中所有字段值组合在一起生成的Hash值,所以当字典中数据发生任何变化时,其值都会不同,生成算法可参见源码dict.c文件中的dictFingerprint函数。 + +```c +/* A fingerprint is a 64 bit number that represents the state of the dictionary + * at a given time, it's just a few dict properties xored together. 
+ * When an unsafe iterator is initialized, we get the dict fingerprint, and check + * the fingerprint again when the iterator is released. + * If the two fingerprints are different it means that the user of the iterator + * performed forbidden operations against the dictionary while iterating. */ +unsigned long long dictFingerprint(dict *d) { + unsigned long long integers[6], hash = 0; + int j; + + integers[0] = (long) d->ht_table[0]; + integers[1] = d->ht_size_exp[0]; + integers[2] = d->ht_used[0]; + integers[3] = (long) d->ht_table[1]; + integers[4] = d->ht_size_exp[1]; + integers[5] = d->ht_used[1]; + + /* We hash N integers by summing every successive integer with the integer + * hashing of the previous sum. Basically: + * + * Result = hash(hash(hash(int1)+int2)+int3) ... + * + * This way the same set of integers in a different order will (likely) hash + * to a different number. */ + for (j = 0; j < 6; j++) { + hash += integers[j]; + /* For the hashing step we use Tomas Wang's 64 bit integer hash. 
*/ + hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1; + hash = hash ^ (hash >> 24); + hash = (hash + (hash << 3)) + (hash << 8); // hash * 265 + hash = hash ^ (hash >> 14); + hash = (hash + (hash << 2)) + (hash << 4); // hash * 21 + hash = hash ^ (hash >> 28); + hash = hash + (hash << 31); + } + return hash; +} +``` +根据迭代器结构中的safe字段,将迭代器分为普通迭代器和安全迭代器: + +- 普通迭代器: 只遍历数据 +- 安全迭代器: 遍历的同时删除数据 + +#### 普通迭代器 + +普通迭代器迭代字典中数据时,会对迭代器中fingerprint字段的值作严格的校验,来保证迭代过程中字典结构不发生任何变化,确保读取出的数据不出现重复 + +当Redis执行部分命令时会使用普通迭代器迭代字典数据,例如sort命令。sort命令主要作用是对给定列表、集合、有序集合的元素进行排序,如果给定的是有序集合,其成员名存储用的是字典,分值存储用的是跳跃表,则执行sort命令读取数据的时候会用到迭代器来遍历整个字典。 + +```c + dict *set = ((zset*)sortval->ptr)->dict; + dictIterator *di; + dictEntry *setele; + sds sdsele; + di = dictGetIterator(set); + while((setele = dictNext(di)) != NULL) { + sdsele = dictGetKey(setele); + vector[j].obj = createStringObject(sdsele,sdslen(sdsele)); + vector[j].u.score = 0; + vector[j].u.cmpobj = NULL; + j++; + } + dictReleaseIterator(di); +``` + +1. 调用dictGetIterator函数初始化一个普通迭代器,此时会把iter->safe值置为0,表示初始化的迭代器为普通迭代器 + ```c + void dictInitIterator(dictIterator *iter, dict *d) + { + iter->d = d; + iter->table = 0; + iter->index = -1; + iter->safe = 0; + iter->entry = NULL; + iter->nextEntry = NULL; + } + + dictIterator *dictGetIterator(dict *d) + { + dictIterator *iter = zmalloc(sizeof(*iter)); + dictInitIterator(iter, d); + return iter; + } + ``` +2. 循环调用dictNext函数依次遍历字典中Hash表的节点,首次遍历时会通过dictFingerprint函数拿到当前字典的指纹值。 + +3. 
+ +#### 安全迭代器 diff --git "a/content/post/\345\214\272\345\235\227\351\223\276\345\255\246\344\271\240.md" "b/content/post/\345\214\272\345\235\227\351\223\276\345\255\246\344\271\240.md" index 6be8c5e..f811b35 100644 --- "a/content/post/\345\214\272\345\235\227\351\223\276\345\255\246\344\271\240.md" +++ "b/content/post/\345\214\272\345\235\227\351\223\276\345\255\246\344\271\240.md" @@ -1,14 +1,16 @@ --- -title: "区块链学习" +title: "区块链学习---BTC" date: 2022-11-28T11:08:17+08:00 draft: true --- -## BTC +# BTC -### 密码学原理 +bitcoin 的论文标题为 一种点对点的电子现金系统,可见它最初的设计目标是为了摆脱第三方中介,建立一个新型的信任模型。是区块链技术第一次实际运用。 -#### 哈希 +## 密码学原理 + +### 哈希 - **collision resistance** (这里指哈希碰撞) : @@ -34,7 +36,7 @@ draft: true 哈希值事先不可预测,仅仅根据输入很难预测输出。在比特币中采用SHA-256哈希函数,需要一个哈希值,存在于某一个范围内,只能通过不停运算查找出来。该性质保证了比特币系统中,只能通过“挖矿”获得比特币。也就是说,该性质保证了工作量证明(POW)机制可以运行下去【“挖矿难,但验证易”】。 -#### 签名 +### 签名 比特币中账户管理 在第三方中心化系统中,账户开通依赖于第三方。但去中心化的比特币系统中,很明显不能 进行“申请账户”。在比特币系统中,申请账户是用户自己来处理的,即自己创建一个公钥-私钥对。(关于公私 @@ -43,9 +45,9 @@ draft: true 易时,通过自己私钥签名,其他人可以根据公钥进行验证,从而保证该交易由自己发起。也就是说,只有拥有 私钥,才能将该账户中的比特币转走。 【注意:比特币系统中,很难通过生成大量公私钥对来获取他人私钥】 -### 数据结构 +## 数据结构 -#### hash pointer (哈希指针) +### hash pointer (哈希指针) ![](https://s2.loli.net/2022/11/28/8ReVK2i7LQqWsld.png) @@ -55,7 +57,7 @@ draft: true 区块链系统中有的节点只需保存部分区块,等到需要其他区块时,再向其他节点获取,可以通过hash来验证正确性。 -#### Merkle tree +### Merkle tree ![](https://s2.loli.net/2022/11/28/fKRoqOe94NlCHLj.png) @@ -65,9 +67,181 @@ draft: true 简化支付确认协议(SPV)允许另一种节点存在,这样的节点被称为“轻节点”,它下载区块头,使用区块头确认工作量证明,**然后只下载与其交易相关的默克尔树分支**。 这使得轻节点只要下载整个区块链的一小部分,就可以安全地确定任何一笔比特币交易的状态和帐户的当前余额。 -### 协议 +## 协议 + +### double spending attack + +数字货币本身为带有签名的数据文件,可以进行复制。即:对用户来说,可以将同一货币花费两次。 + +对货币添加唯一编号(不可篡改),每次支付向货币发行单位查询真伪。 该方法每次交易都需要依赖于 +第三方机构来判断货币真伪且防止双花攻击。是一个典型的第三方中心化方案。 + +而比特币系统中是通过挖矿来决定货币的发行权和发行量,通过系统维护的区块链结构来解决双花问题 + +### 分布式共识 + +为了保证区块链内容在不同节点上的一致性,需要取得分布式共识。 + +FLP不可能结论: 在一个异步系统中,网络时延无上限,即使只有一个成员是有问题的,也不可能达成共识。 + +CAP(Consistency一致性、Availability可靠性、Partition tolerance容错性) Theorem: 任何一个分布式系统 +中,最多只能满足其中两个性质。 分布式共识中协议Paxos 
可以保证Consistency(若达成共识必然一致),但在某 +些情况下,可能会一直无法达成共识。 + +假设系统中存在部分节点有恶意,但存在比例较小。大多数节点为“好”的节点,在这种情况下进行共识 +协议设置。 想法1:直接投票某个节点打包交易到区块,将其发给其他节点,其他节点检查该候选区块,检查 +若正确投赞成票,若票数过半数,加入区块链。 + - 存在的问题1——恶意节点不断打包不合法区块,导致一直无 +法达成共识,时间全花费在投票上。 + - 存在的问题2——无强迫投票手段,某些节点不投票(行政不作为)。 + - 存在的问题3——网络延迟事先未知,投票需要等多久?效率上会产生问题。 + - 更大的一个问题——membership。如果是联盟链,对加入成员有要求,可以基于投票。但比特币系统,任何人都可以加入,且创建账户极其简单,只需要本地产生公私钥对即可。只有转账(交易)时候,比特币系统才能知道该账户的存在。这样,黑客可以使用计算机专门生成大量公私钥对,当其产生大量公私钥对超过系统中一半数目,就可以获得 +支配地位(**女巫攻击**)。 + +所以,这种简单的投票方案也是不可行的。 + +比特币系统中采用了很巧妙的方案解决这个问题。虽然仍然是投票,但并非简单的根据账户数目,而是依据计算力进行投票。 在比特币系统中,每个节点都可以自行组装一个候选区块,而后,尝试各种nonce值,这就是挖矿。[H(block header)<=target] 当某个节点找到符合要求的nonce,便获得了记账权,从而可以将区块发布到系统中。其他节点收到区块后,验证区块合法性,如果系统中绝大多数节点验证通过,则接收该区块为最新的区块并加入到区块链中。 + + - 可能出现分叉情况。当两个节点同时获得记账权,这时就会出现分叉,但区块链只承认最长合法链,随着时间推移,必然存在某一条链变成最长合法链。这样,也就会导致合法区块被拒绝 + + - 分叉攻击。A用户对上面的A转账给B的记录回滚,从而非法获取利益。在两条链上,发现交易都 +合法。这是一个典型的双花攻击。A给B转账后,用分叉攻击将钱又转回来,覆盖掉原来的记录。 在比特 +币系统中,这种情况实际上很难发生。因为大多数矿工认可的是最长的合法链,会沿着上面的链继续挖下 +去。而A这个攻击者要想回退记录,就必须使得下面的链变得比上面的链还长。理论上来说,攻击者需要 +达到整个系统中51%的计算力,才能使得这种攻击成功 + +### 激励机制 + +节点竞争记账权需要消耗算力和电力成本,为了激励节点参与,系统会给出块节点 **出块奖励**,一个获得 +合法区块的节点,可以在区块中加入一个特殊交易(铸币交易)。事实上,这种方式也是唯一一个产生新比特币的途径。 + +比特币系统设计规定,起初每个区块可以获得50个比特币,但之后**每隔21万个区块**,奖励减半。因为平均出块时间为10分钟,可以算出大约**四年**减半。 + +区块中保存交易记录,如果仅仅设置出块奖励,那么,会不会存在节点只想发布区块获得出块奖励而不想打包交易? 
+ +BTC系统设计了**Transaction fee(交易费)**,每个交易可以有多个输入,也可以有多个输出,但输入之和要等于输出之和(total inputs = total outputs)。 存在一些交易的total inputs 略大于 total outputs,这部分差额便作为交易费,给了获得记账权的节点。对于获得记账权节点来说,除了出块奖励之外,还可以得到打包交易的交易费。但目前来说,交易费远远小于出块奖励。等到未来出块奖励变少,可能区块链的维护便主要依赖于交易费了。 + + +## 实现 + +### UTXO + +比特币采用了 **基于交易的账本模式** 。然而,系统中并无显式记录账户包含比特币数,实际上其需要通过交易记录进行推算。在比特币系统中,全节点需要维护一个名为 **UTXO**(Unspent Transaction Output尚未被花掉的交易输出) 的数据结构。 + +A转给B五个BTC,转给C3个BTC,B将5个BTC花掉,则该交易记录不保存在UTXO中,C没有花掉,则该交易记录保存在UTXO中 + +UTXO集合中每个元素要给出产生这个输出的交易的哈希值,以及其在交易中是第几个输出。通过这两个信息,便可 +以定位到UTXO中的输出。 + +> 判断一个交易是否合法,要查一下想要花掉的BTC是否在该集合中,只有在集合中才是合法的。如果想要花掉的BTC不在UTXO中,那么说明这个BTC要么根本不存在,要么已经被花过。所以,全节点需要在内存中维护一个UTXO,从而便于快速检测double spending(双花攻击)。 + + + +## 网络 + +![](https://s2.loli.net/2022/12/01/gzeoK1dSaJf4VRL.png) + +bitcoin 网络层使用的是p2p网络,区块链运行在应用层。 + +节点之间的通信采用了TCP协议,便于穿透防火墙。 + +当一个节点要加入网络时,要先找到一个seed node (种子节点),通过这个种子节点来获得其他节点。退出时自行退出,过段时间其他节点就会把它从网络中删除。 + +网络设计原则: **simple,robust,but not efficient** + +节点维护一个收到的待上链的交易集合,当第一次收到一个交易时,会转发给邻居节点并加入集合,下次收到时就不会转发。当交易上链就把它删掉。 + +假如网络中存在两个冲突交易,如 +交易1:A->B,交易2:A->C(假设花费的同一笔钱)。具体接收哪个取决于节点先接收到哪个交易,之后收到另一个 +交易会将其放弃。 + +新发布区块在网络中传播方式与新发布交易传播方式类似,每个节点除检查该区块内容是否合法,还要检查是否位于 +最长合法链上。区块越大,则网络上传输越慢。BTC协议对于区块大小限制为不大于1M大小. 
+ +此外,比特币网络传播属于 Best effort(尽力而为) ,不能保证一定传输成功。一个交易发布到网络上,未必所 +有节点都能收到,也未必所有节点收到交易顺序都一致。 + +## 挖矿 + +![](https://s2.loli.net/2022/12/02/84z1iTa5Qk3CgLd.png) + +![](https://s2.loli.net/2022/12/02/9EWYarsVecQ7ij3.png) + +从 blockchain.com 网站 截取一个最近的block: + +![](https://s2.loli.net/2022/12/03/t1FDuAzh3HmIQLq.png) + +可以看到,区块哈希与前一区块哈希都是以一长串0开头的,挖矿本身就是尝试各种nonce,使得产生的区块哈希值小于等于目标阈值。 + +看一下 block header 的代码描述: [bitcoin](https://github.com/bitcoin/bitcoin/blob/master/src/primitives/block.h) + +![](https://s2.loli.net/2022/12/03/sC3A8qNUnXYLa9D.png) + +nonce是一个32位的无符号整型数据,在挖矿时候是通过不断调整nonce进行的,但可以看到,nonce的取值最多为$2^{32}$种。但并非将这些nonce全部遍历一遍,就一定能找到符合要求的nonce。由于近年来,挖矿人员越来越多,挖矿难度已经调整得比较大了,而这一搜索空间太小,所以仅调整nonce很大可能找不到正确的结果。 + +可以看到除了nonce其他的元素只有ntime和hashMerkleRoot可以进行调整。我们主要是调整MerkleRoot。 + +每个发布区块者可以得到出块奖励,也就是可以在区块中发布一个 铸币交易(coinbase交易) +,这也是BTC系统中产生新比特币的唯一方式。coinbase 域中可以写入任何内容,在这里写什么都没有影响。所以可以在这里添加一些任意信息,便可以实现无法篡改(也无法删除)。所以,只要我们改变了写入内容,便可以改变Merkle Tree 的根哈希值。 + +所以,在实际的挖矿中,包含两层循环。外层循环调整coinbase域(可以规定只将其中前x个字节作为另一个nonce),算出block header中根哈希值后,内层循环再调整nonce。 + +### 概率分析 + +挖矿本质上是不断尝试各种nonce,来求解这样一个puzzle。每次尝试nonce,可以视为一次伯努利试验。最典型的 +伯努利试验就是投掷硬币,正面和反面朝上概率为p和1-p。在挖矿过程中,一次伯努利试验,成功的概率极小,失败 +的概率极大。挖矿便是多次进行伯努利试验,且每次随机。这些伯努利试验便构成了a sequence of independent +Bernoulli trials(一系列独立的伯努利试验)。根据概率论相关知识知道,伯努利试验本身具有无记忆性。也就是说,无论之前做过多少次试验,对后续继续试验没有任何影响。 对于挖矿来说,便是多次伯努利试验尝试nonce,最终找到一个符合要求的nonce。在这种情况下,可以采用泊松分布进行近似,由此通过概率论可以推断出,系统出块时间服从指数分布。(需要注意的是,出块时间指的是整个系统出块时间,并非挖矿的个人) + +系统平均出块时间为10min,该时间为系统本身设计,通过难度调整维护其平均出块时间。 指数分布本身也具有无 +记忆性。也就是说,对整个系统而言,已经过去10min,仍然没有人挖到区块,那么平均仍然还需要等10min(很不 +符合人的直觉)。也就是说,将来要挖多久和已经挖多久无关。 + +### 工具演化 + +CPU->GPU->ASIC专用矿机 + +### 矿池 + +矿池通常是一个全节点驱动多台矿机。矿工只需要不停计算哈希值,而全节点其他职责由矿主来承担。ASIC芯片只能计算哈希值,不能实现全节点其他功能。此外,矿池出现解决了单个矿工收益不稳定的问题。当获得收益后,所有矿工对收益进行分配,从而保证了收益的稳定性。 + +矿池一般具有两种组织形式。1.类似大型数据中心(同一机构),集中成千上万矿机进行哈希计算。2.分布式。矿工 +与矿主不认识(不同机构),矿工与矿主联系,自愿加入其矿池,矿主分配任务,矿工进行计算,获得收益后整个矿池 +中所有矿工进行利益分配。 + +### 难度调整 + 
+在比特币系统中,区块链的出块时间保持在平均10min左右。毫无疑问的是,伴随着参与挖矿的人增多,系统总算力不断增强,出块时间越来越短,虽然提高了系统效率但是增加了不稳定性,不利于达成共识,所以挖矿的难度绝对不能一成不变。 + +**这里的难度系数就是上面block信息中的difficulty**。 + +H(block header)<=target.(target便是目标阈值,target越小,目标难度就越大)对于挖矿难度的调 +整,可以视为调整目标空间在整个输出空间中所占比例大小。 + +之前有提及,比特币系统采用的哈希算法为SHA-256,所以整个输出空间大小为$2^{256}$,调整目标空间所占比 +例,简单地说就是目标值前需要多少个0。 当然,挖矿难度和目标阈值成反比,如下式所示,其中 +difficulty-1-target是挖矿难度为1时候的target,即最小挖矿难度: + +$$\text{difficulty} = \frac{\text{difficulty-1-target}}{\text{target}} $$ + +在BTC协议中规定,每隔2016个区块需要调整一次难度,根据10min产生一个新区块可以得到,大概需要14天的时 +间。具体调整公式如下: + +$$\text { target }=\text { target } * \frac{\text { actual time }}{\text { expected time }}$$ + +> 可见,如果实际时间比较长,target会比较大,相应的挖矿难度会降低;如果实际时间比较短,target会比较小,相应的挖矿难度会增大。 + +如何让所有矿工都愿意调整这个挖矿难度呢? 这一调整算法在代码中已经写入,如果有恶意节点故意不调,其 +所产生的区块不会被大多数诚实的节点承认。 在block header中有一个nbits的域,它是对target的编码存储(target为256位,nbits为32位,也就是说block header并未直接存储target),其他节点在进行合法性验证时候会验证nbits域是否合法,不合法则对该区块不予以承认。 + +挖矿难度变低是好事吗? 对于矿工来说,挖矿难度变低,挖矿变得更容易,这也说明大多数人对该币种 +不再看好,这个币种的价值也会大跳水,这对矿工来说可是一个坏消息。 -#### double spending attack +### 安全性分析 +因为矿池的出现,引发了对51%算力攻击的担忧,矿工只负责计算,并不知道矿池的行为(**乌合之众**)。51%攻击只是一个概率问题,并非达到51%算力就能发动攻击,不能达到就无法发动攻击。此外,矿池本身算力也是在不断变化的。 -#### \ No newline at end of file +1. 分叉攻击: +2. 封锁交易(Boycott) 假如攻击者不喜欢某个账户A,不想让A的交易上区块链,在监听到有其他人将A的交易发布到区块链上时,立刻发动分叉攻击,使A所在链无法成为“最长合法链”。这样,便实现了对A账户的封锁。 +3. 
盗币(将他人账户BTC转走) 这个是不可能的,因为其并没有他人账户私钥。如果依仗算力强,强行将没有签 +名的转账发布到区块链,正常节点不会认为其合法,这样,即使这条链再长,其他人也不会认为其是最长合法链。 diff --git "a/content/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273.md" "b/content/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273.md" index 0092c2d..d609ae3 100644 --- "a/content/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273.md" +++ "b/content/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273.md" @@ -20,4 +20,34 @@ toc: true 传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。 +## Fabric 白皮书 +Hyperledger Fabric 是 Linux 基金会 的 一个项目,是Hyperledger下面的一个子项目。作为一个开源联盟链,被很多项目应用。 + +它的主要特点是模块化的共识机制,相对高性能,和可以使用常规语言编写智能合约(golang)。 + +### 概念 + +#### 联盟链 + +文中划分联盟链和公链的标准是: **是否发币和节点身份是否可知** + +状态机复制(SMR)是建设弹性应用众所周知的方式,但是如果我们把运行在区块链上的智能合约看作一种分布式应用,与传统的SMR区别在于: + +- 许多应用并发运行 +- 这些应用可以被任何人动态地部署 +- 这些应用的代码是不被信任的,可能有恶意 + +#### order-execute + +现有的大部分可以运行智能合约的区块链遵循SMR实现一种order-execute的架构: 节点先将交易排序再将它们广播给其他节点,然后每个节点顺序执行。 + +![](https://s2.loli.net/2022/12/06/B4Ns3GZAKl8dIXT.png) + +所有节点对所有交易的顺序执行限制了性能,并且需要采取复杂的措施来防止源自不受信任的合约(例如在以太坊中使用“gas”计算运行时)的针对平台的拒绝服务攻击;智能合约很难做到并发。 + +最大的限制是交易必须是确定的,这就使得不能使用常规编程语言来实现,必须使用特定的语言。 + +#### execute-order-validate + +![](https://s2.loli.net/2022/12/07/jDBxcLmYrfXSnbl.png) diff --git "a/content/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273.md" "b/content/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273.md" index 1fb33e4..f3a60d1 100644 --- "a/content/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273.md" +++ 
"b/content/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273.md" @@ -215,3 +215,8 @@ discriminative aggregation (**判别聚合**) 每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。 +## IOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》 + +### 背景 + + diff --git a/docs/404.html b/docs/404.html index 629fe14..b7faab5 100644 --- a/docs/404.html +++ b/docs/404.html @@ -283,7 +283,7 @@

Built with Hugo and theme Tokiwa.
- 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/categories/index.html b/docs/categories/index.html index 97cad9f..e5839c4 100644 --- a/docs/categories/index.html +++ b/docs/categories/index.html @@ -295,7 +295,7 @@

Built with Hugo and theme Tokiwa.
- 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/index.html b/docs/index.html index 7bd0de7..786a739 100644 --- a/docs/index.html +++ b/docs/index.html @@ -527,7 +527,7 @@

Built with Hugo and theme Tokiwa.
- 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/index.json b/docs/index.json index 5a4c53c..4b79777 100644 --- a/docs/index.json +++ b/docs/index.json @@ -1 +1 @@ -[{"categories":null,"contents":"比特币白皮书 以太坊白皮书 Tangle 白皮书 详细的内容见 Tangle白皮书中文版\ntangle 是 IOTA 所用的技术,为物联网和小额支付提供支持。不同于常见的区块链,它使用一个DAG(有向无环图)作为结构,这里称为Tangle。\n传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。\n","date":"2022-11-25T00:00:00Z","permalink":"https://chi-kai.github.io/post/%E5%8C%BA%E5%9D%97%E9%93%BE%E7%99%BD%E7%9A%AE%E4%B9%A6%E8%A7%A3%E8%AF%BB/","section":"post","tags":["区块链","论文阅读"],"title":"区块链白皮书解读"},{"categories":null,"contents":"《Direct Acyclic Graph-Based Ledger for Internet of Things: Performance and Security Analysis》 问题背景 由于区块链的安全性,去中心化,可信性,在IoT系统上有可观的应用前景(如智能车,能源交易)。IoT系统具有规模大,资源受限的特性。所以其上的共识算法必须满足资源需求小,低消耗,和高的交易吞吐量。\n现在主要的两种共识算法:PoW需要高的资源消耗,PoS的币龄证明可能造成垄断和中心化。\n典型的区块链是一种单链结构,为了避免非法的fork,应用的共识算法必须降低新的block生成速率。这导致了吞吐量瓶颈和区块认证延迟的问题,在IoT系统上又有交易花费高和资源消耗大的问题。\nDAG共识算法可以允许任何节点可以立即向ledger插入一个新的block,前提是它能先处理更早的交易。这种方式会造成很多fork,DAG有很多算法来避免在传统区块链上面临的double-spending问题(Markov Chain Monte Carlo algorithm and virtual voting algorithm)。DAG共识算法的交易吞吐是不受限制的,而且资源消耗很低,这符合IoT的应用场景。\nDAG概述 名词定义 这里使用典型的Tangle算法来进行解释。\nBlock: 所有块是记录信息的存储单元(包括交易,数字签名,哈希值),在Tangle里一个块记录一个交易\nTip: 还没有被验证的块(交易)\nDirect approval:直接验证,两个块直接由一条边来链接,称为直接验证。\nindirect approval:间接验证,两个块有通过一个块和\nOwn weight: 与它的提出者的工作量有关\nCumulative weight: 代表一个交易的认证级别。是一个交易自身own weight以及它直接证明和间接证明的交易的交易own weight总和。\n共识过程 节点创造一个块来储存交易 节点通过MCMC tips 选择算法来选择两个没有冲突的tips,然后添加它们的hash到块中 节点解决一个低难度的pow问题,来避免垃圾信息 使用私钥给交易签名并广播,当其他节点收到时会检查是否合法 成功添加的交易成为tip,等待被验证。直到它的cumulative weight 达到定义的标准。 分叉问题 在分布式账本中,构建分叉以重做工作是篡改存储数据的唯一方法。基于此,double-spending的主要思想是将两笔相互冲突的交易并行放置在两条链上。在第一笔交易花费在服务上之后,攻击者扩展包含冲突交易的链并让它超过第一条链。当此操作成功时,第一笔交易将被孤立,攻击者可以多次使用token。\n单链模型: 以最长的一个链为标准,正常的矿工会在最长的链上工作 DAG模型: 以累计权重最大的子图为标准,正常的节点会通过MCMC tips 选择算法扩展权重最大的链。 《TIPS: Transaction Inclusion Protocol with Signaling in DAG-based Blockchain》 问题背景 
由于DAG区块链的高并发场景和网络延迟,矿工通常不能及时获取整个网络的更新信息,导致重复在一个并行区块包括相同的交易,在区块链中生成冗余的记录。这个交易包含冲突会浪费区块容量和降低系统性能。尽管DAG区块链已经限制交易的高并发,但是交易冲突的风险实际还会诱发矿工收益和系统吞吐的困境。\n问题分析 三种交易包含策略:\n随机包含($P^{rand}$): $p_{1}=p_{2}=\\cdots=p_{m}=\\frac{n}{m}$ 有优先级的随机包含($P^{priority}$): $p_{1} \\geq p_{2} \\geq \\cdots \\geq p_{m} \\text { and } \\frac{p_{1}}{f_{1}}=\\frac{p_{2}}{f_{2}}=\\cdots=\\frac{p_{m}}{f_{m}}$ Top n ($P^{top}$): $p_{1}=p_{2}=\\cdots=p_{n}=1 \\text { and } p_{n+1}=p_{n+2}=\\cdots=p_{m}=0$ 这里仅考虑矿工收益中的交易费用奖励。\n收入困境 算法设计 《SilentDelivery: Practical Timed-delivery of Private Information using Smart Contracts》 ","date":"2022-11-10T00:00:00Z","permalink":"https://chi-kai.github.io/post/dag%E5%8C%BA%E5%9D%97%E9%93%BE%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB/","section":"post","tags":["区块链","论文阅读"],"title":"DAG区块链论文阅读"},{"categories":null,"contents":"问题现状 区块链面临的问题 区块链作为现在一个热门技术,越来越多的人涌入其中。传统的区块链由于其单链结构和共识算法的限制,存在[!!!]等问题。 之前有研究工作提出,一个区块链中区块链的去中心化,安全,和规模三个特新不能共存。\n解决方案 分片技术: 将一个交易分片来并行处理,但是很难达成共识,跨链技术通过在不同分片之间建立通道来解决这个问题。 Layer2 Protocl:参与者能够通过私有通信而不是广播到整个网络来执行脱(主)链交易挑战是如何正确有效地保证链下和链上交易的有效性和一致性。 辅助链技术: 通过增加辅助链来让更多的交易参与。 混合结构 混合共识算法 修改硬解码参数 这些方案都受限于区块链的线性结构,因此结构上的改变成为一个新兴方案。\nDAG区块链的提出 单链结构使得同一时间多个节点竞争一个可用位置,这导致了认证缓慢,交易竞争和算力浪费。 为了能在同一时间提交更多交易,提出了基于DAG的区块链。\n概览 DAG 是指有向无环图,通常被当作一种基础数据结构应用于导航寻址,数据压缩等算法场景。 这个概念首次被Sompolinksky在GHOST中引入区块链,用来解决并发问题。改进版本被作为核心共识算法应用于以太坊中。之后,Lerner在DAGCoin中将粒度从块提升到交易,抛弃了打包和计算步骤大大提高了效率。IOTA和ByteBall 应用了无块的概念,发布了开源实现,至今引领市场。随后,一些工作又在DAG的基础上进行了改进。如Spectre,Hashgraph,Nano等。\n基于DAG的系统主要有利于需要高性能低消耗的分布式应用(DAPP)。直接应用底层的区块链可以享受到更好的特性,但是需要专业的开发技巧和昂贵的硬件设备。使用一些官方的组件是一种可替代的方案,如IOTA,MAM,Qubic。目前可以考虑应用的领域有: 物联网,数据管理,车载应用,智能家具等。\n建模 一个有环无向图由点集和边集组成。点集的每个元素可以是一个交易,一个块,或者协议中的一个事件。边集的元素是一个元组,代表两点之间的关系。\n关键参数 因为现在模型缺乏具体的实现,使用定性的参数来描述系统的基本技术。\n出度入度: 描述每个单元连接数目。 出度是指从节点指出的边,即节点的前任。入度是指指向节点的边,即节点的继承者。 交易模型:描述如何完成一笔交易。UXTO 代表一种无损耗的输出,交易时原子的,不可分割。每个操作必须通过这些交易完成。Account model 维持一种平衡状态。 可信度: 一个累加的数字展示一个单元被子块直接或者间接认证的程度。也反映下一轮被选择的概率。 认证: 一些独特的参数被用于在网络中认证单元。 分类 节点表现形式\n这个间接显示一个系统结构,是交易,事件或者区块。我们定义两种类型: 
$1^{od}$ 和 $2^{od}\t$。 前者表示请求到达时会被即刻处理,不需要等待来自节点的更多请求。这种形式包括区块和触发事件。 后者表示请求需要更多操作,多数情况下这个请求需要被预先计算或打包,然后被散播。这种形式包括区块和事件。\n节点形式表示系统结构,同时也决定账本模型,表示交易如何在DAG中生成。有两种交易模型: UXTO-based model 和 account-based model。第一个意味着所有操作都必须通过原子事务来实现。用户可以通过跟踪以前的交易历史来计算余额。对于第二个,每个用户都拥有一个帐户,并且交易被配置为其结构中的字段之一。用户直接在他们的账户中计算余额。\n网络技术\n分为三种 发散,并行,收敛。发散表示单元在不确定的方向稀疏的传播。并行表示在多个链的单元被一组节点维护。收敛表示单元按照一个确定的趋势收敛到一个确定的序列。\n按照上述标准分类如下: 共识算法 这里讨论共识算法的几个方面。\n开放程度: 表明一个任意节点是否无限制运行共识算法。\n成员选择: 选择节点成为出块节点的规则。\n单元分配: 共识算法的准备。\n单元定位: 确定一个单元在网络中的位置。\n扩展规则: 如何扩展图或者链和解除联系。\n冲突解决: 表示一系列可以确定冲突单元优先级的参数。\n特别技术: 与其他系统不同的技术。\n第一类 blockless ane natural expanding graph\nIOTA 无限制网络,使用UTXO数据模型,通过交易建立系统。IOTA把节点的事务称为tangle。一个待确认的tip需要先确认前面的两个tip,参与者也共同维护系统安全。但是如果恶意tips被持续生成,可能造成整个图向多个方向发散。所以tip选择算法是必不可少的,有三种机制提供: 一致随机,未加权随机和加权随机移动。最先进的是加权随机移动算法,是马尔可夫链蒙特卡罗 (MCMC) 算法的应用。有一些修改tip选择算法的变体,如GIOTA,EIOTA。\nGraphchain: 无限制网络。\nIOAT去除了激励机制,而Graphchain又重新引入。每个交易必须认证足够多的节点来获得激励。\nAvalancheq 一个无限制,新共识机制的网络。\n共识机制不同于拜占庭和中本聪共识。是一种叫做Slush的协议,从gossip算法和流行病网络中获得灵感的CFT容忍协议。\n第二类 based on blocks,natural expanding graph\nSpectre 一种无限制网络。关键技术是基于块的优先级的递归加权投票算法,\nPhantom 总结 从上面提到的区块链系统提取出常用的技术:\nCross-referencing(交叉引用)。一个块可以引用多个区块,也可以被多个区块引用。交叉引用可以提高吞吐量,扩大规模,降低认证时间。 Trusted authority。一个权威中心来做最后的决定,可以减少确认时间,但是会减弱去中心化特性。 Pairwise vote(成对投票)。2 对 1 的投票选择,而不是正常投票算法中的 n 对 1。 Transaction sharding(事务分片)。将交易分配到不同的链来阻止排序过程中可能的复制和冲突。 PoW 机制。用作一个反恶意节点的工具,对于子序列的PoW是预先计算的,可以保证交易瞬间完成。 对常用的共识机制分析:\nTip selection algorithm。一个新的交易如何选择之前的交易进行确认。增加了吞吐量和规模,一定程度上降低了安全性和一致性。 Recursive algorithm。递归调用一个函数直到得到一个稳定值。这个算法被共识机制采用来使得无序的块聚合成一个有序的链,使系统可以在一个确定的方向扩展。 BFT-style consensus。分为三种 传统BFT。需要根据资源确定(PoW,PoS)一个commitee,commitee成员来执行共识操作。 async-BA/leaderless BFT: 每个链都可以广播块和投票,当一个块得到足够的票数就可以提交,但是由于这个提交和确认是异步的,所以一个全局的线性排序很难达到。 前面两种的整合。经典的拜占庭容错协议首先应用于各个独立的区域,上层协议采用async-BA实现跨区域的最终共识。 Nakamoto consensus and its variants (中本聪共识和它的变体)。现存最流行的方法。传统的NC选择最长的链,变体NC选择权重最大的链,多用于形成主链。 Sorting algorithm。按照总体的线性顺序来排序,对于确保全局一致性是必不可少的。根据一些参数来确定优先级。 特性分析 BA和NC特性 拜占庭共识和中本聪共识是最流行的两种共识协议。\n拜占庭共识 
在存在恶意节点的情况下可以达成的共识。有三个特性:\nargeement: validity: termination: 中本聪共识 允许所有节点可以参与共识通过PoW,PoS等方式。有两个关键特性:\npersistence: liveness: 安全分析 Parasite Chain Attack 尝试用预先准备好的子链来替换正确的子链。\n首先攻击者参照主链来构造一个有很高可信度的子链。然后分别发送一对冲突的交易到主链和构造的私链,接下来要确保子链得到有竞争力的可信度。这时,这个冲突交易可能已经在主链上确认,攻击者再发布他的子链,这样很可能使正确的主链无效然后一个coin可能被使用两次。\n这种攻击需要攻击者有充足的算力来生成区块,以没有强力领导者的协议为目标。\nBalance attack / liveness attack 保持多个子图平衡增长来获取收益,攻击者在一个子图发布交易后,又在另一个子图发布交易,动态维持几个子图的平衡来获取收益。\n需要一个很大算力,主要攻击基于POW的协议。\nSplitting attack 类似平衡攻击,攻击者找到两个相近的分支或者子图来发送冲突交易获取利润。\n攻击者需要有强大的算力,主要攻击没有强力中心的系统。\nLarge Weight Attack Censorship Attack Replay Attack Sybi Attack ","date":"2022-11-07T00:00:00Z","permalink":"https://chi-kai.github.io/post/dag%E5%8C%BA%E5%9D%97%E9%93%BE%E7%BB%BC%E8%BF%B0/","section":"post","tags":["区块链","论文阅读"],"title":"DAG区块链综述"},{"categories":null,"contents":"《Towards On-Device Federated Learning: A Direct Acyclic Graph-based Blockchain Approach》 目标 为了解决联邦学习中的设备异步和异常检测问题,同时避免由区块链导致的资源浪费\n设备异步。传统的中心化和同步FL(Google FL),一个节点必须等待其他节点完成任务才能一起进入下一轮训练,一个崩溃的节点可能阻塞整个系统。\n异常检测。一个节点的数据集和操作对其他节点是不可见的。一些恶意节点可能会破坏整个系统的准确度和降低效率。\n主要贡献 提出了第一个基于DAG的FL异步框架来解决设备异步和异常节点检测问题。\nDAG-FL 模型 “This feature promises that a node in DAG-FL can immediately participate in an iteration of FL whenever it is in idle state. When the node completes an iteration of FL and gets a new trained local model, the new local model can be published on its local DAG as a transaction immediately, and latter the new published transaction would be seen by all other nodes.” (Cao 等。, p. 4) (pdf) ”\n还是用本地的数据来训练模型,并没有和其他节点做聚合?\n“initial” (Cao 等。, p. 
5) (pdf)\n异步构架 FL Layer:\n全局模型由存储在DAG中的本地模型使用FedAvg算法聚合产生,节点使用本地数据集进行训练。得到的新的模型被当作一个交易发布到DAG中。(每笔交易就是一个模型)\nDAG Layer:\n每个节点维护一个本地DAG,其中每个交易包含相应的认证信息,本地模型参数,和许可链接。本地DAG通过广播和无线网络更新,最终一个新的交易可以得到传播。\nApplication Layer:\n一个外部接口,通过智能合约发布任务。这个客户端可以观察整个FL过程的进展,从而控制整个FL的进行和停止。\n异步性分析:\n在整个DAG-FL中没有central server,每个节点是通过本地已有的模型来构建新的全局模型。节点可以在合适的时间来进行FL迭代,获取的新的模型发布在本地DAG,随后可以被其他节点所见。节点之间的行为互不影响可以异步进行。\n共识算法 当一个节点完成一轮迭代,生成一个新的模型。他会从本地DAG选取几个tips(加入DAG但是没有被验证的block)来进行验证:\n验证使用RSA等算法加密的身份证书。可以避免恶意节点的女巫攻击。\n使用本地的测试集来计算模型的精确度。\n然后选取模型精确度最高的几个模型来构成新的全局模型。\n节点使用这个模型和本地数据集训练,得到的新模型通过一个新的交易发布到DAG中。\n随着DAG的持续扩展,一个交易的每次认证都意味着这个交易代表的模型被选择去构建一个全局模型,进而影响最终模型的生成。得到的认证越多,它在FL中的影响越大。反之,节点就会被孤立,在FL中影响越小。\n所以,最终DAG-FL训练的模型会向大多数节点期望的方向发展,少部分恶意节点会逐渐被孤立,影响降到最小化。\nFL算法 符号定义:\nD = {1,2,3,…,N $_D$ },代表整个设备集群。D $_i$是第i个节点。\nS $_i$是D $_i$的训练集,|S $_i$| = N $_i$,这里N $_i$是S $_i$的samples数量。\nD $_i$在本地创建一个仅自己可见的DAG为g $_i$,存储在其中的交易为w\n时间t,在D $_i$训练得到的本地模型为w $_i$ $^t$\n算法流程:\nD $_i$在t $_0$开始FL算法迭代,首先验证本地DAG的一些tips(验证他们的身份和用测试集验证准确度) 将准确度较高的k个tips使用FedAvg算法聚合成一个全局模型: $$ \\omega^{t_{0}}=\\sum_{i=1}^{k} n_{i} \\omega_{d_{i}}^{t_{i}} $$ 这里 n $_i$是表示模型重要性的权重因子,为了简化这里设置为1/k,表示同等重要。 节点从数据集S $_i$中提取m个samples作为一个最小batch z$_i$ 来对得到的全局模型训练 $\\beta$个epochs. 
得到一个新模型 $\\omega_{i}^{t_{0}}$,将它发布到$g_{i}$上。 DAG-FL 操作 这里介绍框架中的两个重要算法。\nDAG-FL Controlling 在应用层的外接客户端可以认为是一个权威组织,负责发布任务。通过智能合约执行DAG-FL 控制算法。过程如上图。\nDAG-FL Updating 节点在空闲时执行DAG-FL Updating 算法。\n节点随机从本地DAG选取staleness范围在可以接收的tip。 节点先认证所选tips的身份,然后用自己的test数据集来计算所选tips的模型精确度。 选择精确度高的k个tips,使用FedAvg算法得到一个全局模型。节点用本地数据来训练这个全局模型。 一个新的交易被生成。包括身份信息,上一阶段训练得到的模型,和这k个tips的验证信息。 未来工作 在模型可用性上,使用一个小的test set可能对于一些特定场景不适合,考虑其他的异常检测方法。 信用评估 权重聚合。本文的模型使用的方法是同等权重系数的FedAvg,可以使用更好的方法来给高质量的模型更高的权重提高模型精度。 《Implicit Model Specialization through DAG-based Decentralized Federated Learning》 背景 由于联邦学习数据的非独立同分布特性,所有节点训练一个模型太过宽泛,提出一种基于DAG区块链的联邦学习框架,所有节点利用本地数据和其他节点相似的数据训练自己的特例化模型,和全局的泛化模型结合来提高系统的性能。\n这篇论文是在联邦学习之前已经有的对本地数据特例化的研究基础上,引入DAG区块链。\n模型 每个节点执行四个步骤,通过基础的随机移动算法选择DAG上的两个tips,将这两个选择的模型参数平均,得到的模型在本地数据集上训练,如果最终得到的模型有提高就发布。\ntips选择算法 从区块链接的相反方向随机遍历。每个交易(区块)根据它的子图大小分配一个权重,通常会选择最高权重的区块,使得这个遍历方向收敛。 同时,对于每个节点有特定化的偏向处理,遍历的每一步,对于下一跳可以到达的潜在模型在本地数据集上进行评估。\nWEIGHTEDCHOICE 函数从这些模型中随机选择,并根据子节点对本地数据的精确度进行加权。(到底是按照权重还是随机选择?)\n这里的精确度和权重的计算:\n遍历的随机性可以由 α 参数确定,其中值越大,权重之间的差异越大,因此随机性越小,确定性越高。另一方面,较小的 α 值会导致权重收敛,从而导致更多随机性。模型之间的预期精度差异取决于我们的方法所应用的机器学习问题,以及学习率、批量大小和局部时期等超参数。为了在模型之间的精度变化很小的情况下也能实现良好的特性化,即使在差异很大的情况下也能实现良好的泛化,将每个步骤中的精度分布 max(accuracies) - min(accuracies) 精度归一化 * 的一部分:\n《SAFA: A Semi-Asynchronous Protocol for Fast Federated Learning With Low Overhead》 背景 原有的FedAvg算法存在一些问题:\n同步开销大: 每轮迭代中央服务器需要传输全局模型给所有客户端,带宽达到一个峰值。 客户端利用不足:随机选择的客户端使得许多可以参加训练的客户端闲置。 这个论点不充分。FedAvg是在所有可用的客户端中随机选择一定比例,并不是完全随机的。\n进度浪费: 被选中的设备如果在完成本地训练之前失败,则之前的工作都会作废。 每回合效率低: 在每轮结束进行聚合,FedAvg必须等待所有客户端完成。如果一些客户端故障,整个过程会等到超时结束。 模型 SAFA包括三部分: Lag-tolerant Model Distribution (滞后容忍模型分布),Compensatory First-Come-First-Merge (CFCFM) client selection [补偿性先到先合并 (CFCFM) 客户端选择], discriminative aggregation (判别聚合)\n下面是SAFA的一个系统图:\n文中所用的参数表\nLag-tolerant Model Distribution 整个模块关键的有两点:\n有选择的同步全局模型。\n不像FedAvg算法一样每轮每个参与节点都要同步模型,这是一个交流密集的过程,开销很大。SAFA模型只要求两种类型必须同步。上一轮成功完成的节点(FedAvg中的正常节点)和被标记为过时的节点,这个过时节点是由于网络或者其他原因本地模型落后全局太多的客户端。这里有个滞后容忍参数 
$\\tau$,来调节这个滞后范围。\n这里t-1表示上一轮的最新模型, $\\omega _{k}$代表第k个节点的本地模型,$\\omega$ 代表全局模型。可以看到,在 $t- \\tau$ 范围内本地节点的滞后是可以容忍的。\n没有被选中的节点也可以参与迭代。\n没有被选中的节点也可以上传更新。这部分不会被直接合并,而是会通过一个bypass结构影响下一轮。 一个cache用来保存那些选中的节点上传的更新,没被选中的节点的更新保存在bypass中。bypass会在汇聚步骤完成后和cache合并,使得实际的有影响节点比率比参数C决定的更高。\nCompensatory First-Come-First-Merge (CFCFM) client selection 代替必须等待所有选中节点上传更新,SAFA采用先来先合并方法,只要上传的更新达到所需要的比率(是不是意味着可以上传的节点大于实际所需要的节点)就可以执行合并操作。\n给予那些参与较少的客户更高的优先级。在每一轮中,服务器都会维护一个错过上一轮训练的客户端的 id 列表,它们上传的更新会优先选择。\nDiscriminative Aggregation 聚合算法如下:\n对于选择的客户端,它们的更新将在合并到全局模型后保留在缓存中。对于未选择的客户端,更新不会在本轮生效,但会被缓存带入下一轮。对于崩溃的客户端,只有在它们没有被弃用的情况下,它们的模型才会保持不变。\n算法流程 每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。\n","date":"2022-11-06T00:00:00Z","permalink":"https://chi-kai.github.io/post/%E8%81%94%E9%82%A6%E5%AD%A6%E4%B9%A0%E7%9B%B8%E5%85%B3%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB/","section":"post","tags":["联邦学习","论文阅读"],"title":"联邦学习论文阅读"},{"categories":null,"contents":"领导人选举 首先按照论文中最关键的figure 2补全节点和RPC结构。\n节点有三个状态: Leader,Candidate,Follower 和 两个计时器: 选举计时器,心跳计时器。\nFollower: 有一个选举计时器,随机选举超时时间,每当选举超时,就转变为Candidate.\nCandidate: 中间态,当Follower一端时间没有收到心跳,选举计时器到期,就会转变为Candidate,term加1,为自己投票并向其他节点发送投票请求,\nLeader: 当一个Candidate获得半数以上的投票就会转变为Leader,最重要的节点,负责和客户端交互。需要定时向每个Follower发送心跳来维持权威。\n节点一开始状态都是Follower,Term为0。 当一个选举计时器到期时,节点转变为Candidate,term加1,开始发送投票请求。这里有三种情况: 其他节点按照先来先到的原则投票,获得半数以上的投票可以胜出成为Leader. 在选举过程中得到更高term的RPC,Candidate会转变为Follower. 
如果两个Candidate获得同样的票数,等选举计时器再次超时,会开始下一轮投票。 选出Leader后,Leader马上广播心跳来维持权威。 代码遇到的问题:\n选举超时要真的随机时间,有的随机函数返回的是固定值,这里用时间做种子。\nr := rand.New(rand.NewSource(time.Now().UnixNano())) t := time.Duration(ElectionTime+r.Intn(ElectionTime)) * time.Millisecond 由于网络原因,可能一些节点的回复不能及时收到。当收到一个超期的回复时,处理办法就是抛弃。\n// 如果回复晚了,不是同一个term或者leader则抛弃 // 处理心跳回复 if rf.currentTerm == args.Term \u0026amp;\u0026amp; rf.state == StateLeader // 在本Term内的投票且state仍为Candidate,超时过期的丢弃 // 处理投票回复 if rf.state == StateCandidate \u0026amp;\u0026amp; rf.currentTerm == args.Term 当收到一个更高term的回复RPC,Candidate转变为Follower,term变为更高的term,投票变为null。\n当收到一个更高的term的心跳时,状态转变为Follower, term变为更高的term,投票变为null。\n节点在两种情况拒绝投票,一是Candidate的term小于自己,二是自己在本term中已经投过票。\nif rf.currentTerm \u0026gt; args.Term || (rf.currentTerm == args.Term \u0026amp;\u0026amp; rf.votedFor != -1 \u0026amp;\u0026amp; rf.votedFor != args.CandidateId) 锁的使用: 当一个数据有写有读,写和读必须加锁。如果一个数据只读不写,不用加锁。\n日志复制 当选举结束后,leader开始为客户端提供服务。客户端发出的每一条请求会被交给leader处理。\nleader将每一条指令打包成一个entry \u0026lt;index,term,cmd\u0026gt;,将这个entry附加到日志中去,然后并行地发起 AppendEntries RPCs 给其他的服务器,让他们复制这条entry。\n当大部分服务器同意接收这个entry,leader将这个entry应用于状态机中,称为已提交,同时领导人的日志中之前的所有日志条目也都会被提交,包括由其他领导人创建的条目。然后将执行结果返回给客户端。\n为了维护日志的一致性,要保证日志匹配特性:\n如果在不同的日志中的两个条目拥有相同的索引和任期号,那么他们存储了相同的指令。 如果在不同的日志中的两个条目拥有相同的索引和任期号,那么他们之前的所有日志条目也全部相同。 第一个特性由 “只有一个leader可以创建entry” 来保证。\n第二个特性由附加日志 RPC 的一个简单的一致性检查所保证。它的步骤如下:\n**在发送附加日志 RPC 的时候,会带上上一条entry信息。**如果跟随者在它的日志中找不到包含相同index和term的条目,那么他就会拒绝接收新的日志条目。\n找到最大共识点。 在被跟随者拒绝之后,leader就会减小 nextIndex 值并进行重试(发送再上一个entry),直到找到最大共识点,被follower接收。之后,leader会强制覆盖follower最大共识点后面所有日志。这样就保证follower与leader日志始终一致。\nleader为所有follower节点维持一个nextIndex,记录每个follower下一个日志的index。当一个leader刚刚当选的时候,初始化所有nextIndex为自己最后一条日志的Index加1。(假设所有follower与leader日志保持一致)。\n日志复制可以做一些优化。比如在正常复制时可以批量复制日志以减少系统调用的开销;在寻找共识点时可以只携带一条日志以减少不必要的流量传输。\n安全性 选举限制 leader 只能发送日志给follower,而不能从follower接收日志,所以选出的leader必须包含集群中所有已经提交的日志。\n在选举投票时,携带最新的日志信息,和follower相比较,看谁的日志最新。如果候选人更新,则获得投票。\n这里更新的定义是: 
通过比较两份日志中最后一条日志条目的索引值和任期号定义谁的日志比较新。如果两份日志最后的条目的任期号不同,那么任期号大的日志更加新。如果两份日志最后的条目任期号相同,那么日志比较长的那个就更加新。\n提交之前任期内的日志条目 在上图中,raft 为了避免出现一致性问题,要求 leader 绝不会提交过去的 term 的 entry (即使该 entry 已经被复制到了多数节点上)。leader 永远只提交当前 term 的 entry, 过去的 entry 只会随着当前的 entry 被一并提交。(上图中的 c,term2 只会跟随 term4 被提交。)\n如果一个 candidate 能取得多数同意,说明它的日志已经是多数节点中最完备的, 那么也就可以认为该 candidate 已经包含了整个集群的所有 committed entries。\n因此 leader 当选后,应当立刻发起 AppendEntriesRPC 提交一个 no-op entry。注意,这是一个 Must,不是一个 Should,否则会有许多 corner case 存在问题。如:\n读请求:leader 此时的状态机可能并不是最新的,若服务读请求可能会违反线性一致性,即出现 safety 的问题;若不服务读请求则可能会有 liveness 的问题。\n配置变更:可能会导致数据丢失\n实际上,leader 当选后提交一个 no-op entry 日志的做法就是Raft 算法解决 “幽灵复现” 问题的解法,相关博客\n","date":"2022-10-30T00:00:00Z","permalink":"https://chi-kai.github.io/post/raft%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB/","section":"post","tags":["分布式","论文阅读","Raft"],"title":"Raft论文阅读"},{"categories":null,"contents":"什么是联邦学习 本质:联邦学习本质上是一种分布式机器学习技术,或机器学习框架。\n目标:联邦学习的目标是在保证数据隐私安全及合法合规的基础上,实现共同建模,提升AI模型的效果。\n前置知识: IID:独立同分布,表示一组随机变量的概率分布都相同,而且相互独立。例如掷色子。联邦学习背景下,数据集是非独立同分布的。\nSGD: 梯度下降算法\n绝大多数机器学习模型都有一个损失函数,来衡量预测值与实际值的差异。损失函数的值越小,模型的精确度就越高。通过使用梯度下降来调节参数,进而最小化损失函数。\n损失函数里一般有两种参数,一种是控制输入信号量的权重(Weight, 简称 w ),另一种是调整函数与真实值距离的偏差(Bias,简称 b )。我们所要做的工作,就是通过梯度下降方法,不断地调整权重 w 和偏差b,使得损失函数的值变得越来越小。\n通过计算梯度可以找到下降的方向,然后通过学习率a来控制下降的快慢。\ndef train(X, y, W, B, alpha, max_iters): \u0026#39;‘’ 选取所有的数据作为训练样本来执行梯度下降 X : 训练数据集 y : 训练数据集所对应的目标值 W : 权重向量 B : 偏差变量 alpha : 学习速率 max_iters : 梯度下降过程最大的迭代次数 \u0026#39;\u0026#39;\u0026#39; dW = 0 # 初始化权重向量的梯度累加器 dB = 0 # 初始化偏差向量的梯度累加器 m = X.shape[0] # 训练数据的数量 # 开始梯度下降的迭代 for i in range(max_iters): dW = 0 # 重新设置权重向量的梯度累加器 dB = 0 # 重新设置偏差向量的梯度累加器 # 对所有的训练数据进行遍历 for j in range(m): # 1. 遍历所有的训练数据 # 2. 计算每个训练数据的权重向量梯度w_grad和偏差向量梯度b_grad # 3. 
把w_grad和b_grad的值分别累加到dW和dB两个累加器里 W = W - alpha * (dW / m) # 更新权重的值 B = B - alpha * (dB / m) # 更新偏差的值 return W, B # 返回更新后的权重和偏差。 优化过程 固定总数 K 个客户端,每个客户端都有本地数据集。\n每次选取分数 C (比例)个客户端\n服务器将当前的全局算法发送给每个客户端。\n每个被选定的客户端执行本地计算,并将服务器更新。\n通信成本占主导 一般数据中心中,通讯花费占少数,计算花费占大头。但是在联邦优化中,通讯占主导地位\n通常上传带宽被限制到1MB或者更低。\n客户端只有在充电,插入电源,和有不限量WIFI的情况下才会参与到优化过程中来。\n希望每个客户每天只参加少量的更新回合。\n因为单个客户端的训练数据很小,而且当前智能手机等客户端的计算能力是足够强的,所以通过使用额外的计算量来减少通信的次数\n增加并行量。在每次通信过程中,使用更多客户端来更新。\n增加每个客户端的计算量。\n联邦平均算法 联邦背景下,对梯度下降算法的扩展。\n选取K个Client,Server将当前的参数传递给Client,Client根据本地数据集和参数来进行梯度下降。\n最后将训练后的参数返回给Server,Server将获得的所有参数加权处理后得到最终的参数。然后再进行下一轮计算。\n如图所示,当B $\\rightarrow$ $\\infty$,E $\\rightarrow$ 1 表示本地数据全部参与训练,只训练一次,称为FedSGD。\n分类 我们把每个参与共同建模的企业称为参与方,根据多参与方之间数据分布的不同,把联邦学习分为三类:横向联邦学习、纵向联邦学习和联邦迁移学习。\n横向联邦学习 横向联邦学习的本质是 样本的联合,适用于参与者间业态相同但触达客户不同,即特征重叠多,用户重叠少时的场景,比如不同地区的银行间,他们的业务相似(特征相似),但用户不同(样本不同)\n学习过程: 参与方各自从服务器A下载最新模型 每个参与方利用本地数据训练模型,加密梯度上传给服务器A,服务器A聚合各用户的梯度更新模型参数; 服务器A返回更新后的模型给各参与方; 各参与方更新各自模型。 纵向联邦学习 纵向联邦学习的本质是 特征的联合,适用于用户重叠多,特征重叠少的场景,比如同一地区的商超和银行,他们触达的用户都为该地区的居民(样本相同),但业务不同(特征不同)。\n学习过程 纵向联邦学习的本质是交叉用户在不同业态下的特征联合,比如商超A和银行B,在传统的机器学习建模过程中,需要将两部分数据集中到一个数据中心,然后再将每个用户的特征join成一条数据用来训练模型,所以就需要双方有用户交集(基于join结果建模),并有一方存在label。其学习步骤如上图所示,分为两大步:\n第一步:加密样本对齐。是在系统级做这件事,因此在企业感知层面不会暴露非交叉用户。\n第二步:对齐样本进行模型加密训练:\n由第三方C向A和B发送公钥,用来加密需要传输的数据; A和B分别计算和自己相关的特征中间结果,并加密交互,用来求得各自梯度和损失; A和B分别计算各自加密后的梯度并添加掩码发送给C,同时B计算加密后的损失发送给C; C解密梯度和损失后回传给A和B,A、B去除掩码并更新模型。 联邦迁移学习 当参与者间特征和样本重叠都很少时可以考虑使用联邦迁移学习,如不同地区的银行和商超间的联合。主要适用于以深度神经网络为基模型的场景。\n迁移学习,是指利用数据、任务、或模型之间的相似性,将在源领域学习过的模型,应用于 目标领域的一种学习过程。\n","date":"2022-10-17T00:00:00Z","permalink":"https://chi-kai.github.io/post/%E8%81%94%E9%82%A6%E5%AD%A6%E4%B9%A0%E7%BB%BC%E8%BF%B0/","section":"post","tags":["联邦学习","论文阅读"],"title":"联邦学习综述"},{"categories":null,"contents":"基础数据结构部分 动态字符串 SDS 前置知识 由于我对C语言没有深入了解,有很多知识点会在前面补充。\nattribute ((packed)): 对齐优化\nattribute((args)) 是GNU 
C的一个机制,可以通过编译器来修饰结构体,函数等。\n现代计算机中内存空间都是按照字节(byte)划分的,从理论上讲似乎对任何类型的变量的访问可以从任何地址开始,但实际情况是在访问特定变量的时候经常在特定的内存地址访问,这就需要各类型数据按照一定的规则在空间上排列,而不是顺序地一个接一个地排放,这就是对齐.\n为了提高效率,计算机从内存中取数据是按照一个固定长度的。以32位机为例,它每次取32个位,也就是4个字节(每字节8个位)。字节对齐有什么好处?以int型数据为例,如果它在内存中存放的位置按4字节对齐,也就是说1个int的数据全部落在计算机一次取数的区间内,那么只需要取一次就可以了\n使用__packed__参数是表示,使用原来的地址空间,编译时不要字节对齐,这样用时间换空间,使得结构体紧密。\n详细的用法见 机制详解。\nuint8_t uint16_t ... size_t 使用\n后面加_t表示是一个typedef 定义的类型,本质是原有类型。这样做是为了更好的跨平台移植,因为不同的平台中int,long 这些基础类型可能占用的字节不同,这对于一些对内存严格要求的库造成不便。使用uint8_t 等类型,在不同平台上都代表占一个字节8位,便于程序的实现。\n同理 size_t 也是用来保持跨平台移植性。可以是unsigned int unsigned char unsigned long等等,取决于实现,size_t = typeof(sizeof(X))。\nstatic inline\n头文件中很多函数使用了static inline 关键字,inline 建议编译器将函数作为一个宏内联,这样可以减少函数调用时的堆栈消耗,提高性能。但是编译器不一定会内联函数,这时候static可以保证这个函数是仅在本文件可见,避免重复包含冲突。\n数据结构 这里以sdshdr8为例\n// 一个字节 8位 // __attribute__ ((__packed__)) 用来告诉编译器取消结构在编译中的优化对齐,按照实际占用。 // 因为内存是按照2的倍数读取的,否则可能读两次,速度变慢。这里是用时间换空间 // 保证整个结构体的空间紧密 struct __attribute__ ((__packed__)) sdshdr8 { // buf 中已经使用的字节数 uint8_t len; /* used */ // 去掉头和null结束符,已经分配的字节数=有效长度+数据长度 uint8_t alloc; /* excluding the header and null terminator */ // 8位,只用前三位 unsigned char flags; /* 3 lsb of type, 5 unused bits */ // 柔性数组,没有分配之前不占内存 char buf[]; }; 二进制安全 什么是二进制安全?通俗地讲,C语言中,用“\\0”表示字符串的结束,如果字符串中本身就有“\\0”字符,字符串就会被截断,即非二进制安全;若通过某种机制,保证读写字符串时不损害其内容,则是二进制安全。在网络报文中常常需要二进制安全。 sds使用 len 来控制字符串长度,而不是使用\u0026quot;\\0\u0026quot;,保障了二进制安全。 极致的内存使用 对于不同的长度的字符串有不同的结构,上面的sdshdr8 表示长度为8位的字符串,还有sdshdr16/32/64。保证不会因为字符串过小而额外浪费字节,也不会因为字符串过长而频繁扩容。 - 结构体紧密,放弃对齐优化。在前置说明了结构体使用编译器参数packed来放弃优化,用时间换空间。 - flag。使用一个unsigned char,8位的小端三位来表示结构体的性质(5/8/16/32/64)。 - 变长数组(柔性数组)。柔性数组成员(flexible array member),也叫伸缩性数组成员,只能被放在结构体的末尾。包含柔性数组成员的结构体,通过malloc函数为柔性数组动态分配内存。之所以用柔性数组存放字符串,是因为柔性数组的地址和结构体是连续的,这样查找内存更快(因为不需要额外通过指针找到字符串的位置);可以很方便地通过柔性数组的首地址偏移得到结构体首地址,进而能很方便地获取其余变量。 与c字符串函数 始终将buf指针暴露给上层,可以和c字符串函数切合。同时可以很容易地通过减去一个sdshdr大小偏移到结构体首部来调用结构体属性。 如下面的宏定义: ```c #define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T))); 
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)))) ``` 基本操作 创建 sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) { void *sh; sds s; char type = sdsReqType(initlen); // 因为通常总是有空字符串,而使用type5每增加一次就需要扩容,所以直接使用type 8 ---- 为什么不把type5直接删了? /* Empty strings are usually created in order to append. Use type 8 * since type 5 is not good at this. */ if (type == SDS_TYPE_5 \u0026amp;\u0026amp; initlen == 0) type = SDS_TYPE_8; int hdrlen = sdsHdrSize(type); unsigned char *fp; /* flags pointer. */ size_t usable; // initlen 是buf中实际装的大小,hdrlen是sds header大小,+1 是\\0终止符 assert(initlen + hdrlen + 1 \u0026gt; initlen); /* Catch size_t overflow */ //malloc_usable还是调用tyymalloc_usable sh = trymalloc? s_trymalloc_usable(hdrlen+initlen+1, \u0026amp;usable) : s_malloc_usable(hdrlen+initlen+1, \u0026amp;usable); if (sh == NULL) return NULL; if (init==SDS_NOINIT) init = NULL; else if (!init) memset(sh, 0, hdrlen+initlen+1); // 指向buf s = (char*)sh+hdrlen; fp = ((unsigned char*)s)-1; usable = usable-hdrlen-1; // 可能申请的超过类型MaxSize if (usable \u0026gt; sdsTypeMaxSize(type)) usable = sdsTypeMaxSize(type); switch(type) { case SDS_TYPE_5: { *fp = type | (initlen \u0026lt;\u0026lt; SDS_TYPE_BITS); break; } case SDS_TYPE_8: { SDS_HDR_VAR(8,s); sh-\u0026gt;len = initlen; sh-\u0026gt;alloc = usable; *fp = type; break; } // ... 
} if (initlen \u0026amp;\u0026amp; init) memcpy(s, init, initlen); s[initlen] = \u0026#39;\\0\u0026#39;; return s; } 首先根据申请的初始大小来确定类型,通过类型可以确定hdr大小,然后来申请空间。 这里使用的s_trymalloc_usable与s_malloc_usable都是文件zmallo.c实现的内存管理函数,后面会专门讲解。这里只用知道它会申请前一个参数大小的空间,并且将空间大小赋值给后一个参数usable。 得到空间的首地址,加上头大小得到buf地址s,s[-1] 得到类型指针fp,usable减去头大小hdrlen和类型大小1得到实际可用大小。 根据类型来构建一个sds结构体,最后返回是buf的指针,补上终止符'\\0'。这里使用的是一个宏,可以借鉴这种写法,一个经常使用的操作,如果写成函数,会增加堆栈调度消耗,写成宏可以提高性能,代价是编译后的文件大小会增加。 这是sds创建的底层实现,实际使用的是上层的封装,只是对这个函数的封装调用。\n销毁 有两种方法,一种是直接销毁:\nvoid sdsfree(sds s) { if (s == NULL) return; s_free((char*)s-sdsHdrSize(s[-1])); } 一种是仅仅将sds的len标记为0,但是实际的buf并不会释放,而是等待覆写。这样可以优化性能。\n扩容 sds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) { void *sh, *newsh; size_t avail = sdsavail(s); size_t len, newlen, reqlen; char type, oldtype = s[-1] \u0026amp; SDS_TYPE_MASK; int hdrlen; size_t usable; /* Return ASAP if there is enough space left. */ if (avail \u0026gt;= addlen) return s; len = sdslen(s); sh = (char*)s-sdsHdrSize(oldtype); reqlen = newlen = (len+addlen); // 这里是防止溢出 assert(newlen \u0026gt; len); /* Catch size_t overflow */ if (greedy == 1) { // SDS_MAX_PREALLOC 是 1MB if (newlen \u0026lt; SDS_MAX_PREALLOC) newlen *= 2; else newlen += SDS_MAX_PREALLOC; } type = sdsReqType(newlen); /* Don\u0026#39;t use type 5: the user is appending to the string and type 5 is * not able to remember empty space, so sdsMakeRoomFor() must be called * at every appending operation. 
*/ if (type == SDS_TYPE_5) type = SDS_TYPE_8; hdrlen = sdsHdrSize(type); assert(hdrlen + newlen + 1 \u0026gt; reqlen); /* Catch size_t overflow */ if (oldtype==type) { // 和原类型相同,则不用释放内存,直接将buf扩容即可 newsh = s_realloc_usable(sh, hdrlen+newlen+1, \u0026amp;usable); if (newsh == NULL) return NULL; s = (char*)newsh+hdrlen; } else { /* Since the header size changes, need to move the string forward, * and can\u0026#39;t use realloc */ // 类型改变,需要重新申请内存,原内存释放 newsh = s_malloc_usable(hdrlen+newlen+1, \u0026amp;usable); if (newsh == NULL) return NULL; memcpy((char*)newsh+hdrlen, s, len+1); s_free(sh); s = (char*)newsh+hdrlen; s[-1] = type; sdssetlen(s, len); } usable = usable-hdrlen-1; // type 是通过newlen判断得到的,而usable 是 hdrlen + newlen + 1 可能出现超出的情况 if (usable \u0026gt; sdsTypeMaxSize(type)) usable = sdsTypeMaxSize(type); sdssetalloc(s, usable); return s; } 首先判断newlen加上len是否超出可用的大小avail,没超就不扩容。 和之前不同,这一版本加入了greedy参数,来调节扩容策略,当greedy为1时,每次会扩大的比所需要的更多,这样可以减少扩容频率。而greedy为0时,就是节约内存 greedy为1时启用此策略: 如果newlen小于1MB,每次扩容二背,大于1MB时每次增加1MB。(每次2倍内存很快就耗尽了) 这里根据新的newlen来确定类型,如果类型不变,只需要扩展buf数组,而类型改变的话就需要重新申请内存。 跳表zskiplist 对应的代码在 server.h 和 t_zset.c。\n跳表可以看作链表加上都多层索引,一般每两个\n/* ZSETs use a specialized version of Skiplists */ typedef struct zskiplistNode { sds ele; // 存储字符串类型的数据 double score; // 储存排序的分值 struct zskiplistNode *backward; // 后向指针 头节点和第一个节点都为NULL struct zskiplistLevel { struct zskiplistNode *forward; // 指向本层下一个节点 unsigned long span; // 跨度 指向本层下一个节点中间跨越的节点个数 } level[]; // 柔性数组,未分配内存时不占空间。初始化时,level 随机分配1~32 } zskiplistNode; typedef struct zskiplist { struct zskiplistNode *header, *tail; unsigned long length; // 除了头节点以外节点总数 int level; // 跳表的高度 } zskiplist; 用的图片是网图 链接,其中obj一般为sds,在Redis6中已经改为sds ele。\n可以从图和代码很清晰地看出跳跃表的结构。\n跳跃表是Redis有序集合的底层实现方式之一,所以每个节点的ele存储有序集合的成员member值,score存储成员score值。所有节点的分值是按从小到大的方式排序的,当有序集合的成员分值相同时,节点会按member的字典序进行排序。\n通过跳跃表结构体的属性我们可以看到,程序可以在O(1)的时间复杂度下,快速获取到跳跃表的头节点、尾节点、长度和高度。\n创建 
Redis通过zslRandomLevel函数随机生成一个1~32的值,作为新建节点的高度,值越大出现的概率越低。节点层高确定之后便不会再修改。生成随机层高的代码如下。\n// ZSKIPLIST 为 0.25 int zslRandomLevel(void) { static const int threshold = ZSKIPLIST_P*RAND_MAX; int level = 1; while (random() \u0026lt; threshold) level += 1; // 这里 ZSKIPLIST_MAXLEVEL 为32 return (level\u0026lt;ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL; } 当p=0.25时,跳跃表节点的期望层高为1/(1-0.25)≈1.33。\n下面是创建函数\n/* Create a skiplist node with the specified number of levels. * The SDS string \u0026#39;ele\u0026#39; is referenced by the node after the call. */ zskiplistNode *zslCreateNode(int level, double score, sds ele) { zskiplistNode *zn = // level 柔性数组加上头大小 来申请内存 zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel)); zn-\u0026gt;score = score; zn-\u0026gt;ele = ele; return zn; } /* Create a new skiplist. */ zskiplist *zslCreate(void) { int j; zskiplist *zsl; zsl = zmalloc(sizeof(*zsl)); zsl-\u0026gt;level = 1; zsl-\u0026gt;length = 0; // 头节点的level是最大层数 zsl-\u0026gt;header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL); for (j = 0; j \u0026lt; ZSKIPLIST_MAXLEVEL; j++) { zsl-\u0026gt;header-\u0026gt;level[j].forward = NULL; zsl-\u0026gt;header-\u0026gt;level[j].span = 0; } zsl-\u0026gt;header-\u0026gt;backward = NULL; zsl-\u0026gt;tail = NULL; return zsl; } 头节点是一个特殊的节点,不存储有序集合的member信息。头节点是跳跃表中第一个插入的节点,其level数组的每项forward都为NULL, span值都为0\n插入 /* Insert a new node in the skiplist. Assumes the element does not already * exist (up to the caller to enforce that). The skiplist takes ownership * of the passed SDS string \u0026#39;ele\u0026#39;. 
*/ zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) { // 记录每层所能到达的最右边节点 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; // 记录每层从header到update[i}所需的步长 unsigned long rank[ZSKIPLIST_MAXLEVEL]; int i, level; // 判断score 是不是NAN serverAssert(!isnan(score)); x = zsl-\u0026gt;header; // 从最高层索引开始遍历 for (i = zsl-\u0026gt;level-1; i \u0026gt;= 0; i--) { /* store rank that is crossed to reach the insert position */ // 当在最高层时,先将rank赋值为0,先假设 rank[i] = i == (zsl-\u0026gt;level-1) ? 0 : rank[i+1]; // 在第i层一直向前移动比较,因为是按照score 从小到大排列的 // 找到这层大于插入score 的位置然后下移 while (x-\u0026gt;level[i].forward \u0026amp;\u0026amp; (x-\u0026gt;level[i].forward-\u0026gt;score \u0026lt; score || (x-\u0026gt;level[i].forward-\u0026gt;score == score \u0026amp;\u0026amp; sdscmp(x-\u0026gt;level[i].forward-\u0026gt;ele,ele) \u0026lt; 0))) { // 更新总的span rank[i] += x-\u0026gt;level[i].span; x = x-\u0026gt;level[i].forward; } // 记录这层的终点 update[i] = x; } /* we assume the element is not already inside, since we allow duplicated * scores, reinserting the same element should never happen since the * caller of zslInsert() should test in the hash table if the element is * already inside or not. 
*/ // zslInsert不能应用在插入节点已经存在的情况下。 // 所以不用检查存在 //为插入节点计算随机层数 level = zslRandomLevel(); //大于原来层高的部分,只需要调整header就行。 if (level \u0026gt; zsl-\u0026gt;level) { for (i = zsl-\u0026gt;level; i \u0026lt; level; i++) { rank[i] = 0; update[i] = zsl-\u0026gt;header; // 为啥是这个?可能是用来占位 update[i]-\u0026gt;level[i].span = zsl-\u0026gt;length; } zsl-\u0026gt;level = level; } x = zslCreateNode(level,score,ele); for (i = 0; i \u0026lt; level; i++) { // 插入到每层最右侧能到达的节点之后 x-\u0026gt;level[i].forward = update[i]-\u0026gt;level[i].forward; update[i]-\u0026gt;level[i].forward = x; /* update span covered by update[i] as x is inserted here */ // 插入节点每层的span更新,这个看下图 x-\u0026gt;level[i].span = update[i]-\u0026gt;level[i].span - (rank[0] - rank[i]); update[i]-\u0026gt;level[i].span = (rank[0] - rank[i]) + 1; } /* increment span for untouched levels */ for (i = level; i \u0026lt; zsl-\u0026gt;level; i++) { update[i]-\u0026gt;level[i].span++; } x-\u0026gt;backward = (update[0] == zsl-\u0026gt;header) ? NULL : update[0]; if (x-\u0026gt;level[0].forward) x-\u0026gt;level[0].forward-\u0026gt;backward = x; else zsl-\u0026gt;tail = x; zsl-\u0026gt;length++; return x; } 下图来源于 链接\n以节点19插入为例,其中 黑色箭头的表示的跨度为update[i]-\u0026gt;level[i].span 蓝色箭头表示的跨度为rank[0] - rank[i]即节点19在level_0的update[0]为11, 在level_1的update[1]为7,rank[0] - rank[i]为节点7与节点11之间的跨度 绿色箭头表示的跨度为节点19到节点37的span\n删除 首先查找到对应的节点,将每层最右边到达的节点记录下来,对应的update。 辅助函数:\nvoid zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) { // 调整对应的span和forward int i; for (i = 0; i \u0026lt; zsl-\u0026gt;level; i++) { if (update[i]-\u0026gt;level[i].forward == x) { update[i]-\u0026gt;level[i].span += x-\u0026gt;level[i].span - 1; update[i]-\u0026gt;level[i].forward = x-\u0026gt;level[i].forward; } else { update[i]-\u0026gt;level[i].span -= 1; } } if (x-\u0026gt;level[0].forward) { x-\u0026gt;level[0].forward-\u0026gt;backward = x-\u0026gt;backward; } else { zsl-\u0026gt;tail = x-\u0026gt;backward; } // 调整level while(zsl-\u0026gt;level \u0026gt; 1 
\u0026amp;\u0026amp; zsl-\u0026gt;header-\u0026gt;level[zsl-\u0026gt;level-1].forward == NULL) zsl-\u0026gt;level--; zsl-\u0026gt;length--; } 删除函数:\nint zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; int i; //查找位置 x = zsl-\u0026gt;header; for (i = zsl-\u0026gt;level-1; i \u0026gt;= 0; i--) { while (x-\u0026gt;level[i].forward \u0026amp;\u0026amp; (x-\u0026gt;level[i].forward-\u0026gt;score \u0026lt; score || (x-\u0026gt;level[i].forward-\u0026gt;score == score \u0026amp;\u0026amp; sdscmp(x-\u0026gt;level[i].forward-\u0026gt;ele,ele) \u0026lt; 0))) { x = x-\u0026gt;level[i].forward; } // 保存每层最右边的节点 update[i] = x; } /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. */ // 可能同一个score有多个ele x = x-\u0026gt;level[0].forward; if (x \u0026amp;\u0026amp; score == x-\u0026gt;score \u0026amp;\u0026amp; sdscmp(x-\u0026gt;ele,ele) == 0) { zslDeleteNode(zsl, x, update); if (!node) zslFreeNode(x); else *node = x; return 1; } return 0; /* not found */ } 压缩列表 具体的实现在ziplist.h和ziplist.c\n压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是O(1)。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。\n// ziplist 结构 \u0026lt;zlbytes\u0026gt; \u0026lt;zltail\u0026gt; \u0026lt;zllen\u0026gt; \u0026lt;entry\u0026gt; \u0026lt;entry\u0026gt; ... \u0026lt;entry\u0026gt; \u0026lt;zlend\u0026gt; 这里的所有结构都是按照小端存储。\nzlbytes: 压缩列表的字节长度,占4个字节,因此压缩列表最多有$2^{32}-1$个字节。这个设计是为了resize时不必遍历整个列表 zltail: 压缩列表尾元素相对于压缩列表起始地址的偏移量,占4个字节,这个设计可以使pop操作不必要遍历全部。 zllen: 压缩列表的元素个数,占2个字节。zllen无法存储元素个数超过65535($2^{16}-1$)的压缩列表,必须遍历整个压缩列表才能获取到元素个数。 zlend: 压缩列表的结尾,占1个字节,恒为0xFF。 这里可以清楚地感受到C语言对内存的掌控,通过指针位移来获取结构信息。这里使用宏又是C语言的一个特色,比起inline只是建议编译器内联,宏真正是内联,对于一些细小而频繁的操作提高了性能。\n/* Return total bytes a ziplist is composed of. 
*/ #define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl))) /* Return the offset of the last item inside the ziplist. */ #define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t)))) /* Return the length of a ziplist, or UINT16_MAX if the length cannot be * determined without scanning the whole ziplist. */ #define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2))) /* The size of a ziplist header: two 32 bit integers for the total * bytes count and last item offset. One 16 bit integer for the number * of items field. */ #define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t)) /* Size of the \u0026#34;end of ziplist\u0026#34; entry. Just one byte. */ #define ZIPLIST_END_SIZE (sizeof(uint8_t)) /* Return the pointer to the first entry of a ziplist. */ #define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE) /* Return the pointer to the last entry of a ziplist, using the * last entry offset inside the ziplist header. */ #define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) /* Return the pointer to the last byte of a ziplist, which is, the * end of ziplist FF entry. 
*/ #define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-ZIPLIST_END_SIZE) 对于 结构如下:\n\u0026lt;prevlen\u0026gt; \u0026lt;encoding\u0026gt; \u0026lt;entry-data\u0026gt; previous_entry_length字段表示前一个元素的字节长度,占1个或者5个字节,当前一个元素的长度小于254字节时,用1个字节表示;当前一个元素的长度大于或等于254字节时,用5个字节来表示。而此时previous_entry_length字段的第1个字节是固定的0xFE,后面4个字节才真正表示前一个元素的长度。假设已知当前元素的首地址为p,那么p-previous_entry_length就是前一个元素的首地址,从而实现压缩列表从尾到头的遍历。\nencoding字段表示当前元素的编码,即content字段存储的数据类型(整数或者字节数组),数据内容存储在content字段。为了节约内存,encoding字段同样长度可变。\nRedis使用宏来表示\n#define ZIP_STR_MASK 0xc0 #define ZIP_INT_MASK 0x30 #define ZIP_STR_06B (0 \u0026lt;\u0026lt; 6) #define ZIP_STR_14B (1 \u0026lt;\u0026lt; 6) #define ZIP_STR_32B (2 \u0026lt;\u0026lt; 6) #define ZIP_INT_16B (0xc0 | 0\u0026lt;\u0026lt;4) #define ZIP_INT_32B (0xc0 | 1\u0026lt;\u0026lt;4) #define ZIP_INT_64B (0xc0 | 2\u0026lt;\u0026lt;4) #define ZIP_INT_24B (0xc0 | 3\u0026lt;\u0026lt;4) #define ZIP_INT_8B 0xfe 这里使用位运算来代表类型,既节省了内存又提高了性能。\n结构 typedef struct zlentry { unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/ unsigned int prevrawlen; /* Previous entry len. */ unsigned int lensize; /* Bytes used to encode this entry type/len. For example strings have a 1, 2 or 5 bytes header. Integers always use a single byte.*/ unsigned int len; /* Bytes used to represent the actual entry. For strings this is just the string length while for integers it is 1, 2, 3, 4, 8 or 0 (for 4 bit immediate) depending on the number range. */ unsigned int headersize; /* prevrawlensize + lensize. */ unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on the entry encoding. However for 4 bits immediate integers this can assume a range of values and must be range-checked. */ unsigned char *p; /* Pointer to the very start of the entry, that is, this points to prev-entry-len field. 
*/ } zlentry; 对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。\nstatic inline void zipEntry(unsigned char *p, zlentry *e) { ZIP_DECODE_PREVLEN(p, e-\u0026gt;prevrawlensize, e-\u0026gt;prevrawlen); ZIP_ENTRY_ENCODING(p + e-\u0026gt;prevrawlensize, e-\u0026gt;encoding); ZIP_DECODE_LENGTH(p + e-\u0026gt;prevrawlensize, e-\u0026gt;encoding, e-\u0026gt;lensize, e-\u0026gt;len); assert(e-\u0026gt;lensize != 0); /* check that encoding was valid. */ e-\u0026gt;headersize = e-\u0026gt;prevrawlensize + e-\u0026gt;lensize; e-\u0026gt;p = p; } 字典 结构 节点:\ntypedef struct dictEntry { void *key; // 节省内存 不同场景下使用不同字段 union { void *val; // db.dict 储存值 uint64_t u64; int64_t s64; // db.expires 储存过期时间 double d; } v; // 单链表法 解决哈希冲突。 struct dictEntry *next; /* Next entry in the same hash bucket. */ void *metadata[]; /* An arbitrary number of bytes (starting at a * pointer-aligned address) of size as returned * by dictType\u0026#39;s dictEntryMetadataBytes(). */ } dictEntry; 可以看出是使用链表法来解决hash冲突的。\nstruct dict { dictType *type; // 对应特定类型操作函数 dictEntry **ht_table[2]; unsigned long ht_used[2]; long rehashidx; /* rehashing not in progress if rehashidx == -1 */ /* Keep small vars at end for optimal (minimal) struct padding */ int16_t pauserehash; /* If \u0026gt;0 rehashing is paused (\u0026lt;0 indicates coding error) */ signed char ht_size_exp[2]; /* exponent of size. 
(size = 1\u0026lt;\u0026lt;exp) */ }; ","date":"2022-05-22T00:00:00Z","permalink":"https://chi-kai.github.io/post/redis%E6%BA%90%E7%A0%81%E5%89%96%E6%9E%90-%E4%B8%80/","section":"post","tags":["源码剖析","Redis"],"title":"Redis源码剖析(一)"}] \ No newline at end of file +[{"categories":null,"contents":"比特币白皮书 以太坊白皮书 Tangle 白皮书 详细的内容见 Tangle白皮书中文版\ntangle 是 IOTA 所用的技术,为物联网和小额支付提供支持。不同于常见的区块链,它使用一个DAG(有向无环图)作为结构,这里称为Tangle。\n传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。\nFabric 白皮书 Hyperledger Fabric 是 Linux 基金会 的 一个项目,是Hyperledger下面的一个子项目。作为一个开源联盟链,被很多项目应用。\n它的主要特点是模块化的共识机制,相对高性能,和可以使用常规语言编写智能合约(golang)。\n概念 联盟链 文中划分联盟链和公链的标准是: 是否发币和节点身份是否可知\n状态机复制(SMR)是建设弹性应用众所周知的方式,但是如果我们把运行在区块链上的智能合约看作一种分布式应用,与传统的SMR区别在于:\n许多应用并发运行 这些应用可以被任何人动态地部署 这些应用的代码是不被信任的,可能有恶意 order-execute 现有的大部分可以运行智能合约的区块链遵循SMR实现一种order-execute的架构: 节点先将交易排序再将它们广播给其他节点,然后每个节点顺序执行。\n所有节点对所有交易的顺序执行限制了性能,并且需要采取复杂的措施来防止源自不受信任的合约(例如在以太坊中使用“gas”计算运行时)的针对平台的拒绝服务攻击;智能合约很难做到并发。\n最大的限制是交易必须是确定的,这就使得不能使用常规编程语言来实现,必须使用特定的语言。\nexecute-order-validate ","date":"2022-11-25T00:00:00Z","permalink":"https://chi-kai.github.io/post/%E5%8C%BA%E5%9D%97%E9%93%BE%E7%99%BD%E7%9A%AE%E4%B9%A6%E8%A7%A3%E8%AF%BB/","section":"post","tags":["区块链","论文阅读"],"title":"区块链白皮书解读"},{"categories":null,"contents":"《Direct Acyclic Graph-Based Ledger for Internet of Things: Performance and Security Analysis》 问题背景 由于区块链的安全性,去中心化,可信性,在IoT系统上有可观的应用前景(如智能车,能源交易)。IoT系统具有规模大,资源受限的特性。所以其上的共识算法必须满足资源需求小,低消耗,和高的交易吞吐量。\n现在主要的两种共识算法:PoW需要高的资源消耗,PoS的币龄证明可能造成垄断和中心化。\n典型的区块链是一种单链结构,为了避免非法的fork,应用的共识算法必须降低新的block生成速率。这导致了吞吐量瓶颈和区块认证延迟的问题,在IoT系统上又有交易花费高和资源消耗大的问题。\nDAG共识算法可以允许任何节点可以立即向ledger插入一个新的block,前提是它能先处理更早的交易。这种方式会造成很多fork,DAG有很多算法来避免在传统区块链上面临的double-spending问题(Markov Chain Monte Carlo algorithm and virtual voting algorithm)。DAG共识算法的交易吞吐是不受限制的,而且资源消耗很低,这符合IoT的应用场景。\nDAG概述 名词定义 这里使用典型的Tangle算法来进行解释。\nBlock: 所有块是记录信息的存储单元(包括交易,数字签名,哈希值),在Tangle里一个块记录一个交易\nTip: 还没有被验证的块(交易)\nDirect approval:直接验证,两个块直接由一条边来链接,称为直接验证。\nindirect approval:间接验证,两个块有通过一个块和\nOwn 
weight: 与它的提出者的工作量有关\nCumulative weight: 代表一个交易的认证级别。是一个交易自身own weight以及它直接证明和间接证明的交易的交易own weight总和。\n共识过程 节点创造一个块来储存交易 节点通过MCMC tips 选择算法来选择两个没有冲突的tips,然后添加它们的hash到块中 节点解决一个低难度的pow问题,来避免垃圾信息 使用私钥给交易签名并广播,当其他节点收到时会检查是否合法 成功添加的交易成为tip,等待被验证。直到它的cumulative weight 达到定义的标准。 分叉问题 在分布式账本中,构建分叉以重做工作是篡改存储数据的唯一方法。基于此,double-spending的主要思想是将两笔相互冲突的交易并行放置在两条链上。在第一笔交易花费在服务上之后,攻击者扩展包含冲突交易的链并让它超过第一条链。当此操作成功时,第一笔交易将被孤立,攻击者可以多次使用token。\n单链模型: 以最长的一个链为标准,正常的矿工会在最长的链上工作 DAG模型: 以累计权重最大的子图为标准,正常的节点会通过MCMC tips 选择算法扩展权重最大的链。 《TIPS: Transaction Inclusion Protocol with Signaling in DAG-based Blockchain》 问题背景 由于DAG区块链的高并发场景和网络延迟,矿工通常不能及时获取整个网络的更新信息,导致重复在一个并行区块包括相同的交易,在区块链中生成冗余的记录。这个交易包含冲突会浪费区块容量和降低系统性能。尽管DAG区块链已经限制交易的高并发,但是交易冲突的风险实际还会诱发矿工收益和系统吞吐的困境。\n问题分析 三种交易包含策略:\n随机包含($P^{rand}$): $p_{1}=p_{2}=\\cdots=p_{m}=\\frac{n}{m}$ 有优先级的随机包含($P^{priority}$): $p_{1} \\geq p_{2} \\geq \\cdots \\geq p_{m} \\text { and } \\frac{p_{1}}{f_{1}}=\\frac{p_{2}}{f_{2}}=\\cdots=\\frac{p_{m}}{f_{m}}$ Top n ($P^{top}$): $p_{1}=p_{2}=\\cdots=p_{n}=1 \\text { and } p_{n+1}=p_{n+2}=\\cdots=p_{m}=0$ 这里仅考虑矿工收益中的交易费用奖励。\n收入困境 算法设计 《SilentDelivery: Practical Timed-delivery of Private Information using Smart Contracts》 ","date":"2022-11-10T00:00:00Z","permalink":"https://chi-kai.github.io/post/dag%E5%8C%BA%E5%9D%97%E9%93%BE%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB/","section":"post","tags":["区块链","论文阅读"],"title":"DAG区块链论文阅读"},{"categories":null,"contents":"问题现状 区块链面临的问题 区块链作为现在一个热门技术,越来越多的人涌入其中。传统的区块链由于其单链结构和共识算法的限制,存在[!!!]等问题。 之前有研究工作提出,一个区块链中区块链的去中心化,安全,和规模三个特新不能共存。\n解决方案 分片技术: 将一个交易分片来并行处理,但是很难达成共识,跨链技术通过在不同分片之间建立通道来解决这个问题。 Layer2 Protocl:参与者能够通过私有通信而不是广播到整个网络来执行脱(主)链交易挑战是如何正确有效地保证链下和链上交易的有效性和一致性。 辅助链技术: 通过增加辅助链来让更多的交易参与。 混合结构 混合共识算法 修改硬解码参数 这些方案都受限于区块链的线性结构,因此结构上的改变成为一个新兴方案。\nDAG区块链的提出 单链结构使得同一时间多个节点竞争一个可用位置,这导致了认证缓慢,交易竞争和算力浪费。 为了能在同一时间提交更多交易,提出了基于DAG的区块链。\n概览 DAG 是指有向无环图,通常被当作一种基础数据结构应用于导航寻址,数据压缩等算法场景。 
这个概念首次被Sompolinksky在GHOST中引入区块链,用来解决并发问题。改进版本被作为核心共识算法应用于以太坊中。之后,Lerner在DAGCoin中将粒度从块提升到交易,抛弃了打包和计算步骤大大提高了效率。IOTA和ByteBall 应用了无块的概念,发布了开源实现,至今引领市场。随后,一些工作又在DAG的基础上进行了改进。如Spectre,Hashgraph,Nano等。\n基于DAG的系统主要有利于需要高性能低消耗的分布式应用(DAPP)。直接应用底层的区块链可以享受到更好的特性,但是需要专业的开发技巧和昂贵的硬件设备。使用一些官方的组件是一种可替代的方案,如IOTA,MAM,Qubic。目前可以考虑应用的领域有: 物联网,数据管理,车载应用,智能家具等。\n建模 一个有环无向图由点集和边集组成。点集的每个元素可以是一个交易,一个块,或者协议中的一个事件。边集的元素是一个元组,代表两点之间的关系。\n关键参数 因为现在模型缺乏具体的实现,使用定性的参数来描述系统的基本技术。\n出度入度: 描述每个单元连接数目。 出度是指从节点指出的边,即节点的前任。入度是指指向节点的边,即节点的继承者。 交易模型:描述如何完成一笔交易。UXTO 代表一种无损耗的输出,交易时原子的,不可分割。每个操作必须通过这些交易完成。Account model 维持一种平衡状态。 可信度: 一个累加的数字展示一个单元被子块直接或者间接认证的程度。也反映下一轮被选择的概率。 认证: 一些独特的参数被用于在网络中认证单元。 分类 节点表现形式\n这个间接显示一个系统结构,是交易,事件或者区块。我们定义两种类型: $1^{od}$ 和 $2^{od}\t$。 前者表示请求到达时会被即刻处理,不需要等待来自节点的更多请求。这种形式包括区块和触发事件。 后者表示请求需要更多操作,多数情况下这个请求需要被预先计算或打包,然后被散播。这种形式包括区块和事件。\n节点形式表示系统结构,同时也决定账本模型,表示交易如何在DAG中生成。有两种交易模型: UXTO-based model 和 account-based model。第一个意味着所有操作都必须通过原子事务来实现。用户可以通过跟踪以前的交易历史来计算余额。对于第二个,每个用户都拥有一个帐户,并且交易被配置为其结构中的字段之一。用户直接在他们的账户中计算余额。\n网络技术\n分为三种 发散,并行,收敛。发散表示单元在不确定的方向稀疏的传播。并行表示在多个链的单元被一组节点维护。收敛表示单元按照一个确定的趋势收敛到一个确定的序列。\n按照上述标准分类如下: 共识算法 这里讨论共识算法的几个方面。\n开放程度: 表明一个任意节点是否无限制运行共识算法。\n成员选择: 选择节点成为出块节点的规则。\n单元分配: 共识算法的准备。\n单元定位: 确定一个单元在网络中的位置。\n扩展规则: 如何扩展图或者链和解除联系。\n冲突解决: 表示一系列可以确定冲突单元优先级的参数。\n特别技术: 与其他系统不同的技术。\n第一类 blockless ane natural expanding graph\nIOTA 无限制网络,使用UTXO数据模型,通过交易建立系统。IOTA把节点的事务称为tangle。一个待确认的tip需要先确认前面的两个tip,参与者也共同维护系统安全。但是如果恶意tips被持续生成,可能造成整个图向多个方向发散。所以tip选择算法是必不可少的,有三种机制提供: 一致随机,未加权随机和加权随机移动。最先进的是加权随机移动算法,是马尔可夫链蒙特卡罗 (MCMC) 算法的应用。有一些修改tip选择算法的变体,如GIOTA,EIOTA。\nGraphchain: 无限制网络。\nIOAT去除了激励机制,而Graphchain又重新引入。每个交易必须认证足够多的节点来获得激励。\nAvalancheq 一个无限制,新共识机制的网络。\n共识机制不同于拜占庭和中本聪共识。是一种叫做Slush的协议,从gossip算法和流行病网络中获得灵感的CFT容忍协议。\n第二类 based on blocks,natural expanding graph\nSpectre 一种无限制网络。关键技术是基于块的优先级的递归加权投票算法,\nPhantom 总结 从上面提到的区块链系统提取出常用的技术:\nCross-referencing(交叉引用)。一个块可以引用多个区块,也可以被多个区块引用。交叉引用可以提高吞吐量,扩大规模,降低认证时间。 Trusted authority。一个权威中心来做最后的决定,可以减少确认时间,但是会减弱去中心化特性。 Pairwise vote(成对投票)。2 对 1 
的投票选择,而不是正常投票算法中的 n 对 1。 Transaction sharding(事务分片)。将交易分配到不同的链来阻止排序过程中可能的复制和冲突。 PoW 机制。用作一个反恶意节点的工具,对于子序列的PoW是预先计算的,可以保证交易瞬间完成。 对常用的共识机制分析:\nTip selection algorithm。一个新的交易如何选择之前的交易进行确认。增加了吞吐量和规模,一定程度上降低了安全性和一致性。 Recursive algorithm。递归调用一个函数直到得到一个稳定值。这个算法被共识机制采用来使得无序的块聚合成一个有序的链,使系统可以在一个确定的方向扩展。 BFT-style consensus。分为三种 传统BFT。需要根据资源确定(PoW,PoS)一个committee,committee成员来执行共识操作。 async-BA/leaderless BFT: 每个链都可以广播块和投票,当一个块得到足够的票数就可以提交,但是由于这个提交和确认是异步的,所以一个全局的线性排序很难达到。 前面两种的整合。经典的拜占庭容错协议首先应用于各个独立的区域,上层协议采用async-BA实现跨区域的最终共识。 Nakamoto consensus and its variants (中本聪共识和它的变体)。现存最流行的方法。传统的NC选择最长的链,变体NC选择权重最大的链,多用于形成主链。 Sorting algorithm。按照总体的线性顺序来排序,对于确保全局一致性是必不可少的。根据一些参数来确定优先级。 特性分析 BA和NC特性 拜占庭共识和中本聪共识是最流行的两种共识协议。\n拜占庭共识 在存在恶意节点的情况下可以达成的共识。有三个特性:\nagreement: validity: termination: 中本聪共识 允许所有节点可以参与共识通过PoW,PoS等方式。有两个关键特性:\npersistence: liveness: 安全分析 Parasite Chain Attack 尝试用预先准备好的子链来替换正确的子链。\n首先攻击者参照主链来构造一个有很高可信度的子链。然后分别发送一对冲突的交易到主链和构造的私链,接下来要确保子链得到有竞争力的可信度。这时,这个冲突交易可能已经在主链上确认,攻击者再发布他的子链,这样很可能使正确的主链无效然后一个coin可能被使用两次。\n这种攻击需要攻击者有充足的算力来生成区块,以没有强力领导者的协议为目标。\nBalance attack / liveness attack 保持多个子图平衡增长来获取收益,攻击者在一个子图发布交易后,又在另一个子图发布交易,动态维持几个子图的平衡来获取收益。\n需要一个很大算力,主要攻击基于POW的协议。\nSplitting attack 类似平衡攻击,攻击者找到两个相近的分支或者子图来发送冲突交易获取利润。\n攻击者需要有强大的算力,主要攻击没有强力中心的系统。\nLarge Weight Attack Censorship Attack Replay Attack Sybil Attack ","date":"2022-11-07T00:00:00Z","permalink":"https://chi-kai.github.io/post/dag%E5%8C%BA%E5%9D%97%E9%93%BE%E7%BB%BC%E8%BF%B0/","section":"post","tags":["区块链","论文阅读"],"title":"DAG区块链综述"},{"categories":null,"contents":"《Towards On-Device Federated Learning: A Direct Acyclic Graph-based Blockchain Approach》 目标 为了解决联邦学习中的设备异步和异常检测问题,同时避免由区块链导致的资源浪费\n设备异步。传统的中心化和同步FL(Google FL),一个节点必须等待其他节点完成任务才能一起进入下一轮训练,一个崩溃的节点可能阻塞整个系统。\n异常检测。一个节点的数据集和操作对其他节点是不可见的。一些恶意节点可能会破坏整个系统的准确度和降低效率。\n主要贡献 提出了第一个基于DAG的FL异步框架来解决设备异步和异常节点检测问题。\nDAG-FL 模型 “This feature promises that a node in DAG-FL can immediately participate in an iteration of FL whenever it is in idle state. 
When the node completes an iteration of FL and gets a new trained local model, the new local model can be published on its local DAG as a transaction immediately, and latter the new published transaction would be seen by all other nodes.” (Cao 等。, p. 4) (pdf) ”\n还是用本地的数据来训练模型,并没有和其他节点做聚合?\n“initial” (Cao 等。, p. 5) (pdf)\n异步构架 FL Layer:\n全局模型由存储在DAG中的本地模型使用FedAvg算法聚合产生,节点使用本地数据集进行训练。得到的新的模型被当作一个交易发布到DAG中。(每笔交易就是一个模型)\nDAG Layer:\n每个节点维护一个本地DAG,其中每个交易包含下相应的认证信息,本地模型参数,和许可链接。本地DAG通过广播和无线网络更新,最终一个新的交易可以得到传播。\nApplication Layer:\n一个外部接口,通过智能合约发布任务。这个客户端可以观察整个FL过程的进展,从而控制整个FL的进行和停止。\n异步性分析:\n在整个DAG-FL中没有central server,每个节点是通过本地已有的模型来构建新的全局模型。节点可以在合适的时间来进行FL迭代,获取的新的模型发布在本地DAG,随后可以被其他节点所见。节点之间的行为互不影响可以异步进行。\n共识算法 当一个节点完成一轮迭代,生成一个新的模型。他会从本地DAG选取几个tips(加入DAG但是没有被验证的block)来进行验证:\n验证使用RSA等算法加密的身份证书。可以避免恶意节点的女巫攻击。\n使用本地的测试集来计算模型的精确度。\n然后选取模型精确度最高的几个模型来构成新的全局模型。\n节点使用这个模型和本地数据集训练,得到的新模型通过一个新的交易发布到DAG中。\n随着DAG的持续扩展,一个交易的每次认证都意味着这个交易代表的模型被选择去构建一个全局模型,进而影响最终模型的生成。得到的认证越多,它在FL中的影响越大。反之,节点就会被孤立,在FL中影响越小。\n所以,最终DAG-FL训练的模型会向大多数节点期望的方向发展,少部分恶意节点会逐渐被孤立,影响降到最小化。\nFL算法 符号定义:\nD = {1,2,3,…,N $_D$ },代表整个设备集群。D $_i$是第i个节点。\nS $_i$是D $_i$的训练集,|S $_i$| = N $_i$,这里N $_i$是S $_i$的samples数量。\nD $_i$在本地创建一个仅自己可见的DAG为g $_i$,存储在其中的交易为w\n时间t,在D $_i$训练得到的本地模型为w $_i$ $^t$\n算法流程:\nD $_i$在t $_0$开始FL算法迭代,首先验证本地DAG的一些tips(验证他们的身份和用测试集验证准确度) 将准确度较高的的k个tips使用FedAvg算法聚合成一个全局模型: $$ \\omega^{t_{0}}=\\sum_{i=1}^{k} n_{i} \\omega_{d_{i}}^{t_{i}} $$ 这里 n $_i$是表示模型重要性的权重因子,为了简化这里设置为1/k,表示同等重要。 节点从数据集S $_i$中提取m个samples作为一个最小batch z$_i$ 来对得到的全局模型训练 $\\beta$个epochs. 
得到一个新模型 $\\omega {i}^{ t{0}}$,将它发布到$g_{i}$上。 DAG-FL 操作 这里介绍框架中的两个重要算法。\nDAG-FL Controlling 在应用层的外接客户端可以认为是一个权威组织,负责任务发布任务。通过智能合约执行DAG-FL 控制算法。过程如上图。\nDAG-FL Updating 节点在空闲时执行DAG-FL Updating 算法。\n节点随机从本地DAG选取staleness范围在可以接收的tip。 节点先认证所选tips的身份,然后用自己的test数据集来计算所选tips的模型精确度。 选择精确度高的k个tips,使用FedAvging算法得到一个全局模型。节点用本地数据来训练这个全局模型。 一个新的交易被生成。包括身份信息,上一阶段训练得到的模型,和最k个tips的验证信息。 未来工作 在模型可用性上,使用一个小的test set可能对于一些特定场景不适合,考虑其他的异常检测方法。 信用评估 权重聚合。本文的模型使用的方法是同等权重系数的FedAvg,可以使用更好的方法来给高质量的模型更高的权重提高模型精度。 《Implicit Model Specialization through DAG-based Decentralized Federated Learning》 背景 由于联邦学习数据的非独立同分布特性,所有节点训练一个模型太过宽泛,提出一种基于DAG区块链的联邦学习框架,所有节点利用本地数据和其他节点相似的数据训练自己的特例化模型,和全局的泛化模型结合来提高系统的性能。\n这篇论文是在联邦学习之前已经有的对本地数据特例化的研究基础上,引入DAG区块链。\n模型 每个节点执行四个步骤,通过基础的随机移动算法选择DAG上的两个tips,将这个两个选择的模型参数平均,得到的模型在本地数据集上训练,如果最终得到的模型有提高就发布。\ntips选择算法 从区块链接的相反方向随机遍历。每个交易(区块)根据它的子图大小分配一个权重,通常会选择最高权重的区块,使得这个遍历方向收敛。 同时,对于每个节点有特定化的偏向处理,遍历的每一步,对于下一跳可以到达的潜在模型在本地数据集上进行评估。\nWEIGHTEDCHOICE 函数从这些模型中随机选择,并根据子节点对本地数据的精确度进行加权。(到底是按照权重还是随机选择?)\n这里的精确度计和权重计算:\n遍历的随机性可以由 α 参数确定,其中值越大,权重之间的差异越大,因此随机性越小,确定性越高。另一方面,较小的 α 值会导致权重收敛,从而导致更多随机性。模型之间的预期精度差异取决于我们的方法所应用的机器学习问题,以及学习率、批量大小和局部时期等超参数。为了在模型之间的精度变化很小的情况下也能实现良好的特性化,即使在差异很大的情况下也能实现良好的泛化,将每个步骤中的精度分布 max(accuracies) - min(accuracies) 精度归一化 * 的一部分:\n《SAFA: A Semi-Asynchronous Protocol for Fast Federated Learning With Low Overhead》 背景 原有的FedAvg算法存在一些问题:\n同步开销大: 每轮迭代中央服务器需要传输全局模型给所有客户端,带宽达到一个峰值。 客户端利用不足:随机选择的客户端使得许多可以参加训练的客户端闲置。 这个论点不充分。FedAvg是在所有可用的客户端中随机选择一定比例,并不是完全随机的。\n进度浪费: 被选中的设备如果在完成本地训练之前失败,则之前的工作都会作废。 每回合效率低: 在每轮结束进行聚合,FedAvg必须等待所有客户端完成。如果一些客户端故障,整个过程会等到超时结束。 模型 SAFA包括三部分: Lag-tolerant Model Distribution (滞后容忍模型分布),Compensatory First-Come-First-Merge (CFCFM) client selection [补偿性先到先合并 (CFCFM) 客户端选择], discriminative aggregation (判别聚合)\n下面SAFA的一个系统图:\n文中所用的参数表\nLag-tolerant Model Distribution 整个模块关键的有两点:\n有选择的同步全局模型。\n不像FedAvg算法一样每轮每个参与节点都要同步模型,这是一个交流密集的过程,开销很大。SAFA模型只要求两种类型必须同步。上一轮成功完成的节点(FedAvg中的正常节点)和被标记为过时的节点,这个过时节点是由于网络或者其他原因本地模型落后全局太多的客户端。这里有个滞后容忍参数 
$\\tau$,来调节这个滞后范围。\n这里t-1表示上一轮的最新模型, $\\omega _{k}$代表第k个节点的本地模型,$\\omega$ 代表全局模型。可以看到,在 $t- \\tau$ 范围内本地节点的滞后是可以容忍的。\n没有被选中的节点也可以参与迭代。\n没有被选中的节点也可以上传更新。这部分不会被直接合并,而是会通过一个bypass结构影响下一轮。 一个cache用来保存那些选中的节点上传的更新,没被选中的节点的更新保存在bypass中。bypass会在汇聚步骤完成后和cache合并,使得实际的有影响节点比率比参数C决定的更高。\nCompensatory First-Come-First-Merge (CFCFM) client selection 代替必须等待所有选中节点上传更新,SAFA采用先来先合并方法,只要上传的更新达到所需要的比率(是不是意味着可以上传的节点大于实际所需要的节点)就可以执行合并操作。\n给予那些参与较少的客户更高的优先级。在每一轮中,服务器都会维护一个错过上一轮训练的客户端的 id 列表,它们上传的更新会优先选择。\nDiscriminative Aggregation 聚合算法如下:\n对于选择的客户端,它们的更新将在合并到全局模型后保留在缓存中。对于未选择的客户端,更新不会在本轮生效,但会被缓存带入下一轮。对于崩溃的客户端,只有在它们没有被弃用的情况下,它们的模型才会保持不变。\n算法流程 每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。\nIOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》 背景 ","date":"2022-11-06T00:00:00Z","permalink":"https://chi-kai.github.io/post/%E8%81%94%E9%82%A6%E5%AD%A6%E4%B9%A0%E7%9B%B8%E5%85%B3%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB/","section":"post","tags":["联邦学习","论文阅读"],"title":"联邦学习论文阅读"},{"categories":null,"contents":"领导人选举 首先按照论文中最关键的figure 2补全节点和RPC结构。\n节点有三个状态: Leader,Candidate,Follower 和 两个计时器: 选举计时器,心跳计时器。\nFollower: 有一个选举计时器,随机选举超时时间,每当选举超时,就转变为Candidate.\nCandidate: 中间态,当Follower一端时间没有收到心跳,选举计时器到期,就会转变为Candidate,term加1,为自己投票并向其他节点发送投票请求,\nLeader: 当一个Candidate获得半数以上的投票就会转变为Leader,最重要的节点,负责和客户端交互。需要定时向每个Follower发送心跳来维持权威。\n节点一开始状态都是Follower,Term为0。 当一个选举计时器到期时,节点转变为Candidate,term加1,开始发送投票请求。这里有三种情况: 其他节点按照先来先到的原则投票,获得半数以上的投票可以胜出成为Leader. 在选举过程中得到更高term的RPC,Candidate会转变为Follower. 
如果两个Candidate获得同样的票数,等选举计时器再次超时,会开始下一轮投票。 选出Leader后,Leader马上广播心跳来维持权威。 代码遇到的问题:\n选举超时要真的随机时间,有的随机函数返回的是固定值,这里用时间做种子。\nr := rand.New(rand.NewSource(time.Now().UnixNano())) t := time.Duration(ElectionTime+r.Intn(ElectionTime)) * time.Millisecond 由于网络原因,可能一些节点的回复不能及时收到。当收到一个超期的回复时,处理办法就是抛弃。\n// 如果回复晚了,不是同一个term或者leader则抛弃 // 处理心跳回复 if rf.currentTerm == args.Term \u0026amp;\u0026amp; rf.state == StateLeader // 在本Term内的投票且state仍为Candidate,超时过期的丢弃 // 处理投票回复 if rf.state == StateCandidate \u0026amp;\u0026amp; rf.currentTerm == args.Term 当收到一个更高term的回复RPC,Candidate转变为Follower,term变为更高的term,投票变为null。\n当收到一个更高的term的心跳时,状态转变为Follower, term变为更高的term,投票变为null。\n节点在两种情况拒绝投票,一是Candidate的term小于自己,二是自己在本term中已经投过票。\nif rf.currentTerm \u0026gt; args.Term || (rf.currentTerm == args.Term \u0026amp;\u0026amp; rf.votedFor != -1 \u0026amp;\u0026amp; rf.votedFor != args.CandidateId) 锁的使用: 当一个数据有写有读,写和读必须加锁。如果一个数据只读不写,不用加锁。\n日志复制 当选举结束后,leader开始为客户端提供服务。客户端发出的每一条请求会被交给leader处理。\nleader将每一条指令打包成一个entry \u0026lt;index,term,cmd\u0026gt;,将这个entry附加到日志中去,然后并行地发起 AppendEntries RPCs 给其他的服务器,让他们复制这条entry。\n当大部分服务器同意接收这个entry,leader将这个entry应用于状态机中,称为已提交,同时领导人的日志中之前的所有日志条目也都会被提交,包括由其他领导人创建的条目。然后将执行结果返回给客户端。\n为了维护日志的一致性,要保证日志匹配特性:\n如果在不同的日志中的两个条目拥有相同的索引和任期号,那么他们存储了相同的指令。 如果在不同的日志中的两个条目拥有相同的索引和任期号,那么他们之前的所有日志条目也全部相同。 第一个特性由 “只有一个leader可以创建entry” 来保证。\n第二个特性由附加日志 RPC 的一个简单的一致性检查所保证。它的步骤如下:\n**在发送附加日志 RPC 的时候,会带上上一条entry信息。**如果跟随者在它的日志中找不到包含相同index和term的条目,那么他就会拒绝接收新的日志条目。\n找到最大共识点。 在被跟随者拒绝之后,leader就会减小 nextIndex 值并进行重试(发送再上一个entry),直到找到最大共识点,被follower接收。之后,leader会强制覆盖follower最大共识点后面所有日志。这样就保证follower与leader日志始终一致。\nleader为所有follower节点维持一个nextIndex,记录每个follower下一个日志的index。当一个leader刚刚当选的时候,初始化所有nextIndex为自己最后一条日志的Index加1。(假设所有follower与leader日志保持一致)。\n日志复制可以做一些优化。比如在正常复制时可以批量复制日志以减少系统调用的开销;在寻找共识点时可以只携带一条日志以减少不必要的流量传输。\n安全性 选举限制 leader 只能发送日志给follower,而不能从follower接收日志,所以选出的leader必须包含集群中所有已经提交的日志。\n在选举投票时,携带最新的日志信息,和follower相比较,看谁的日志最新。如果候选人更新,则获得投票。\n这里更新的定义是: 
通过比较两份日志中最后一条日志条目的索引值和任期号定义谁的日志比较新。如果两份日志最后的条目的任期号不同,那么任期号大的日志更加新。如果两份日志最后的条目任期号相同,那么日志比较长的那个就更加新。\n提交之前任期内的日志条目 在上图中,raft 为了避免出现一致性问题,要求 leader 绝不会提交过去的 term 的 entry (即使该 entry 已经被复制到了多数节点上)。leader 永远只提交当前 term 的 entry, 过去的 entry 只会随着当前的 entry 被一并提交。(上图中的 c,term2 只会跟随 term4 被提交。)\n如果一个 candidate 能取得多数同意,说明它的日志已经是多数节点中最完备的, 那么也就可以认为该 candidate 已经包含了整个集群的所有 committed entries。\n因此 leader 当选后,应当立刻发起 AppendEntriesRPC 提交一个 no-op entry。注意,这是一个 Must,不是一个 Should,否则会有许多 corner case 存在问题。如:\n读请求:leader 此时的状态机可能并不是最新的,若服务读请求可能会违反线性一致性,即出现 safety 的问题;若不服务读请求则可能会有 liveness 的问题。\n配置变更:可能会导致数据丢失\n实际上,leader 当选后提交一个 no-op entry 日志的做法就是Raft 算法解决 “幽灵复现” 问题的解法,相关博客\n","date":"2022-10-30T00:00:00Z","permalink":"https://chi-kai.github.io/post/raft%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB/","section":"post","tags":["分布式","论文阅读","Raft"],"title":"Raft论文阅读"},{"categories":null,"contents":"什么是联邦学习 本质:联邦学习本质上是一种分布式机器学习技术,或机器学习框架。\n目标:联邦学习的目标是在保证数据隐私安全及合法合规的基础上,实现共同建模,提升AI模型的效果。\n前置知识: IID:独立同分布,表示一组随机变量的概率分布都相同,而且相互独立。例如掷色子。联邦学习背景下,数据集是非独立同分布的。\nSGD: 梯度下降算法\n绝大多数机器学习模型都有一个损失函数,来衡量预测值与实际值的差异。损失函数的值越小,模型的精确度就越高。通过使用梯度下降来调节参数,进而最小化损失函数。\n损失函数里一般有两种参数,一种是控制输入信号量的权重(Weight, 简称 w ),另一种是调整函数与真实值距离的偏差(Bias,简称 b )。我们所要做的工作,就是通过梯度下降方法,不断地调整权重 w 和偏差b,使得损失函数的值变得越来越小。\n通过计算梯度可以找到下降的方向,然后通过学习率a来控制下降的快慢。\ndef train(X, y, W, B, alpha, max_iters): \u0026#39;‘’ 选取所有的数据作为训练样本来执行梯度下降 X : 训练数据集 y : 训练数据集所对应的目标值 W : 权重向量 B : 偏差变量 alpha : 学习速率 max_iters : 梯度下降过程最大的迭代次数 \u0026#39;\u0026#39;\u0026#39; dW = 0 # 初始化权重向量的梯度累加器 dB = 0 # 初始化偏差向量的梯度累加器 m = X.shape[0] # 训练数据的数量 # 开始梯度下降的迭代 for i in range(max_iters): dW = 0 # 重新设置权重向量的梯度累加器 dB = 0 # 重新设置偏差向量的梯度累加器 # 对所有的训练数据进行遍历 for j in range(m): # 1. 遍历所有的训练数据 # 2. 计算每个训练数据的权重向量梯度w_grad和偏差向量梯度b_grad # 3. 
把w_grad和b_grad的值分别累加到dW和dB两个累加器里 W = W - alpha * (dW / m) # 更新权重的值 B = B - alpha * (dB / m) # 更新偏差的值 return W, B # 返回更新后的权重和偏差。 优化过程 固定总数 K 个客户端,每个客户端都有本地数据集。\n每次选取分数 C (比例)个客户端\n服务器将当前的全局算法发送给每个客户端。\n每个被选定的客户端执行本地计算,并将服务器更新。\n通信成本占主导 一般数据中心中,通讯花费占少数,计算花费占大头。但是在联邦优化中,通讯占主导地位\n通常上传带宽被限制到1MB或者更低。\n客户端只有在充电,插入电源,和有不限量WIFI的情况下才会参与到优化过程中来。\n希望每个客户每天只参加少量的更新回合。\n因为单个客户端的训练数据很小,而且当前智能手机等客户端的计算能力是足够强的,所以通过使用额外的计算量来减少通信的次数\n增加并行量。在每次通信过程中,使用更多客户端来更新。\n增加每个客户端的计算量。\n联邦平均算法 联邦背景下,对梯度下降算法的扩展。\n选取K个Client,Server将当前的参数传递给Client,Client根据本地数据集和参数来进行梯度下降。\n最后将训练后的参数返回给Server,Server将获得的所有参数加权处理后得到最终的参数。然后再进行下一轮计算。\n如图所示,当B $\\rightarrow$ $\\infty$,E $\\rightarrow$ 1 表示本地数据全部参与训练,只训练一次,称为FedSGD。\n分类 我们把每个参与共同建模的企业称为参与方,根据多参与方之间数据分布的不同,把联邦学习分为三类:横向联邦学习、纵向联邦学习和联邦迁移学习。\n横向联邦学习 横向联邦学习的本质是 样本的联合,适用于参与者间业态相同但触达客户不同,即特征重叠多,用户重叠少时的场景,比如不同地区的银行间,他们的业务相似(特征相似),但用户不同(样本不同)\n学习过程: 参与方各自从服务器A下载最新模型 每个参与方利用本地数据训练模型,加密梯度上传给服务器A,服务器A聚合各用户的梯度更新模型参数; 服务器A返回更新后的模型给各参与方; 各参与方更新各自模型。 纵向联邦学习 纵向联邦学习的本质是 特征的联合,适用于用户重叠多,特征重叠少的场景,比如同一地区的商超和银行,他们触达的用户都为该地区的居民(样本相同),但业务不同(特征不同)。\n学习过程 纵向联邦学习的本质是交叉用户在不同业态下的特征联合,比如商超A和银行B,在传统的机器学习建模过程中,需要将两部分数据集中到一个数据中心,然后再将每个用户的特征join成一条数据用来训练模型,所以就需要双方有用户交集(基于join结果建模),并有一方存在label。其学习步骤如上图所示,分为两大步:\n第一步:加密样本对齐。是在系统级做这件事,因此在企业感知层面不会暴露非交叉用户。\n第二步:对齐样本进行模型加密训练:\n由第三方C向A和B发送公钥,用来加密需要传输的数据; A和B分别计算和自己相关的特征中间结果,并加密交互,用来求得各自梯度和损失; A和B分别计算各自加密后的梯度并添加掩码发送给C,同时B计算加密后的损失发送给C; C解密梯度和损失后回传给A和B,A、B去除掩码并更新模型。 联邦迁移学习 当参与者间特征和样本重叠都很少时可以考虑使用联邦迁移学习,如不同地区的银行和商超间的联合。主要适用于以深度神经网络为基模型的场景。\n迁移学习,是指利用数据、任务、或模型之间的相似性,将在源领域学习过的模型,应用于 目标领域的一种学习过程。\n","date":"2022-10-17T00:00:00Z","permalink":"https://chi-kai.github.io/post/%E8%81%94%E9%82%A6%E5%AD%A6%E4%B9%A0%E7%BB%BC%E8%BF%B0/","section":"post","tags":["联邦学习","论文阅读"],"title":"联邦学习综述"},{"categories":null,"contents":"基础数据结构部分 动态字符串 SDS 实现在 sds.h/sds.c。\n设计原则 为什么不使用c语言原生的字符串操作库? 
c字符串用'\\0'作为终止符,不能满足二进制安全,而且求字符串长度,拼接等操作都要遍历到'\\0'来实现,需要自己控制内存使用,操作复杂度高。\n前置知识 由于我对C语言没有深入了解,有很多知识点会在前面补充。\nattribute ((packed)): 对齐优化\nattribute((args)) 是GNU C的一个机制,可以通过编译器来修饰结构体,函数等。\n现代计算机中内存空间都是按照字节(byte)划分的,从理论上讲似乎对任何类型的变量的访问可以从任何地址开始,但实际情况是在访问特定变量的时候经常在特定的内存地址访问,这就需要各类型数据按照一定的规则在空间上排列,而不是顺序地一个接一个地排放,这就是对齐.\n为了提高效率,计算机从内存中取数据是按照一个固定长度的。以32位机为例,它每次取32个位,也就是4个字节(每字节8个位)。字节对齐有什么好处?以int型数据为例,如果它在内存中存放的位置按4字节对齐,也就是说1个int的数据全部落在计算机一次取数的区间内,那么只需要取一次就可以了\n使用__packed__参数是表示,使用原来的地址空间,编译时不要字节对齐,这样用时间换空间,使得结构体紧密。\n详细的用法见 机制详解。\nuint8_t uint16_t ... size_t 使用\n后面加_t表示是一个typedef 定义的类型,本质是原有类型。这样做是为了更好的跨平台移植,因为不同的平台中int,long 这些基础类型可能占用的字节不同,这对于一些对内存严格要求的库造成不便。使用uint8_t 等类型,在不同平台上都代表占一个字节8位,便于程序的实现。\n同理 size_t 也是用来保持跨平台移植性。可以是unsigned int unsigned char unsigned long等等,取决于实现,size_t = typeof(sizeof(X))。\nstatic inline\n头文件中很多函数使用了static inline 关键字,inline 建议编译器将函数作为一个宏内联,这样可以减少函数调用时的堆栈消耗,提高性能。但是编译器不一定会内联函数,这时候static可以保证这个函数是仅在本文件可见,避免重复包含冲突。\n数据结构 这里以sdshdr8为例\n// 一个字节 8位 // __attribute__ ((__packed__)) 用来告诉编译器取消结构在编译中的优化对齐,按照实际占用。 // 因为内存是按照2的倍数读取的,否则可能读两次,速度变慢。这里是用时间换空间 // 保证整个结构体的空间紧密 struct __attribute__ ((__packed__)) sdshdr8 { // buf 中已经使用的字节数 uint8_t len; /* used */ // 去掉头和null结束符,已经分配的字节数=有效长度+数据长度 uint8_t alloc; /* excluding the header and null terminator */ // 8位,只用前三位 unsigned char flags; /* 3 lsb of type, 5 unused bits */ // 柔性数组,没有分配之前不占内存 char buf[]; }; 记录了已经使用的空间和分配的空间,比C字符串操作效率更高。和 C 语言中的字符串操作相比,SDS 通过记录字符数组的使用长度和分配空间大小,避免了对字符串的遍历操作,降低了操作开销,进一步就可以帮助诸多字符串操作更加高效地完成,比如创建、追加、复制、比较等。\n二进制安全 什么是二进制安全?通俗地讲,C语言中,用“\\0”表示字符串的结束,如果字符串中本身就有“\\0”字符,字符串就会被截断,即非二进制安全;若通过某种机制,保证读写字符串时不损害其内容,则是二进制安全。在网络报文中常常需要二进制安全。 sds使用 len 来控制字符串长度,而不是使用\u0026quot;\\0\u0026quot;,保障了二进制安全。 极致的内存使用 对于不同的长度的字符串有不同的结构,上面的sdshdr8 表示长度为8位的字符串,还有sdshdr16/32/64。保证不会因为字符串过小而额外浪费字节,也不会因为字符串过长而频繁扩容。 - 结构体紧密,放弃对齐优化。在前置说明了结构体使用编译器参数packed来放弃优化,用时间换空间。 - flag。使用一个unsigned char,8位的小端三位来表示结构体的性质(5/8/16/32/64)。 - 变长数组(柔性数组)。柔性数组成员(flexible array 
member),也叫伸缩性数组成员,只能被放在结构体的末尾。包含柔性数组成员的结构体,通过malloc函数为柔性数组动态分配内存。之所以用柔性数组存放字符串,是因为柔性数组的地址和结构体是连续的,这样查找内存更快(因为不需要额外通过指针找到字符串的位置);可以很方便地通过柔性数组的首地址偏移得到结构体首地址,进而能很方便地获取其余变量。 与c字符串函数 始终将buf指针暴露给上层,可以和c字符串函数切合。同时可以很容易地通过减去一个sdshdr大小偏移到结构体首部来调用结构体属性。 如下面的宏定义: ```c #define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T))); #define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)))) ``` 基本操作 创建 sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) { void *sh; sds s; char type = sdsReqType(initlen); // 因为通常总是有空字符串,而使用type5每增加一次就需要扩容,所以直接使用type 8 ---- 为什么不把type5直接删了? /* Empty strings are usually created in order to append. Use type 8 * since type 5 is not good at this. */ if (type == SDS_TYPE_5 \u0026amp;\u0026amp; initlen == 0) type = SDS_TYPE_8; int hdrlen = sdsHdrSize(type); unsigned char *fp; /* flags pointer. */ size_t usable; // initlen 是buf中实际装的大小,hdrlen是sds header大小,+1 是\\0终止符 assert(initlen + hdrlen + 1 \u0026gt; initlen); /* Catch size_t overflow */ //malloc_usable还是调用tyymalloc_usable sh = trymalloc? s_trymalloc_usable(hdrlen+initlen+1, \u0026amp;usable) : s_malloc_usable(hdrlen+initlen+1, \u0026amp;usable); if (sh == NULL) return NULL; if (init==SDS_NOINIT) init = NULL; else if (!init) memset(sh, 0, hdrlen+initlen+1); // 指向buf s = (char*)sh+hdrlen; fp = ((unsigned char*)s)-1; usable = usable-hdrlen-1; // 可能申请的超过类型MaxSize if (usable \u0026gt; sdsTypeMaxSize(type)) usable = sdsTypeMaxSize(type); switch(type) { case SDS_TYPE_5: { *fp = type | (initlen \u0026lt;\u0026lt; SDS_TYPE_BITS); break; } case SDS_TYPE_8: { SDS_HDR_VAR(8,s); sh-\u0026gt;len = initlen; sh-\u0026gt;alloc = usable; *fp = type; break; } // ... 
} if (initlen \u0026amp;\u0026amp; init) memcpy(s, init, initlen); s[initlen] = \u0026#39;\\0\u0026#39;; return s; } 首先根据申请的初始大小来确定类型,通过类型可以确定hdr大小,然后来申请空间。 这里使用的s_trymalloc_usable与s_malloc_usable都是文件zmallo.c实现的内存管理函数,后面会专门讲解。这里只用知道它会申请前一个参数大小的空间,并且将空间大小赋值给后一个参数usable。 得到空间的首地址,加上头大小得到buf地址s,s[-1] 得到类型指针fp,usable减去头大小hdrlen和类型大小1得到实际可用大小。 根据类型来构建一个sds结构体,最后返回是buf的指针,补上终止符'\\0'。这里使用的是一个宏,可以借鉴这种写法,一个经常使用的操作,如果写成函数,会增加堆栈调度消耗,写成宏可以提高性能,代价是编译后的文件大小会增加。 这是sds创建的底层实现,实际使用的是上层的封装,只是对这个函数的封装调用。\n销毁 有两种方法,一种是直接销毁:\nvoid sdsfree(sds s) { if (s == NULL) return; s_free((char*)s-sdsHdrSize(s[-1])); } 一种是仅仅将sds的len标记为0,但是实际的buf并不会释放,而是等待覆写。这样可以优化性能。\n扩容 sds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) { void *sh, *newsh; size_t avail = sdsavail(s); size_t len, newlen, reqlen; char type, oldtype = s[-1] \u0026amp; SDS_TYPE_MASK; int hdrlen; size_t usable; /* Return ASAP if there is enough space left. */ if (avail \u0026gt;= addlen) return s; len = sdslen(s); sh = (char*)s-sdsHdrSize(oldtype); reqlen = newlen = (len+addlen); // 这里是防止溢出 assert(newlen \u0026gt; len); /* Catch size_t overflow */ if (greedy == 1) { // SDS_MAX_PREALLOC 是 1MB if (newlen \u0026lt; SDS_MAX_PREALLOC) newlen *= 2; else newlen += SDS_MAX_PREALLOC; } type = sdsReqType(newlen); /* Don\u0026#39;t use type 5: the user is appending to the string and type 5 is * not able to remember empty space, so sdsMakeRoomFor() must be called * at every appending operation. 
*/ if (type == SDS_TYPE_5) type = SDS_TYPE_8; hdrlen = sdsHdrSize(type); assert(hdrlen + newlen + 1 \u0026gt; reqlen); /* Catch size_t overflow */ if (oldtype==type) { // 和原类型相同,则不用释放内存,直接将buf扩容即可 newsh = s_realloc_usable(sh, hdrlen+newlen+1, \u0026amp;usable); if (newsh == NULL) return NULL; s = (char*)newsh+hdrlen; } else { /* Since the header size changes, need to move the string forward, * and can\u0026#39;t use realloc */ // 类型改变,需要重新申请内存,原内存释放 newsh = s_malloc_usable(hdrlen+newlen+1, \u0026amp;usable); if (newsh == NULL) return NULL; memcpy((char*)newsh+hdrlen, s, len+1); s_free(sh); s = (char*)newsh+hdrlen; s[-1] = type; sdssetlen(s, len); } usable = usable-hdrlen-1; // type 是通过newlen判断得到的,而usable 是 hdrlen + newlen + 1 可能出现超出的情况 if (usable \u0026gt; sdsTypeMaxSize(type)) usable = sdsTypeMaxSize(type); sdssetalloc(s, usable); return s; } 首先判断addlen是否超出可用的大小avail,没超就不扩容。 和之前不同,这一版本加入了greedy参数,来调节扩容策略,当greedy为1时,每次会扩大的比所需要的更多,这样可以减少扩容频率。而greedy为0时,就是节约内存 greedy为1时启用此策略: 如果newlen小于1MB,每次扩容二倍,大于1MB时每次增加1MB。(每次2倍内存很快就耗尽了) 这里根据新的newlen来确定类型,如果类型不变,只需要扩展buf数组,而类型改变的话就需要重新申请内存。 跳表zskiplist 对应的代码在 server.h 和 t_zset.c。\n跳表可以看作链表加上多层索引,一般每两个节点提取一个节点作为上一层的索引。\n/* ZSETs use a specialized version of Skiplists */ typedef struct zskiplistNode { sds ele; // 存储字符串类型的数据 double score; // 储存排序的分值 struct zskiplistNode *backward; // 后向指针 头节点和第一个节点都为NULL struct zskiplistLevel { struct zskiplistNode *forward; // 指向本层下一个节点 unsigned long span; // 跨度 指向本层下一个节点中间跨越的节点个数 } level[]; // 柔性数组,未分配内存时不占空间。初始化时,level 随机分配1~32 } zskiplistNode; typedef struct zskiplist { struct zskiplistNode *header, *tail; unsigned long length; // 除了头节点以外节点总数 int level; // 跳表的高度 } zskiplist; 用的图片是网图 链接,其中obj一般为sds,在Redis6中已经改为sds ele。\n可以从图和代码很清晰地看出跳跃表的结构。\n跳跃表是Redis有序集合的底层实现方式之一,所以每个节点的ele存储有序集合的成员member值,score存储成员score值。所有节点的分值是按从小到大的方式排序的,当有序集合的成员分值相同时,节点会按member的字典序进行排序。\n通过跳跃表结构体的属性我们可以看到,程序可以在O(1)的时间复杂度下,快速获取到跳跃表的头节点、尾节点、长度和高度。\n创建 
Redis通过zslRandomLevel函数随机生成一个1~32的值,作为新建节点的高度,值越大出现的概率越低。节点层高确定之后便不会再修改。生成随机层高的代码如下。\n// ZSKIPLIST 为 0.25 int zslRandomLevel(void) { static const int threshold = ZSKIPLIST_P*RAND_MAX; int level = 1; while (random() \u0026lt; threshold) level += 1; // 这里 ZSKIPLIST_MAXLEVEL 为32 return (level\u0026lt;ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL; } 当p=0.25时,跳跃表节点的期望层高为1/(1-0.25)≈1.33。\n下面是创建函数\n/* Create a skiplist node with the specified number of levels. * The SDS string \u0026#39;ele\u0026#39; is referenced by the node after the call. */ zskiplistNode *zslCreateNode(int level, double score, sds ele) { zskiplistNode *zn = // level 柔性数组加上头大小 来申请内存 zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel)); zn-\u0026gt;score = score; zn-\u0026gt;ele = ele; return zn; } /* Create a new skiplist. */ zskiplist *zslCreate(void) { int j; zskiplist *zsl; zsl = zmalloc(sizeof(*zsl)); zsl-\u0026gt;level = 1; zsl-\u0026gt;length = 0; // 头节点的level是最大层数 zsl-\u0026gt;header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL); for (j = 0; j \u0026lt; ZSKIPLIST_MAXLEVEL; j++) { zsl-\u0026gt;header-\u0026gt;level[j].forward = NULL; zsl-\u0026gt;header-\u0026gt;level[j].span = 0; } zsl-\u0026gt;header-\u0026gt;backward = NULL; zsl-\u0026gt;tail = NULL; return zsl; } 头节点是一个特殊的节点,不存储有序集合的member信息。头节点是跳跃表中第一个插入的节点,其level数组的每项forward都为NULL, span值都为0\n插入 /* Insert a new node in the skiplist. Assumes the element does not already * exist (up to the caller to enforce that). The skiplist takes ownership * of the passed SDS string \u0026#39;ele\u0026#39;. 
*/ zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) { // 记录每层所能到达的最右边节点 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; // 记录每层从header到update[i}所需的步长 unsigned long rank[ZSKIPLIST_MAXLEVEL]; int i, level; // 判断score 是不是NAN serverAssert(!isnan(score)); x = zsl-\u0026gt;header; // 从最高层索引开始遍历 for (i = zsl-\u0026gt;level-1; i \u0026gt;= 0; i--) { /* store rank that is crossed to reach the insert position */ // 当在最高层时,先将rank赋值为0,先假设 rank[i] = i == (zsl-\u0026gt;level-1) ? 0 : rank[i+1]; // 在第i层一直向前移动比较,因为是按照score 从小到大排列的 // 找到这层大于插入score 的位置然后下移 while (x-\u0026gt;level[i].forward \u0026amp;\u0026amp; (x-\u0026gt;level[i].forward-\u0026gt;score \u0026lt; score || (x-\u0026gt;level[i].forward-\u0026gt;score == score \u0026amp;\u0026amp; sdscmp(x-\u0026gt;level[i].forward-\u0026gt;ele,ele) \u0026lt; 0))) { // 更新总的span rank[i] += x-\u0026gt;level[i].span; x = x-\u0026gt;level[i].forward; } // 记录这层的终点 update[i] = x; } /* we assume the element is not already inside, since we allow duplicated * scores, reinserting the same element should never happen since the * caller of zslInsert() should test in the hash table if the element is * already inside or not. 
*/ // zslInsert不能应用在插入节点已经存在的情况下。 // 所以不用检查存在 //为插入节点计算随机层数 level = zslRandomLevel(); //大于原来层高的部分,只需要调整header就行。 if (level \u0026gt; zsl-\u0026gt;level) { for (i = zsl-\u0026gt;level; i \u0026lt; level; i++) { rank[i] = 0; update[i] = zsl-\u0026gt;header; // 为啥是这个?可能是用来占位 update[i]-\u0026gt;level[i].span = zsl-\u0026gt;length; } zsl-\u0026gt;level = level; } x = zslCreateNode(level,score,ele); for (i = 0; i \u0026lt; level; i++) { // 插入到每层最右侧能到达的节点之后 x-\u0026gt;level[i].forward = update[i]-\u0026gt;level[i].forward; update[i]-\u0026gt;level[i].forward = x; /* update span covered by update[i] as x is inserted here */ // 插入节点每层的span更新,这个看下图 x-\u0026gt;level[i].span = update[i]-\u0026gt;level[i].span - (rank[0] - rank[i]); update[i]-\u0026gt;level[i].span = (rank[0] - rank[i]) + 1; } /* increment span for untouched levels */ for (i = level; i \u0026lt; zsl-\u0026gt;level; i++) { update[i]-\u0026gt;level[i].span++; } x-\u0026gt;backward = (update[0] == zsl-\u0026gt;header) ? NULL : update[0]; if (x-\u0026gt;level[0].forward) x-\u0026gt;level[0].forward-\u0026gt;backward = x; else zsl-\u0026gt;tail = x; zsl-\u0026gt;length++; return x; } 下图来源于 链接\n以节点19插入为例,其中 黑色箭头的表示的跨度为update[i]-\u0026gt;level[i].span 蓝色箭头表示的跨度为rank[0] - rank[i]即节点19在level_0的update[0]为11, 在level_1的update[1]为7,rank[0] - rank[i]为节点7与节点11之间的跨度 绿色箭头表示的跨度为节点19到节点37的span\n删除 首先查找到对应的节点,将每层最右边到达的节点记录下来,对应的update。 辅助函数:\nvoid zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) { // 调整对应的span和forward int i; for (i = 0; i \u0026lt; zsl-\u0026gt;level; i++) { if (update[i]-\u0026gt;level[i].forward == x) { update[i]-\u0026gt;level[i].span += x-\u0026gt;level[i].span - 1; update[i]-\u0026gt;level[i].forward = x-\u0026gt;level[i].forward; } else { update[i]-\u0026gt;level[i].span -= 1; } } if (x-\u0026gt;level[0].forward) { x-\u0026gt;level[0].forward-\u0026gt;backward = x-\u0026gt;backward; } else { zsl-\u0026gt;tail = x-\u0026gt;backward; } // 调整level while(zsl-\u0026gt;level \u0026gt; 1 
\u0026amp;\u0026amp; zsl-\u0026gt;header-\u0026gt;level[zsl-\u0026gt;level-1].forward == NULL) zsl-\u0026gt;level--; zsl-\u0026gt;length--; } 删除函数:\nint zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; int i; //查找位置 x = zsl-\u0026gt;header; for (i = zsl-\u0026gt;level-1; i \u0026gt;= 0; i--) { while (x-\u0026gt;level[i].forward \u0026amp;\u0026amp; (x-\u0026gt;level[i].forward-\u0026gt;score \u0026lt; score || (x-\u0026gt;level[i].forward-\u0026gt;score == score \u0026amp;\u0026amp; sdscmp(x-\u0026gt;level[i].forward-\u0026gt;ele,ele) \u0026lt; 0))) { x = x-\u0026gt;level[i].forward; } // 保存每层最右边的节点 update[i] = x; } /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. */ // 可能同一个score有多个ele x = x-\u0026gt;level[0].forward; if (x \u0026amp;\u0026amp; score == x-\u0026gt;score \u0026amp;\u0026amp; sdscmp(x-\u0026gt;ele,ele) == 0) { zslDeleteNode(zsl, x, update); if (!node) zslFreeNode(x); else *node = x; return 1; } return 0; /* not found */ } 压缩列表 具体的实现在ziplist.h和ziplist.c\n压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是 O(1) 。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。\n// ziplist 结构 \u0026lt;zlbytes\u0026gt; \u0026lt;zltail\u0026gt; \u0026lt;zllen\u0026gt; \u0026lt;entry\u0026gt; \u0026lt;entry\u0026gt; ... \u0026lt;entry\u0026gt; \u0026lt;zlend\u0026gt; 这里的所有结构都是按照小端存储。\nzlbytes: 压缩列表的字节长度,占4个字节,因此压缩列表最多有$2^{32}-1$个字节。这个设计是为了resize时不必遍历整个列表 zltail: 压缩列表尾元素相对于压缩列表起始地址的偏移量,占4个字节,这个设计可以使pop操作不必要遍历全部。 zllen: 压缩列表的元素个数,占2个字节。zllen无法存储元素个数超过65535($2^{16}-1$)的压缩列表,必须遍历整个压缩列表才能获取到元素个数。 zlend: 压缩列表的结尾,占1个字节,恒为0xFF。 这里可以清楚地感受到C语言对内存的掌控,通过指针位移来获取结构信息。这里使用宏又是C语言的一个特色,比起inline只是建议编译器内联,宏真正是内联,对于一些细小而频繁的操作提高了性能。\n/* Return total bytes a ziplist is composed of. 
*/ #define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl))) /* Return the offset of the last item inside the ziplist. */ #define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t)))) /* Return the length of a ziplist, or UINT16_MAX if the length cannot be * determined without scanning the whole ziplist. */ #define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2))) /* The size of a ziplist header: two 32 bit integers for the total * bytes count and last item offset. One 16 bit integer for the number * of items field. */ #define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t)) /* Size of the \u0026#34;end of ziplist\u0026#34; entry. Just one byte. */ #define ZIPLIST_END_SIZE (sizeof(uint8_t)) /* Return the pointer to the first entry of a ziplist. */ #define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE) /* Return the pointer to the last entry of a ziplist, using the * last entry offset inside the ziplist header. */ #define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) /* Return the pointer to the last byte of a ziplist, which is, the * end of ziplist FF entry. 
*/ #define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-ZIPLIST_END_SIZE) 对于 结构如下:\n\u0026lt;prevlen\u0026gt; \u0026lt;encoding\u0026gt; \u0026lt;entry-data\u0026gt; previous_entry_length字段表示前一个元素的字节长度,占1个或者5个字节,当前一个元素的长度小于254字节时,用1个字节表示;当前一个元素的长度大于或等于254字节时,用5个字节来表示。而此时previous_entry_length字段的第1个字节是固定的0xFE,后面4个字节才真正表示前一个元素的长度。假设已知当前元素的首地址为p,那么p-previous_entry_length就是前一个元素的首地址,从而实现压缩列表从尾到头的遍历。\nencoding字段表示当前元素的编码,即content字段存储的数据类型(整数或者字节数组),数据内容存储在content字段。为了节约内存,encoding字段同样长度可变。\nRedis使用宏来表示\n#define ZIP_STR_MASK 0xc0 #define ZIP_INT_MASK 0x30 #define ZIP_STR_06B (0 \u0026lt;\u0026lt; 6) #define ZIP_STR_14B (1 \u0026lt;\u0026lt; 6) #define ZIP_STR_32B (2 \u0026lt;\u0026lt; 6) #define ZIP_INT_16B (0xc0 | 0\u0026lt;\u0026lt;4) #define ZIP_INT_32B (0xc0 | 1\u0026lt;\u0026lt;4) #define ZIP_INT_64B (0xc0 | 2\u0026lt;\u0026lt;4) #define ZIP_INT_24B (0xc0 | 3\u0026lt;\u0026lt;4) #define ZIP_INT_8B 0xfe 这里使用位运算来代表类型,既节省了内存又提高了性能。\n结构 typedef struct zlentry { unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/ unsigned int prevrawlen; /* Previous entry len. */ unsigned int lensize; /* Bytes used to encode this entry type/len. For example strings have a 1, 2 or 5 bytes header. Integers always use a single byte.*/ unsigned int len; /* Bytes used to represent the actual entry. For strings this is just the string length while for integers it is 1, 2, 3, 4, 8 or 0 (for 4 bit immediate) depending on the number range. */ unsigned int headersize; /* prevrawlensize + lensize. */ unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on the entry encoding. However for 4 bits immediate integers this can assume a range of values and must be range-checked. */ unsigned char *p; /* Pointer to the very start of the entry, that is, this points to prev-entry-len field. 
*/ } zlentry; 对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。\n解码操作,主要用宏实现:\nstatic inline void zipEntry(unsigned char *p, zlentry *e) { ZIP_DECODE_PREVLEN(p, e-\u0026gt;prevrawlensize, e-\u0026gt;prevrawlen); ZIP_ENTRY_ENCODING(p + e-\u0026gt;prevrawlensize, e-\u0026gt;encoding); ZIP_DECODE_LENGTH(p + e-\u0026gt;prevrawlensize, e-\u0026gt;encoding, e-\u0026gt;lensize, e-\u0026gt;len); assert(e-\u0026gt;lensize != 0); /* check that encoding was valid. */ e-\u0026gt;headersize = e-\u0026gt;prevrawlensize + e-\u0026gt;lensize; e-\u0026gt;p = p; } 这里主要就是对字节的读取,可以去看源代码。\n操作 创建 /* Create a new empty ziplist. */ // 先申请初始的空间(4+4+2+1),再对zlbytes,zltail,zllen,zlend逐个初始化 unsigned char *ziplistNew(void) { unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE; unsigned char *zl = zmalloc(bytes); ZIPLIST_BYTES(zl) = intrev32ifbe(bytes); ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE); ZIPLIST_LENGTH(zl) = 0; zl[bytes-1] = ZIP_END; return zl; } 插入元素 字典 结构 节点:\ntypedef struct dictEntry { void *key; // 节省内存 不同场景下使用不同字段 union { void *val; // db.dict 储存值 uint64_t u64; int64_t s64; // db.expires 储存过期时间 double d; } v; // 单链表法 解决哈希冲突。 struct dictEntry *next; /* Next entry in the same hash bucket. */ void *metadata[]; /* An arbitrary number of bytes (starting at a * pointer-aligned address) of size as returned * by dictType\u0026#39;s dictEntryMetadataBytes(). */ } dictEntry; 可以看出是使用链表法来解决hash冲突的。\nstruct dict { dictType *type; // 对应特定类型操作函数 dictEntry **ht_table[2]; // 哈希表。有两个,一个正常使用,另外一个在rehash时使用 unsigned long ht_used[2]; // 记录每个哈希表被使用的数目。 long rehashidx; /* rehashing not in progress if rehashidx == -1 */ /* Keep small vars at end for optimal (minimal) struct padding */ int16_t pauserehash; /* If \u0026gt;0 rehashing is paused (\u0026lt;0 indicates coding error) */ // size 的 系数,size 是2 的N次幂 signed char ht_size_exp[2]; /* exponent of size. 
(size = 1\u0026lt;\u0026lt;exp) */ }; 这里可以看到一个dictType 用来对应特定类型的操作函数,这些函数体现了面向对象编程的思想,会在后面合适的时机用到。\n比如找个hashFunction 用来控制dict使用的hash函数,默认为siphash。\ntypedef struct dictType { uint64_t (*hashFunction)(const void *key); // hash函数 void *(*keyDup)(dict *d, const void *key); // key的 复制函数 void *(*valDup)(dict *d, const void *obj); // val 的复制函数 int (*keyCompare)(dict *d, const void *key1, const void *key2); // key 对比函数 void (*keyDestructor)(dict *d, void *key); // key 销毁函数 void (*valDestructor)(dict *d, void *obj); // val 销毁函数 int (*expandAllowed)(size_t moreMem, double usedRatio); //扩展函数 /* Allow a dictEntry to carry extra caller-defined metadata. The * extra memory is initialized to 0 when a dictEntry is allocated. */ size_t (*dictEntryMetadataBytes)(dict *d); // 元数据 } dictType; 创建 先申请空间,再初始化参数\n/* Reset hash table parameters already initialized with _dictInit()*/ static void _dictReset(dict *d, int htidx) { d-\u0026gt;ht_table[htidx] = NULL; d-\u0026gt;ht_size_exp[htidx] = -1; d-\u0026gt;ht_used[htidx] = 0; } /* Create a new hash table */ dict *dictCreate(dictType *type) { dict *d = zmalloc(sizeof(*d)); _dictInit(d,type); return d; } /* Initialize the hash table */ int _dictInit(dict *d, dictType *type) { _dictReset(d, 0); _dictReset(d, 1); d-\u0026gt;type = type; d-\u0026gt;rehashidx = -1; d-\u0026gt;pauserehash = 0; return DICT_OK; // 使用一些宏来反馈结果 } 增加与扩容 这里先提前讲一下Rehash的概念,便于理解增加扩容中的一些操作:\n扩容后,字典容量及掩码值会发生改变,同一个键与掩码经位运算后得到的索引值就会发生改变,从而导致根据键查找不到值的情况。解决这个问题的方法是,新扩容的内存放到一个全新的Hash表中(ht[1]),并给字典打上在进行rehash操作中的标识(即rehashidx! 
=-1)。此后,新添加的键值对都往新的Hash表中存储;而修改、删除、查找操作需要在ht[0]、ht[1]中进行检查,然后再决定去对哪个Hash表操作。除此之外,还需要把老Hash表(ht[0])中的数据重新计算索引值后全部迁移插入到新的Hash表(ht[1])中,此迁移过程称作rehash。\n先看增加单个entry的操作:\n/* Add an element to the target hash table */ int dictAdd(dict *d, void *key, void *val) { dictEntry *entry = dictAddRaw(d,key,NULL); if (!entry) return DICT_ERR; dictSetVal(d, entry, val); return DICT_OK; } dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) { long index; dictEntry *entry; int htidx; // 如果正在rehash,在add时进行一步rehash,这里是将大范围的rehash分散来减小资源集中消耗 if (dictIsRehashing(d)) _dictRehashStep(d); /* Get the index of the new element, or -1 if * the element already exists. */ if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1) return NULL; /* Allocate the memory and store the new entry. * Insert the element in top, with the assumption that in a database * system it is more likely that recently added entries are accessed * more frequently. */ htidx = dictIsRehashing(d) ? 1 : 0; size_t metasize = dictMetadataSize(d); entry = zmalloc(sizeof(*entry) + metasize); if (metasize \u0026gt; 0) { memset(dictMetadata(entry), 0, metasize); } // 插入在顶部:根据时空局限性 entry-\u0026gt;next = d-\u0026gt;ht_table[htidx][index]; d-\u0026gt;ht_table[htidx][index] = entry; d-\u0026gt;ht_used[htidx]++; /* Set the hash entry fields. 
*/ dictSetKey(d, entry, key); return entry; } 可以看出,add调用了一个底层的addraw函数。addraw首先使用dictkeyindex来查找一个合适的插入位置,如果这个key已经存在就退出add操作。然后确定是否在rehash,上面我们讲过如果在rehash那么 新添加的键值对都往新的Hash表中存储。后面就申请空间在相应位置顶部插入,这是数据库时空局限性的体现。\n这里看一下dictSetKey和dictSetVal:\n#define dictSetKey(d, entry, _key_) do { \\ if ((d)-\u0026gt;type-\u0026gt;keyDup) \\ (entry)-\u0026gt;key = (d)-\u0026gt;type-\u0026gt;keyDup((d), _key_); \\ else \\ (entry)-\u0026gt;key = (_key_); \\ } while(0) #define dictSetVal(d, entry, _val_) do { \\ if ((d)-\u0026gt;type-\u0026gt;valDup) \\ (entry)-\u0026gt;v.val = (d)-\u0026gt;type-\u0026gt;valDup((d), _val_); \\ else \\ (entry)-\u0026gt;v.val = (_val_); \\ } while(0) 可以看出是用宏的形式调用dict的dicttype函数,也就是说这些操作是可以调整的。\n扩容操作:\n// 将d扩容到2^size的大小 int _dictExpand(dict *d, unsigned long size, int* malloc_failed) { if (malloc_failed) *malloc_failed = 0; /* the size is invalid if it is smaller than the number of * elements already inside the hash table */ if (dictIsRehashing(d) || d-\u0026gt;ht_used[0] \u0026gt; size) return DICT_ERR; /* the new hash table */ dictEntry **new_ht_table; unsigned long new_ht_used; signed char new_ht_size_exp = _dictNextExp(size); /* Detect overflows */ size_t newsize = 1ul\u0026lt;\u0026lt;new_ht_size_exp; // 后者判断在什么时候成立? if (newsize \u0026lt; size || newsize * sizeof(dictEntry*) \u0026lt; newsize) return DICT_ERR; /* Rehashing to the same table size is not useful. */ if (new_ht_size_exp == d-\u0026gt;ht_size_exp[0]) return DICT_ERR; /* Allocate the new hash table and initialize all pointers to NULL */ if (malloc_failed) { new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*)); *malloc_failed = new_ht_table == NULL; if (*malloc_failed) return DICT_ERR; } else new_ht_table = zcalloc(newsize*sizeof(dictEntry*)); // 新的hash表被使用的数量 new_ht_used = 0; /* Is this the first initialization? If so it\u0026#39;s not really a rehashing * we just set the first hash table so that it can accept keys. 
*/ if (d-\u0026gt;ht_table[0] == NULL) { d-\u0026gt;ht_size_exp[0] = new_ht_size_exp; d-\u0026gt;ht_used[0] = new_ht_used; d-\u0026gt;ht_table[0] = new_ht_table; return DICT_OK; } /* Prepare a second hash table for incremental rehashing */ d-\u0026gt;ht_size_exp[1] = new_ht_size_exp; d-\u0026gt;ht_used[1] = new_ht_used; d-\u0026gt;ht_table[1] = new_ht_table; d-\u0026gt;rehashidx = 0; return DICT_OK; } 首先判断是否在rehash,在rehash中不能扩容。然后创建一个新的hash table,这个newsize是2的n次幂。expand操作在刚开始初始化时会使用,也会在这里做一个判断。更常用的是在扩容后进行rehash操作。\n获得size的函数:\n// 确保hash cap 为2的N次幂 static signed char _dictNextExp(unsigned long size) { unsigned char e = DICT_HT_INITIAL_EXP; if (size \u0026gt;= LONG_MAX) return (8*sizeof(long)-1); // 1 \u0026lt;\u0026lt; e == 1 * 2^e // 找到一个大于size 的2^e while(1) { if (((unsigned long)1\u0026lt;\u0026lt;e) \u0026gt;= size) return e; e++; } } 渐进式Rehash 直接看函数:\nint dictRehash(dict *d, int n) { int empty_visits = n*10; /* Max number of empty buckets to visit. */ if (!dictIsRehashing(d)) return 0; while(n-- \u0026amp;\u0026amp; d-\u0026gt;ht_used[0] != 0) { dictEntry *de, *nextde; /* Note that rehashidx can\u0026#39;t overflow as we are sure there are more * elements because ht[0].used != 0 */ assert(DICTHT_SIZE(d-\u0026gt;ht_size_exp[0]) \u0026gt; (unsigned long)d-\u0026gt;rehashidx); while(d-\u0026gt;ht_table[0][d-\u0026gt;rehashidx] == NULL) { d-\u0026gt;rehashidx++; if (--empty_visits == 0) return 1; } de = d-\u0026gt;ht_table[0][d-\u0026gt;rehashidx]; /* Move all the keys in this bucket from the old to the new hash HT */ while(de) { uint64_t h; nextde = de-\u0026gt;next; /* Get the index in the new hash table */ h = dictHashKey(d, de-\u0026gt;key) \u0026amp; DICTHT_SIZE_MASK(d-\u0026gt;ht_size_exp[1]); de-\u0026gt;next = d-\u0026gt;ht_table[1][h]; d-\u0026gt;ht_table[1][h] = de; d-\u0026gt;ht_used[0]--; d-\u0026gt;ht_used[1]++; de = nextde; } d-\u0026gt;ht_table[0][d-\u0026gt;rehashidx] = NULL; d-\u0026gt;rehashidx++; } /* Check if we already rehashed the whole table... 
*/ if (d-\u0026gt;ht_used[0] == 0) { zfree(d-\u0026gt;ht_table[0]); /* Copy the new ht onto the old one */ d-\u0026gt;ht_table[0] = d-\u0026gt;ht_table[1]; d-\u0026gt;ht_used[0] = d-\u0026gt;ht_used[1]; d-\u0026gt;ht_size_exp[0] = d-\u0026gt;ht_size_exp[1]; _dictReset(d, 1); d-\u0026gt;rehashidx = -1; return 0; } /* More to rehash... */ return 1; } rehash除了扩容时会触发,缩容时也会触发。Redis整个rehash的实现,主要分为如下几步完成。\n给Hash表ht[1]申请足够的空间;扩容时空间大小为当前容量2,即d-\u0026gt;ht[0]. used2;当使用量不到总空间10%时,则进行缩容。缩容时空间大小则为能恰好包含d-\u0026gt;ht[0].used个节点的2^N次方幂整数,并把字典中字段rehashidx标识为0 进行rehash操作调用的是dictRehash函数,重新计算ht[0]中每个键的Hash值与索引值(重新计算就叫rehash),依次添加到新的Hash表ht[1],并把老Hash表中该键值对删除。把字典中字段rehashidx字段修改为Hash表ht[0]中正在进行rehash操作节点的索引值. rehash操作后,清空ht[0],然后对调一下ht[1]与ht[0]的值,并把字典中rehashidx字段标识为-1。 我们知道,Redis可以提供高性能的线上服务,而且是单进程模式,当数据库中键值对数量达到了百万、千万、亿级别时,整个rehash过程将非常缓慢,如果不优化rehash过程,可能会造成很严重的服务不可用现象。Redis优化的思想很巧妙,利用分而治之的思想了进行rehash操作,大致的步骤如下。\n执行插入、删除、查找、修改等操作前,都先判断当前字典rehash操作是否在进行中,进行中则调用dictRehashStep函数进行rehash操作(每次只对1个节点进行rehash操作,共执行1次)。除这些操作之外,当服务空闲时,如果当前字典也需要进行rehsh操作,则会调用incrementallyRehash函数进行批量rehash操作(每次对100个节点进行rehash操作,共执行1毫秒)。在经历N次rehash操作后,整个ht[0]的数据都会迁移到ht[1]中,这样做的好处就把是本应集中处理的时间分散到了上百万、千万、亿次操作中,所以其耗时可忽略不计。\n/* This function performs just a step of rehashing, and only if hashing has * not been paused for our hash table. When we have iterators in the * middle of a rehashing we can\u0026#39;t mess with the two hash tables otherwise * some elements can be missed or duplicated. * * This function is called by common lookup or update operations in the * dictionary so that the hash table automatically migrates from H1 to H2 * while it is actively used. 
*/ static void _dictRehashStep(dict *d) { if (d-\u0026gt;pauserehash == 0) dictRehash(d,1); } 删除 static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { uint64_t h, idx; dictEntry *he, *prevHe; int table; /* dict is empty */ if (dictSize(d) == 0) return NULL; if (dictIsRehashing(d)) _dictRehashStep(d); h = dictHashKey(d, key); for (table = 0; table \u0026lt;= 1; table++) { idx = h \u0026amp; DICTHT_SIZE_MASK(d-\u0026gt;ht_size_exp[table]); he = d-\u0026gt;ht_table[table][idx]; prevHe = NULL; // 查找 while(he) { if (key==he-\u0026gt;key || dictCompareKeys(d, key, he-\u0026gt;key)) { /* Unlink the element from the list */ if (prevHe) prevHe-\u0026gt;next = he-\u0026gt;next; else // 在bucket顶部,直接略过 d-\u0026gt;ht_table[table][idx] = he-\u0026gt;next; if (!nofree) { dictFreeUnlinkedEntry(d, he); } d-\u0026gt;ht_used[table]--; return he; } prevHe = he; he = he-\u0026gt;next; } if (!dictIsRehashing(d)) break; } return NULL; /* not found */ } 遍历 遍历Redis整个数据库主要有两种方式:全遍历(例如keys命令)、间断遍历(hscan命令):\n全遍历: 一次命令执行就遍历完整个数据库。 间断遍历: 每次命令执行只取部分数据,分多次遍历。 迭代器——可在容器(容器可为字典、链表等数据结构)上遍访的接口,设计人员无须关心容器的内容,调用迭代器固定的接口就可遍历数据,在很多高级语言中都有实现。\n字典迭代器主要用于迭代字典这个数据结构中的数据,既然是迭代字典中的数据,必然会出现一个问题,迭代过程中,如果发生了数据增删,则可能导致字典触发rehash操作,或迭代开始时字典正在进行rehash操作,从而导致一条数据可能多次遍历到。\ntypedef struct dictIterator { dict *d; long index; // 迭代hash中的索引值 // safe 为1表示是安全迭代器,可以在add,find等rehash场景中使用 int table, safe; // entry 当前读取节点,nextEntry entry 节点的next字段 dictEntry *entry, *nextEntry; /* unsafe iterator fingerprint for misuse detection. */ unsigned long long fingerprint;// 字典指纹,字典发生改变随之改变 } dictIterator; fingerprint字段是一个64位的整数,表示在给定时间内字典的状态。在这里称其为字典的指纹,因为该字段的值为字典(dict结构体)中所有字段值组合在一起生成的Hash值,所以当字典中数据发生任何变化时,其值都会不同,生成算法可参见源码dict.c文件中的dictFingerprint函数。\n/* A fingerprint is a 64 bit number that represents the state of the dictionary * at a given time, it\u0026#39;s just a few dict properties xored together. 
* When an unsafe iterator is initialized, we get the dict fingerprint, and check * the fingerprint again when the iterator is released. * If the two fingerprints are different it means that the user of the iterator * performed forbidden operations against the dictionary while iterating. */ unsigned long long dictFingerprint(dict *d) { unsigned long long integers[6], hash = 0; int j; integers[0] = (long) d-\u0026gt;ht_table[0]; integers[1] = d-\u0026gt;ht_size_exp[0]; integers[2] = d-\u0026gt;ht_used[0]; integers[3] = (long) d-\u0026gt;ht_table[1]; integers[4] = d-\u0026gt;ht_size_exp[1]; integers[5] = d-\u0026gt;ht_used[1]; /* We hash N integers by summing every successive integer with the integer * hashing of the previous sum. Basically: * * Result = hash(hash(hash(int1)+int2)+int3) ... * * This way the same set of integers in a different order will (likely) hash * to a different number. */ for (j = 0; j \u0026lt; 6; j++) { hash += integers[j]; /* For the hashing step we use Tomas Wang\u0026#39;s 64 bit integer hash. 
*/ hash = (~hash) + (hash \u0026lt;\u0026lt; 21); // hash = (hash \u0026lt;\u0026lt; 21) - hash - 1; hash = hash ^ (hash \u0026gt;\u0026gt; 24); hash = (hash + (hash \u0026lt;\u0026lt; 3)) + (hash \u0026lt;\u0026lt; 8); // hash * 265 hash = hash ^ (hash \u0026gt;\u0026gt; 14); hash = (hash + (hash \u0026lt;\u0026lt; 2)) + (hash \u0026lt;\u0026lt; 4); // hash * 21 hash = hash ^ (hash \u0026gt;\u0026gt; 28); hash = hash + (hash \u0026lt;\u0026lt; 31); } return hash; } 根据迭代器结构中的safe字段,将迭代器分为普通迭代器和安全迭代器:\n普通迭代器: 只遍历数据 安全迭代器: 遍历的同时删除数据 普通迭代器 普通迭代器迭代字典中数据时,会对迭代器中fingerprint字段的值作严格的校验,来保证迭代过程中字典结构不发生任何变化,确保读取出的数据不出现重复\n当Redis执行部分命令时会使用普通迭代器迭代字典数据,例如sort命令。sort命令主要作用是对给定列表、集合、有序集合的元素进行排序,如果给定的是有序集合,其成员名存储用的是字典,分值存储用的是跳跃表,则执行sort命令读取数据的时候会用到迭代器来遍历整个字典。\ndict *set = ((zset*)sortval-\u0026gt;ptr)-\u0026gt;dict; dictIterator *di; dictEntry *setele; sds sdsele; di = dictGetIterator(set); while((setele = dictNext(di)) != NULL) { sdsele = dictGetKey(setele); vector[j].obj = createStringObject(sdsele,sdslen(sdsele)); vector[j].u.score = 0; vector[j].u.cmpobj = NULL; j++; } dictReleaseIterator(di); 调用dictGetIterator函数初始化一个普通迭代器,此时会把iter-\u0026gt;safe值置为0,表示初始化的迭代器为普通迭代器\nvoid dictInitIterator(dictIterator *iter, dict *d) { iter-\u0026gt;d = d; iter-\u0026gt;table = 0; iter-\u0026gt;index = -1; iter-\u0026gt;safe = 0; iter-\u0026gt;entry = NULL; iter-\u0026gt;nextEntry = NULL; } dictIterator *dictGetIterator(dict *d) { dictIterator *iter = zmalloc(sizeof(*iter)); dictInitIterator(iter, d); return iter; } 循环调用dictNext函数依次遍历字典中Hash表的节点,首次遍历时会通过dictFingerprint函数拿到当前字典的指纹值。\n安全迭代器 ","date":"2022-05-22T00:00:00Z","permalink":"https://chi-kai.github.io/post/redis%E6%BA%90%E7%A0%81%E5%89%96%E6%9E%90-%E4%B8%80/","section":"post","tags":["源码剖析","Redis"],"title":"Redis源码剖析(一)"}] \ No newline at end of file diff --git a/docs/index.xml b/docs/index.xml index ac0a13b..08089e3 100644 --- a/docs/index.xml +++ b/docs/index.xml @@ -23,6 +23,25 @@ <p>详细的内容见 <a 
href="https://lzphi.cn/2020/12/20/2020-12-17-Tangle-%E7%99%BD%E7%9A%AE%E4%B9%A6/">Tangle白皮书中文版</a></p> <p><strong>tangle</strong> 是 <strong>IOTA</strong> 所用的技术,为物联网和小额支付提供支持。不同于常见的区块链,它使用一个DAG(有向无环图)作为结构,这里称为Tangle。</p> <p>传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。</p> +<h2 id="fabric-白皮书">Fabric 白皮书</h2> +<p>Hyperledger Fabric 是 Linux 基金会 的 一个项目,是Hyperledger下面的一个子项目。作为一个开源联盟链,被很多项目应用。</p> +<p>它的主要特点是模块化的共识机制,相对高性能,和可以使用常规语言编写智能合约(golang)。</p> +<h3 id="概念">概念</h3> +<h4 id="联盟链">联盟链</h4> +<p>文中划分联盟链和公链的标准是: <strong>是否发币和节点身份是否可知</strong></p> +<p>状态机复制(SMR)是建设弹性应用众所周知的方式,但是如果我们把运行在区块链上的智能合约看作一种分布式应用,与传统的SMR区别在于:</p> +<ul> +<li>许多应用并发运行</li> +<li>这些应用可以被任何人动态地部署</li> +<li>这些应用的代码是不被信任的,可能有恶意</li> +</ul> +<h4 id="order-execute">order-execute</h4> +<p>现有的大部分可以运行智能合约的区块链遵循SMR实现一种order-execute的架构: 节点先将交易排序再将它们广播给其他节点,然后每个节点顺序执行。</p> +<p><img src="https://s2.loli.net/2022/12/06/B4Ns3GZAKl8dIXT.png" alt=""></p> +<p>所有节点对所有交易的顺序执行限制了性能,并且需要采取复杂的措施来防止源自不受信任的合约(例如在以太坊中使用“gas”计算运行时)的针对平台的拒绝服务攻击;智能合约很难做到并发。</p> +<p>最大的限制是交易必须是确定的,这就使得不能使用常规编程语言来实现,必须使用特定的语言。</p> +<h4 id="execute-order-validate">execute-order-validate</h4> +<p><img src="https://s2.loli.net/2022/12/07/jDBxcLmYrfXSnbl.png" alt=""></p> @@ -398,6 +417,8 @@ discriminative aggregation (<strong>判别聚合</strong>)</p> <h4 id="算法流程">算法流程</h4> <p><img src="https://s2.loli.net/2022/11/15/kxSZgVusbUM29hY.png" alt=""></p> <p>每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。</p> +<h2 id="iot-22a-blockchain-based-model-migration-approach-for-secure-and-sustainable-federated-learning-in-iot-systems">IOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》</h2> +<h3 id="背景-2">背景</h3> @@ -643,6 +664,9 @@ term变为更高的term,投票变为null。</p> https://chi-kai.github.io/post/redis%E6%BA%90%E7%A0%81%E5%89%96%E6%9E%90-%E4%B8%80/ <h1 id="基础数据结构部分">基础数据结构部分</h1> <h2 id="动态字符串-sds">动态字符串 SDS</h2> +<p>实现在 sds.h/sds.c。</p> +<h3 
id="设计原则">设计原则</h3> +<p>为什么不使用c语言原生的字符串操作库? c字符串用'\0'作为终止符,不能满足二进制安全,而且求字符串长度,拼接等操作都要遍历到'\0'来实现,需要自己控制内存使用,操作复杂度高。</p> <h3 id="前置知识">前置知识</h3> <p>由于我对C语言没有深入了解,有很多知识点会在前面补充。</p> <ul> @@ -680,7 +704,8 @@ term变为更高的term,投票变为null。</p> </span></span><span style="display:flex;"><span> <span style="color:#75715e">// 柔性数组,没有分配之前不占内存 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">char</span> <span style="color:#111">buf</span><span style="color:#111">[];</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div><h4 id="二进制安全">二进制安全</h4> +</span></span></code></pre></div><p>记录了已经使用的空间和分配的空间,比C字符串操作效率更高。和 C 语言中的字符串操作相比,SDS 通过记录字符数组的使用长度和分配空间大小,避免了对字符串的遍历操作,降低了操作开销,进一步就可以帮助诸多字符串操作更加高效地完成,比如创建、追加、复制、比较等。</p> +<h4 id="二进制安全">二进制安全</h4> <pre><code>什么是二进制安全?通俗地讲,C语言中,用“\0”表示字符串的结束,如果字符串中本身就有“\0”字符,字符串就会被截断,即非二进制安全;若通过某种机制,保证读写字符串时不损害其内容,则是二进制安全。在网络报文中常常需要二进制安全。 sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;,保障了二进制安全。 @@ -1043,7 +1068,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span><span style="color:#111">}</span> </span></span></code></pre></div><h2 id="压缩列表">压缩列表</h2> <p>具体的实现在ziplist.h和ziplist.c</p> -<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是O(1)。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> +<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是 <strong>O(1)</strong> 。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// ziplist 结构 </span></span></span><span style="display:flex;"><span><span 
style="color:#75715e"></span><span style="color:#f92672">&lt;</span><span style="color:#111">zlbytes</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zltail</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zllen</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#111">...</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zlend</span><span style="color:#f92672">&gt;</span> </span></span></code></pre></div><p>这里的所有结构都是按照小端存储。</p> @@ -1120,6 +1145,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></span><span style="display:flex;"><span><span style="color:#75715e"> is, this points to prev-entry-len field. 
*/</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> <span style="color:#111">zlentry</span><span style="color:#111">;</span> </span></span></code></pre></div><p>对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。</p> +<p>解码操作,主要用宏实现:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">inline</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">zipEntry</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">zlentry</span> <span style="color:#f92672">*</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#111">{</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_DECODE_PREVLEN</span><span style="color:#111">(</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlen</span><span style="color:#111">);</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_ENTRY_ENCODING</span><span style="color:#111">(</span><span style="color:#111">p</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">encoding</span><span style="color:#111">);</span> @@ -1128,7 +1154,22 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">headersize</span> <span style="color:#f92672">=</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">lensize</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">p</span> <span style="color:#f92672">=</span> <span style="color:#111">p</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> -</span></span></code></pre></div><h2 id="字典">字典</h2> +</span></span></code></pre></div><p>这里主要就是对字节的读取,可以去看源代码。</p> +<h3 id="操作">操作</h3> +<h4 id="创建-2">创建</h4> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Create a new empty ziplist. 
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">// 先申请初始的空间(4+4+2+1),再对zlbytes,zltail,zllen,zlend逐个初始化 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#75af00">ziplistNew</span><span style="color:#111">(</span><span style="color:#00a8c8">void</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">int</span> <span style="color:#111">bytes</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#f92672">+</span><span style="color:#111">ZIPLIST_END_SIZE</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">zl</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_BYTES</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_TAIL_OFFSET</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span 
style="color:#111">(</span><span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_LENGTH</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">zl</span><span style="color:#111">[</span><span style="color:#111">bytes</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIP_END</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">zl</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h4 id="插入元素">插入元素</h4> +<h2 id="字典">字典</h2> <h3 id="结构-1">结构</h3> <p>节点:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictEntry</span> <span style="color:#111">{</span> @@ -1149,17 +1190,400 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></code></pre></div><p>可以看出是使用链表法来解决hash冲突的。</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">struct</span> <span style="color:#111">dict</span> <span style="color:#111">{</span> </span></span><span 
style="display:flex;"><span> <span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">;</span> <span style="color:#75715e">// 对应特定类型操作函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 哈希表。有两个,一个正常使用,另外一个在rehash时使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 记录每个哈希表被使用的数目。 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> -</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">rehashidx</span><span style="color:#111">;</span> <span style="color:#75715e">/* rehashing not in progress if rehashidx == -1 */</span> </span></span><span style="display:flex;"><span> 
</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Keep small vars at end for optimal (minimal) struct padding */</span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int16_t</span> <span style="color:#111">pauserehash</span><span style="color:#111">;</span> <span style="color:#75715e">/* If &gt;0 rehashing is paused (&lt;0 indicates coding error) */</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. (size = 1&lt;&lt;exp) */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// size 的 系数,size 是2 的N次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. 
(size = 1&lt;&lt;exp) */</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div> +</span></span></code></pre></div><p>这里可以看到一个dictType 用来对应特定类型的操作函数,这些函数体现了面向对象编程的思想,会在后面合适的时机用到。</p> +<p>比如找个hashFunction 用来控制dict使用的hash函数,默认为siphash。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-go" data-lang="go"><span style="display:flex;"><span><span style="color:#75af00">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#75af00">dictType</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">uint64_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">hashFunction</span><span style="color:#111">)(</span><span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// hash函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// key的 复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span 
style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 的复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyCompare</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key1</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key2</span><span style="color:#111">);</span> <span style="color:#75715e">// key 对比函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span 
style="color:#75715e">// key 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">expandAllowed</span><span style="color:#111">)(</span><span style="color:#75af00">size_t</span> <span style="color:#75af00">moreMem</span><span style="color:#111">,</span> <span style="color:#75af00">double</span> <span style="color:#75af00">usedRatio</span><span style="color:#111">);</span> <span style="color:#75715e">//扩展函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">/* Allow a dictEntry to carry extra caller-defined metadata. The +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * extra memory is initialized to 0 when a dictEntry is allocated. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">size_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">dictEntryMetadataBytes</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">);</span> <span style="color:#75715e">// 元数据 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#75af00">dictType</span><span style="color:#111">;</span> +</span></span></code></pre></div><h3 id="创建-3">创建</h3> +<p>先申请空间,再初始化参数</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Reset hash table parameters already initialized with _dictInit()*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Create a new hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictCreate</span><span style="color:#111">(</span><span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">type</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Initialize the hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">0</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">type</span> <span style="color:#f92672">=</span> <span style="color:#111">type</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> <span style="color:#75715e">// 使用一些宏来反馈结果 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="增加与扩容">增加与扩容</h3> +<p>这里先提前讲一下Rehash的概念,便于理解增加扩容中的一些操作:</p> +<p>扩容后,字典容量及掩码值会发生改变,同一个键与掩码经位运算后得到的索引值就会发生改变,从而导致根据键查找不到值的情况。解决这个问题的方法是,<strong>新扩容的内存放到一个全新的Hash表中(ht[1]),并给字典打上在进行rehash操作中的标识(即rehashidx! 
=-1)</strong>。此后,新添加的键值对都往新的Hash表中存储;而修改、删除、查找操作需要在ht[0]、ht[1]中进行检查,然后再决定去对哪个Hash表操作。除此之外,还需要把老Hash表(ht[0])中的数据重新计算索引值后全部迁移插入到新的Hash表(ht[1])中,此迁移过程称作rehash。</p> +<p>先看增加单个entry的操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Add an element to the target hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictAdd</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">val</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">,</span><span style="color:#111">NULL</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#75af00">dictSetVal</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">val</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">existing</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 
如果正在rehash,在add时进行一步rehash,这里是将大范围的rehash分散来减小资源集中消耗 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index of the new element, or -1 if +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the element already exists. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">((</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#75af00">_dictKeyIndex</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">),</span> <span style="color:#111">existing</span><span style="color:#111">))</span> <span style="color:#f92672">==</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the memory and store the new 
entry. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Insert the element in top, with the assumption that in a database +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * system it is more likely that recently added entries are accessed +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * more frequently. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">htidx</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">?</span> <span style="color:#ae81ff">1</span> <span style="color:#f92672">:</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">metasize</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictMetadataSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">metasize</span> <span style="color:#f92672">&gt;</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">memset</span><span style="color:#111">(</span><span style="color:#75af00">dictMetadata</span><span style="color:#111">(</span><span style="color:#111">entry</span><span style="color:#111">),</span> <span style="color:#ae81ff">0</span><span style="color:#111">,</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 插入在顶部:根据时空局限性 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">entry</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
+</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Set the hash entry fields. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictSetKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>可以看出,dictAdd调用了底层的dictAddRaw函数。dictAddRaw首先用_dictKeyIndex计算新元素应插入的桶下标;如果这个key已经存在,_dictKeyIndex返回-1,dictAddRaw返回NULL,dictAdd随之返回DICT_ERR。然后确定是否在rehash,上面我们讲过如果在rehash那么 <strong>新添加的键值对都往新的Hash表中存储</strong>。最后申请空间,把新节点插入到对应桶的链表头部,这利用了数据访问的时间局部性:最近添加的元素更可能被频繁访问。</p> +<p>这里看一下dictSetKey和dictSetVal:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">#define dictSetKey(d, entry, _key_) do { \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;keyDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (d)-&gt;type-&gt;keyDup((d), _key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (_key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">#define dictSetVal(d, entry, _val_) do 
{ \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;valDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (d)-&gt;type-&gt;valDup((d), _val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (_val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span></code></pre></div><p>可以看出是用宏的形式调用dict的dicttype函数,也就是说这些操作是可以调整的。</p> +<p>扩容操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 将d扩容到容量 1 &lt;&lt; _dictNextExp(size)(由size换算出的2的幂,而不是2^size) +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictExpand</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span><span style="color:#f92672">*</span> <span style="color:#111">malloc_failed</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span 
style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the size is invalid if it is smaller than the number of +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * elements already inside the hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">||</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">=</span> <span 
style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#111">size</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Detect overflows */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">newsize</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">1ul</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 后者判断在什么时候成立? +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">newsize</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">size</span> <span style="color:#f92672">||</span> <span style="color:#111">newsize</span> <span style="color:#f92672">*</span> <span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">)</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">newsize</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Rehashing to the same table size is not useful. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">==</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the new hash table and initialize all pointers to NULL */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">ztrycalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span><span 
style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> <span style="color:#00a8c8">else</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zcalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 新的hash表被使用的数量 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">new_ht_used</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Is this the first initialization? If so it&#39;s not really a rehashing +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * we just set the first hash table so that it can accept keys. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75715e">/* Prepare a second hash table for incremental rehashing */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>首先判断是否在rehash,在rehash中不能扩容;如果size小于ht[0]中已有的元素个数,同样返回DICT_ERR。然后创建一个新的hash table,这个newsize是2的n次幂。如果ht_table[0]还是NULL,说明这是首次初始化,直接把新表作为ht[0]使用并返回,不需要rehash;否则把新表放到ht[1],并将rehashidx置为0,之后通过渐进式rehash把ht[0]中的数据迁移过去。</p> +<p>获得size的函数:</p> +<div class="highlight"><pre tabindex="0" 
style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 确保hash cap 为2的N次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#111">e</span> <span style="color:#f92672">=</span> <span style="color:#111">DICT_HT_INITIAL_EXP</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">size</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">LONG_MAX</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">(</span><span style="color:#ae81ff">8</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 1 &lt;&lt; e == 1 * 2^e +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// 找到第一个大于等于size 的2^e 
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(((</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#ae81ff">1</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">e</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="渐进式rehash">渐进式Rehash</h3> +<p>直接看函数:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">n</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#00a8c8">int</span> <span style="color:#111">empty_visits</span> <span style="color:#f92672">=</span> <span style="color:#111">n</span><span style="color:#f92672">*</span><span style="color:#ae81ff">10</span><span style="color:#111">;</span> <span style="color:#75715e">/* Max number of empty buckets to visit. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">n</span><span style="color:#f92672">--</span> <span style="color:#f92672">&amp;&amp;</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">!=</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">de</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Note that rehashidx can&#39;t overflow as we are sure there are more +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * 
elements because ht[0].used != 0 */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">assert</span><span style="color:#111">(</span><span style="color:#75af00">DICTHT_SIZE</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">--</span><span style="color:#111">empty_visits</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Move all the keys in this bucket from the old to the new hash HT */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">de</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">nextde</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index in the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">de</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">)</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span><span 
style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Check if we already rehashed the whole table... 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">zfree</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Copy the new ht onto the old one */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* More to rehash... */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>rehash除了扩容时会触发,缩容时也会触发。Redis整个rehash的实现,主要分为如下几步完成。</p> +<ol> +<li>给Hash表ht[1]申请足够的空间;扩容时空间大小为当前容量*2,即d-&gt;ht[0]. 
used*2;当使用量不到总空间10%时,则进行缩容。缩容时空间大小则为能恰好包含d-&gt;ht[0].used个节点的2^N次方幂整数,并把字典中字段rehashidx标识为0</li> +<li>进行rehash操作调用的是dictRehash函数,重新计算ht[0]中每个键的Hash值与索引值(重新计算就叫rehash),依次添加到新的Hash表ht[1],并把老Hash表中该键值对删除。把字典中字段rehashidx字段修改为Hash表ht[0]中正在进行rehash操作节点的索引值。</li> +<li>rehash操作后,清空ht[0],然后对调一下ht[1]与ht[0]的值,并把字典中rehashidx字段标识为-1。</li> +</ol> +<p>我们知道,Redis可以提供高性能的线上服务,而且是单进程模式,当数据库中键值对数量达到了百万、千万、亿级别时,整个rehash过程将非常缓慢,如果不优化rehash过程,可能会造成很严重的服务不可用现象。Redis优化的思想很巧妙,利用分而治之的思想进行rehash操作,大致的步骤如下。</p> +<p>执行插入、删除、查找、修改等操作前,都先判断当前字典rehash操作是否在进行中,进行中则调用dictRehashStep函数进行rehash操作(每次只对1个节点进行rehash操作,共执行1次)。除这些操作之外,当服务空闲时,如果当前字典也需要进行rehash操作,则会调用incrementallyRehash函数进行批量rehash操作(每次对100个节点进行rehash操作,共执行1毫秒)。在经历N次rehash操作后,整个ht[0]的数据都会迁移到ht[1]中,这样做的好处就是把本应集中处理的时间分散到了上百万、千万、亿次操作中,所以其耗时可忽略不计。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* This function performs just a step of rehashing, and only if hashing has +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * not been paused for our hash table. When we have iterators in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * middle of a rehashing we can&#39;t mess with the two hash tables otherwise +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * some elements can be missed or duplicated. 
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This function is called by common lookup or update operations in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * dictionary so that the hash table automatically migrates from H1 to H2 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * while it is actively used. */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="删除-1">删除</h3> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGenericDelete</span><span 
style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">,</span> <span style="color:#111">idx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">he</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">prevHe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* dict is empty */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span 
style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">table</span> <span style="color:#f92672">&lt;=</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> <span style="color:#111">table</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">idx</span> <span style="color:#f92672">=</span> <span style="color:#111">h</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 查找 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">he</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">key</span><span style="color:#f92672">==</span><span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span> <span style="color:#f92672">||</span> <span style="color:#75af00">dictCompareKeys</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">))</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Unlink the element from the list */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">prevHe</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span 
style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">else</span> <span style="color:#75715e">// 在bucket顶部,直接略过 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictFreeUnlinkedEntry</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">he</span><span style="color:#111">;</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">break</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> <span style="color:#75715e">/* not found */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="遍历">遍历</h3> +<p>遍历Redis整个数据库主要有两种方式:全遍历(例如keys命令)、间断遍历(hscan命令):</p> +<ul> +<li>全遍历: 一次命令执行就遍历完整个数据库。</li> +<li>间断遍历: 每次命令执行只取部分数据,分多次遍历。</li> +</ul> +<p>迭代器——可在容器(容器可为字典、链表等数据结构)上遍访的接口,设计人员无须关心容器的内容,调用迭代器固定的接口就可遍历数据,在很多高级语言中都有实现。</p> +<p>字典迭代器主要用于迭代字典这个数据结构中的数据,既然是迭代字典中的数据,必然会出现一个问题,迭代过程中,如果发生了数据增删,则可能导致字典触发rehash操作,或迭代开始时字典正在进行rehash操作,从而导致一条数据可能多次遍历到。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span 
style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictIterator</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> <span style="color:#75715e">// 迭代hash中的索引值 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// safe 为1表示是安全迭代器,可以在add,find等rehash场景中使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">,</span> <span style="color:#111">safe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// entry 当前读取节点,nextEntry entry 节点的next字段 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextEntry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* unsafe iterator fingerprint for misuse detection. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">fingerprint</span><span style="color:#111">;</span><span style="color:#75715e">// 字典指纹,字典发生改变随之改变 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#111">dictIterator</span><span style="color:#111">;</span> +</span></span></code></pre></div><p>fingerprint字段是一个64位的整数,表示某一时刻字典的状态。在这里称其为字典的指纹,因为该字段的值是由字典(dict结构体)中两张哈希表各自的ht_table指针、ht_size_exp和ht_used共6个值组合在一起生成的Hash值,所以当字典发生增删节点、扩容或rehash等结构性变化时,其值(大概率)会不同,生成算法可参见源码dict.c文件中的dictFingerprint函数。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* A fingerprint is a 64 bit number that represents the state of the dictionary +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * at a given time, it&#39;s just a few dict properties xored together. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * When an unsafe iterator is initialized, we get the dict fingerprint, and check +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the fingerprint again when the iterator is released. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * If the two fingerprints are different it means that the user of the iterator +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * performed forbidden operations against the dictionary while iterating.
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#75af00">dictFingerprint</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">6</span><span style="color:#111">],</span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">j</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">3</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">4</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">5</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span 
style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* We hash N integers by summing every successive integer with the integer +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * hashing of the previous sum. Basically: +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Result = hash(hash(hash(int1)+int2)+int3) ... +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This way the same set of integers in a different order will (likely) hash +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * to a different number. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">j</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">j</span> <span style="color:#f92672">&lt;</span> <span style="color:#ae81ff">6</span><span style="color:#111">;</span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">+=</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* For the hashing step we use Tomas Wang&#39;s 64 bit integer hash. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#f92672">~</span><span style="color:#111">hash</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">21</span><span style="color:#111">);</span> <span style="color:#75715e">// hash = (hash &lt;&lt; 21) - hash - 1; +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">24</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">3</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">8</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 265 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span 
style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">14</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">2</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">4</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 21 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">28</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">31</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">hash</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> 
+</span></span></code></pre></div><p>根据迭代器结构中的safe字段,将迭代器分为普通迭代器和安全迭代器:</p> +<ul> +<li>普通迭代器: 只遍历数据</li> +<li>安全迭代器: 遍历的同时删除数据</li> +</ul> +<h4 id="普通迭代器">普通迭代器</h4> +<p>普通迭代器迭代字典中数据时,会对迭代器中fingerprint字段的值作严格的校验,来保证迭代过程中字典结构不发生任何变化,确保读取出的数据不出现重复</p> +<p>当Redis执行部分命令时会使用普通迭代器迭代字典数据,例如sort命令。sort命令主要作用是对给定列表、集合、有序集合的元素进行排序,如果给定的是有序集合,其成员名存储用的是字典,分值存储用的是跳跃表,则执行sort命令读取数据的时候会用到迭代器来遍历整个字典。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">set</span> <span style="color:#f92672">=</span> <span style="color:#111">((</span><span style="color:#111">zset</span><span style="color:#f92672">*</span><span style="color:#111">)</span><span style="color:#111">sortval</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ptr</span><span style="color:#111">)</span><span style="color:#f92672">-&gt;</span><span style="color:#111">dict</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">di</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">setele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sds</span> <span style="color:#111">sdsele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">di</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">set</span><span style="color:#111">);</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">((</span><span style="color:#111">setele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictNext</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">))</span> <span style="color:#f92672">!=</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sdsele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetKey</span><span style="color:#111">(</span><span style="color:#111">setele</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">obj</span> <span style="color:#f92672">=</span> <span style="color:#75af00">createStringObject</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">,</span><span style="color:#75af00">sdslen</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span style="color:#111">score</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span 
style="color:#111">cmpobj</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictReleaseIterator</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">);</span> +</span></span></code></pre></div><ol> +<li> +<p>调用dictGetIterator函数初始化一个普通迭代器,此时会把iter-&gt;safe值置为0,表示初始化的迭代器为普通迭代器</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">void</span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">safe</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">nextEntry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span 
style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">iter</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div></li> +<li> +<p>循环调用dictNext函数依次遍历字典中Hash表的节点,首次遍历时会通过dictFingerprint函数拿到当前字典的指纹值。</p> +</li> +<li><p>遍历结束后调用dictReleaseIterator函数释放迭代器,此时会再次计算字典指纹,并与首次遍历时记录的指纹值比较;两者不一致则说明迭代期间对字典执行了被禁止的操作。</p></li> +</ol> +<h4 id="安全迭代器">安全迭代器</h4> + diff --git a/docs/page/2/index.html b/docs/page/2/index.html index 27fdefc..3827cf1 100644 --- a/docs/page/2/index.html +++ b/docs/page/2/index.html @@ -334,7 +334,7 @@

Built with Hugo and theme Tokiwa.
- 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/post/index.html b/docs/post/index.html index 9b61199..d06b69d 100644 --- a/docs/post/index.html +++ b/docs/post/index.html @@ -385,7 +385,7 @@

2022

Built with Hugo and theme Tokiwa.
- 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/post/index.xml b/docs/post/index.xml index fc59cd1..5d3b693 100644 --- a/docs/post/index.xml +++ b/docs/post/index.xml @@ -23,6 +23,25 @@ <p>详细的内容见 <a href="https://lzphi.cn/2020/12/20/2020-12-17-Tangle-%E7%99%BD%E7%9A%AE%E4%B9%A6/">Tangle白皮书中文版</a></p> <p><strong>tangle</strong> 是 <strong>IOTA</strong> 所用的技术,为物联网和小额支付提供支持。不同于常见的区块链,它使用一个DAG(有向无环图)作为结构,这里称为Tangle。</p> <p>传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。</p> +<h2 id="fabric-白皮书">Fabric 白皮书</h2> +<p>Hyperledger Fabric 是 Linux 基金会 的 一个项目,是Hyperledger下面的一个子项目。作为一个开源联盟链,被很多项目应用。</p> +<p>它的主要特点是模块化的共识机制,相对高性能,和可以使用常规语言编写智能合约(golang)。</p> +<h3 id="概念">概念</h3> +<h4 id="联盟链">联盟链</h4> +<p>文中划分联盟链和公链的标准是: <strong>是否发币和节点身份是否可知</strong></p> +<p>状态机复制(SMR)是建设弹性应用众所周知的方式,但是如果我们把运行在区块链上的智能合约看作一种分布式应用,与传统的SMR区别在于:</p> +<ul> +<li>许多应用并发运行</li> +<li>这些应用可以被任何人动态地部署</li> +<li>这些应用的代码是不被信任的,可能有恶意</li> +</ul> +<h4 id="order-execute">order-execute</h4> +<p>现有的大部分可以运行智能合约的区块链遵循SMR实现一种order-execute的架构: 节点先将交易排序再将它们广播给其他节点,然后每个节点顺序执行。</p> +<p><img src="https://s2.loli.net/2022/12/06/B4Ns3GZAKl8dIXT.png" alt=""></p> +<p>所有节点对所有交易的顺序执行限制了性能,并且需要采取复杂的措施来防止源自不受信任的合约(例如在以太坊中使用“gas”计算运行时)的针对平台的拒绝服务攻击;智能合约很难做到并发。</p> +<p>最大的限制是交易必须是确定的,这就使得不能使用常规编程语言来实现,必须使用特定的语言。</p> +<h4 id="execute-order-validate">execute-order-validate</h4> +<p><img src="https://s2.loli.net/2022/12/07/jDBxcLmYrfXSnbl.png" alt=""></p> @@ -398,6 +417,8 @@ discriminative aggregation (<strong>判别聚合</strong>)</p> <h4 id="算法流程">算法流程</h4> <p><img src="https://s2.loli.net/2022/11/15/kxSZgVusbUM29hY.png" alt=""></p> <p>每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。</p> +<h2 id="iot-22a-blockchain-based-model-migration-approach-for-secure-and-sustainable-federated-learning-in-iot-systems">IOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》</h2> +<h3 
id="背景-2">背景</h3> @@ -643,6 +664,9 @@ term变为更高的term,投票变为null。</p> https://chi-kai.github.io/post/redis%E6%BA%90%E7%A0%81%E5%89%96%E6%9E%90-%E4%B8%80/ <h1 id="基础数据结构部分">基础数据结构部分</h1> <h2 id="动态字符串-sds">动态字符串 SDS</h2> +<p>实现在 sds.h/sds.c。</p> +<h3 id="设计原则">设计原则</h3> +<p>为什么不使用c语言原生的字符串操作库? c字符串用'\0'作为终止符,不能满足二进制安全,而且求字符串长度,拼接等操作都要遍历到'\0'来实现,需要自己控制内存使用,操作复杂度高。</p> <h3 id="前置知识">前置知识</h3> <p>由于我对C语言没有深入了解,有很多知识点会在前面补充。</p> <ul> @@ -680,7 +704,8 @@ term变为更高的term,投票变为null。</p> </span></span><span style="display:flex;"><span> <span style="color:#75715e">// 柔性数组,没有分配之前不占内存 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">char</span> <span style="color:#111">buf</span><span style="color:#111">[];</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div><h4 id="二进制安全">二进制安全</h4> +</span></span></code></pre></div><p>记录了已经使用的空间和分配的空间,比C字符串操作效率更高。和 C 语言中的字符串操作相比,SDS 通过记录字符数组的使用长度和分配空间大小,避免了对字符串的遍历操作,降低了操作开销,进一步就可以帮助诸多字符串操作更加高效地完成,比如创建、追加、复制、比较等。</p> +<h4 id="二进制安全">二进制安全</h4> <pre><code>什么是二进制安全?通俗地讲,C语言中,用“\0”表示字符串的结束,如果字符串中本身就有“\0”字符,字符串就会被截断,即非二进制安全;若通过某种机制,保证读写字符串时不损害其内容,则是二进制安全。在网络报文中常常需要二进制安全。 sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;,保障了二进制安全。 @@ -1043,7 +1068,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span><span style="color:#111">}</span> </span></span></code></pre></div><h2 id="压缩列表">压缩列表</h2> <p>具体的实现在ziplist.h和ziplist.c</p> -<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是O(1)。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> +<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是 <strong>O(1)</strong> 。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> <div class="highlight"><pre tabindex="0" 
style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// ziplist 结构 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#f92672">&lt;</span><span style="color:#111">zlbytes</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zltail</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zllen</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#111">...</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zlend</span><span style="color:#f92672">&gt;</span> </span></span></code></pre></div><p>这里的所有结构都是按照小端存储。</p> @@ -1120,6 +1145,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></span><span style="display:flex;"><span><span style="color:#75715e"> is, this points to prev-entry-len field. 
*/</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> <span style="color:#111">zlentry</span><span style="color:#111">;</span> </span></span></code></pre></div><p>对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。</p> +<p>解码操作,主要用宏实现:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">inline</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">zipEntry</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">zlentry</span> <span style="color:#f92672">*</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#111">{</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_DECODE_PREVLEN</span><span style="color:#111">(</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlen</span><span style="color:#111">);</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_ENTRY_ENCODING</span><span style="color:#111">(</span><span style="color:#111">p</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">encoding</span><span style="color:#111">);</span> @@ -1128,7 +1154,22 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">headersize</span> <span style="color:#f92672">=</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">lensize</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">p</span> <span style="color:#f92672">=</span> <span style="color:#111">p</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> -</span></span></code></pre></div><h2 id="字典">字典</h2> +</span></span></code></pre></div><p>这里主要就是对字节的读取,可以去看源代码。</p> +<h3 id="操作">操作</h3> +<h4 id="创建-2">创建</h4> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Create a new empty ziplist. 
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">// 先申请初始的空间(4+4+2+1),再对zlbytes,zltail,zllen,zlend逐个初始化 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#75af00">ziplistNew</span><span style="color:#111">(</span><span style="color:#00a8c8">void</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">int</span> <span style="color:#111">bytes</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#f92672">+</span><span style="color:#111">ZIPLIST_END_SIZE</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">zl</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_BYTES</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_TAIL_OFFSET</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span 
style="color:#111">(</span><span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_LENGTH</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">zl</span><span style="color:#111">[</span><span style="color:#111">bytes</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIP_END</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">zl</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h4 id="插入元素">插入元素</h4> +<h2 id="字典">字典</h2> <h3 id="结构-1">结构</h3> <p>节点:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictEntry</span> <span style="color:#111">{</span> @@ -1149,17 +1190,400 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></code></pre></div><p>可以看出是使用链表法来解决hash冲突的。</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">struct</span> <span style="color:#111">dict</span> <span style="color:#111">{</span> </span></span><span 
style="display:flex;"><span> <span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">;</span> <span style="color:#75715e">// 对应特定类型操作函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 哈希表。有两个,一个正常使用,另外一个在rehash时使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 记录每个哈希表被使用的数目。 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> -</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">rehashidx</span><span style="color:#111">;</span> <span style="color:#75715e">/* rehashing not in progress if rehashidx == -1 */</span> </span></span><span style="display:flex;"><span> 
</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Keep small vars at end for optimal (minimal) struct padding */</span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int16_t</span> <span style="color:#111">pauserehash</span><span style="color:#111">;</span> <span style="color:#75715e">/* If &gt;0 rehashing is paused (&lt;0 indicates coding error) */</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. (size = 1&lt;&lt;exp) */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// size 的指数(exp),哈希表实际大小 size 是 2 的 exp 次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. 
(size = 1&lt;&lt;exp) */</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div> +</span></span></code></pre></div><p>这里可以看到一个dictType 用来对应特定类型的操作函数,这些函数体现了面向对象编程的思想,会在后面合适的时机用到。</p> +<p>比如这个hashFunction 用来控制dict使用的hash函数,默认为siphash。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-go" data-lang="go"><span style="display:flex;"><span><span style="color:#75af00">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#75af00">dictType</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">uint64_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">hashFunction</span><span style="color:#111">)(</span><span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// hash函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// key的 复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span 
style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 的复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyCompare</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key1</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key2</span><span style="color:#111">);</span> <span style="color:#75715e">// key 对比函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span 
style="color:#75715e">// key 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">expandAllowed</span><span style="color:#111">)(</span><span style="color:#75af00">size_t</span> <span style="color:#75af00">moreMem</span><span style="color:#111">,</span> <span style="color:#75af00">double</span> <span style="color:#75af00">usedRatio</span><span style="color:#111">);</span> <span style="color:#75715e">//扩展函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">/* Allow a dictEntry to carry extra caller-defined metadata. The +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * extra memory is initialized to 0 when a dictEntry is allocated. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">size_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">dictEntryMetadataBytes</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">);</span> <span style="color:#75715e">// 元数据 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#75af00">dictType</span><span style="color:#111">;</span> +</span></span></code></pre></div><h3 id="创建-3">创建</h3> +<p>先申请空间,再初始化参数</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Reset hash table parameters already initialized with _dictInit()*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Create a new hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictCreate</span><span style="color:#111">(</span><span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">type</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Initialize the hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">0</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">type</span> <span style="color:#f92672">=</span> <span style="color:#111">type</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> <span style="color:#75715e">// 使用一些宏来反馈结果 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="增加与扩容">增加与扩容</h3> +<p>这里先提前讲一下Rehash的概念,便于理解增加扩容中的一些操作:</p> +<p>扩容后,字典容量及掩码值会发生改变,同一个键与掩码经位运算后得到的索引值就会发生改变,从而导致根据键查找不到值的情况。解决这个问题的方法是,<strong>新扩容的内存放到一个全新的Hash表中(ht[1]),并给字典打上在进行rehash操作中的标识(即rehashidx! 
=-1)</strong>。此后,新添加的键值对都往新的Hash表中存储;而修改、删除、查找操作需要在ht[0]、ht[1]中进行检查,然后再决定去对哪个Hash表操作。除此之外,还需要把老Hash表(ht[0])中的数据重新计算索引值后全部迁移插入到新的Hash表(ht[1])中,此迁移过程称作rehash。</p> +<p>先看增加单个entry的操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Add an element to the target hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictAdd</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">val</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">,</span><span style="color:#111">NULL</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#75af00">dictSetVal</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">val</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">existing</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 
如果正在rehash,在add时进行一步rehash,这里是将大范围的rehash分散来减小资源集中消耗 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index of the new element, or -1 if +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the element already exists. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">((</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#75af00">_dictKeyIndex</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">),</span> <span style="color:#111">existing</span><span style="color:#111">))</span> <span style="color:#f92672">==</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the memory and store the new 
entry. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Insert the element in top, with the assumption that in a database +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * system it is more likely that recently added entries are accessed +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * more frequently. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">htidx</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">?</span> <span style="color:#ae81ff">1</span> <span style="color:#f92672">:</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">metasize</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictMetadataSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">metasize</span> <span style="color:#f92672">&gt;</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">memset</span><span style="color:#111">(</span><span style="color:#75af00">dictMetadata</span><span style="color:#111">(</span><span style="color:#111">entry</span><span style="color:#111">),</span> <span style="color:#ae81ff">0</span><span style="color:#111">,</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 插入在链表头部:依据时间局部性,最近添加的节点更可能被频繁访问 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">entry</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
+</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Set the hash entry fields. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictSetKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>可以看出,dictAdd调用了一个底层的dictAddRaw函数。dictAddRaw首先使用_dictKeyIndex来查找一个合适的插入位置,如果这个key已经存在就退出添加操作。然后确定是否在rehash,上面我们讲过如果在rehash那么 <strong>新添加的键值对都往新的Hash表中存储</strong>。后面就申请空间,并把新节点插入到对应桶的链表头部,这是时间局部性的体现:最近添加的数据更有可能被频繁访问。</p> +<p>这里看一下dictSetKey和dictSetVal:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">#define dictSetKey(d, entry, _key_) do { \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;keyDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (d)-&gt;type-&gt;keyDup((d), _key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (_key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">#define dictSetVal(d, entry, _val_) do 
{ \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;valDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (d)-&gt;type-&gt;valDup((d), _val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (_val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span></code></pre></div><p>可以看出是用宏的形式调用dict的dicttype函数,也就是说这些操作是可以调整的。</p> +<p>扩容操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 将d扩容到不小于size的2的N次幂大小(size是期望容纳的元素个数,不是指数) +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictExpand</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span><span style="color:#f92672">*</span> <span style="color:#111">malloc_failed</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span 
style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the size is invalid if it is smaller than the number of +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * elements already inside the hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">||</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">=</span> <span 
style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#111">size</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Detect overflows */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">newsize</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">1ul</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 后者判断在什么时候成立? +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">newsize</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">size</span> <span style="color:#f92672">||</span> <span style="color:#111">newsize</span> <span style="color:#f92672">*</span> <span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">)</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">newsize</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Rehashing to the same table size is not useful. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">==</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the new hash table and initialize all pointers to NULL */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">ztrycalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span><span 
style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> <span style="color:#00a8c8">else</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zcalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 新的hash表被使用的数量 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">new_ht_used</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Is this the first initialization? If so it&#39;s not really a rehashing +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * we just set the first hash table so that it can accept keys. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75715e">/* Prepare a second hash table for incremental rehashing */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>首先判断是否正在rehash,rehash进行中不能再次扩容。然后创建一个新的hash table,其大小newsize是2的n次幂。如果ht_table[0]还是NULL,说明这是字典的首次初始化,直接把新表赋给ht[0]即可,并不算rehash;否则把新表放到ht[1],并将rehashidx置为0,之后开始渐进式rehash,这也是更常见的情况。</p> +<p>获得size对应指数(即2^e中的e)的函数:</p> +<div class="highlight"><pre tabindex="0" 
style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 确保hash cap 为2的N次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#111">e</span> <span style="color:#f92672">=</span> <span style="color:#111">DICT_HT_INITIAL_EXP</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">size</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">LONG_MAX</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">(</span><span style="color:#ae81ff">8</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 1 &lt;&lt; e == 1 * 2^e +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// 找到一个大于size 的2^e 
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(((</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#ae81ff">1</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">e</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="渐进式rehash">渐进式Rehash</h3> +<p>直接看函数:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">n</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#00a8c8">int</span> <span style="color:#111">empty_visits</span> <span style="color:#f92672">=</span> <span style="color:#111">n</span><span style="color:#f92672">*</span><span style="color:#ae81ff">10</span><span style="color:#111">;</span> <span style="color:#75715e">/* Max number of empty buckets to visit. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">n</span><span style="color:#f92672">--</span> <span style="color:#f92672">&amp;&amp;</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">!=</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">de</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Note that rehashidx can&#39;t overflow as we are sure there are more +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * 
elements because ht[0].used != 0 */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">assert</span><span style="color:#111">(</span><span style="color:#75af00">DICTHT_SIZE</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">--</span><span style="color:#111">empty_visits</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Move all the keys in this bucket from the old to the new hash HT */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">de</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">nextde</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index in the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">de</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">)</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span><span 
style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Check if we already rehashed the whole table... 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">zfree</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Copy the new ht onto the old one */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* More to rehash... */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>rehash除了扩容时会触发,缩容时也会触发。Redis整个rehash的实现,主要分为如下几步完成。</p> +<ol> +<li>给Hash表ht[1]申请足够的空间;扩容时空间大小为当前容量*2,即d-&gt;ht[0]. 
used*2;当使用量不到总空间10%时,则进行缩容。缩容时空间大小则为能恰好包含d-&gt;ht[0].used个节点的2^N次方幂整数,并把字典中字段rehashidx标识为0。</li> +<li>进行rehash操作调用的是dictRehash函数,重新计算ht[0]中每个键的Hash值与索引值(重新计算就叫rehash),依次添加到新的Hash表ht[1],并把老Hash表中该键值对删除。把字典中的rehashidx字段修改为Hash表ht[0]中正在进行rehash操作节点的索引值。</li> +<li>rehash操作后,清空ht[0],然后对调一下ht[1]与ht[0]的值,并把字典中rehashidx字段标识为-1。</li> +</ol> +<p>我们知道,Redis可以提供高性能的线上服务,而且是单进程模式,当数据库中键值对数量达到了百万、千万、亿级别时,整个rehash过程将非常缓慢,如果不优化rehash过程,可能会造成很严重的服务不可用现象。Redis优化的思想很巧妙,利用分而治之的思想来进行rehash操作,大致的步骤如下。</p> +<p>执行插入、删除、查找、修改等操作前,都先判断当前字典rehash操作是否在进行中,进行中则调用_dictRehashStep函数进行rehash操作(每次只对1个节点进行rehash操作,共执行1次)。除这些操作之外,当服务空闲时,如果当前字典也需要进行rehash操作,则会调用incrementallyRehash函数进行批量rehash操作(每次对100个节点进行rehash操作,共执行1毫秒)。在经历N次rehash操作后,整个ht[0]的数据都会迁移到ht[1]中,这样做的好处就是把本应集中处理的时间分散到了上百万、千万、亿次操作中,所以其耗时可忽略不计。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* This function performs just a step of rehashing, and only if hashing has +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * not been paused for our hash table. When we have iterators in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * middle of a rehashing we can&#39;t mess with the two hash tables otherwise +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * some elements can be missed or duplicated. 
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This function is called by common lookup or update operations in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * dictionary so that the hash table automatically migrates from H1 to H2 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * while it is actively used. */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="删除-1">删除</h3> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGenericDelete</span><span 
style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">,</span> <span style="color:#111">idx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">he</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">prevHe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* dict is empty */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span 
style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">table</span> <span style="color:#f92672">&lt;=</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> <span style="color:#111">table</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">idx</span> <span style="color:#f92672">=</span> <span style="color:#111">h</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 查找 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">he</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">key</span><span style="color:#f92672">==</span><span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span> <span style="color:#f92672">||</span> <span style="color:#75af00">dictCompareKeys</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">))</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Unlink the element from the list */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">prevHe</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span 
style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">else</span> <span style="color:#75715e">// 在bucket顶部,直接略过 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictFreeUnlinkedEntry</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">he</span><span style="color:#111">;</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">break</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> <span style="color:#75715e">/* not found */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="遍历">遍历</h3> +<p>遍历Redis整个数据库主要有两种方式:全遍历(例如keys命令)、间断遍历(hscan命令):</p> +<ul> +<li>全遍历: 一次命令执行就遍历完整个数据库。</li> +<li>间断遍历: 每次命令执行只取部分数据,分多次遍历。</li> +</ul> +<p>迭代器——可在容器(容器可为字典、链表等数据结构)上遍访的接口,设计人员无须关心容器的内容,调用迭代器固定的接口就可遍历数据,在很多高级语言中都有实现。</p> +<p>字典迭代器主要用于迭代字典这个数据结构中的数据,既然是迭代字典中的数据,必然会出现一个问题,迭代过程中,如果发生了数据增删,则可能导致字典触发rehash操作,或迭代开始时字典正在进行rehash操作,从而导致一条数据可能多次遍历到。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span 
style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictIterator</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> <span style="color:#75715e">// 迭代hash中的索引值 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// safe 为1表示是安全迭代器,可以在add,find等rehash场景中使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">,</span> <span style="color:#111">safe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// entry 当前读取节点,nextEntry entry 节点的next字段 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextEntry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* unsafe iterator fingerprint for misuse detection. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">fingerprint</span><span style="color:#111">;</span><span style="color:#75715e">// 字典指纹,字典发生改变随之改变 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#111">dictIterator</span><span style="color:#111">;</span> +</span></span></code></pre></div><p>fingerprint字段是一个64位的整数,表示在给定时间内字典的状态。在这里称其为字典的指纹,因为该字段的值为字典(dict结构体)中所有字段值组合在一起生成的Hash值,所以当字典中数据发生任何变化时,其值都会不同,生成算法可参见源码dict.c文件中的dictFingerprint函数。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* A fingerprint is a 64 bit number that represents the state of the dictionary +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * at a given time, it&#39;s just a few dict properties xored together. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * When an unsafe iterator is initialized, we get the dict fingerprint, and check +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the fingerprint again when the iterator is released. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * If the two fingerprints are different it means that the user of the iterator +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * performed forbidden operations against the dictionary while iterating. 
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#75af00">dictFingerprint</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">6</span><span style="color:#111">],</span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">j</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">3</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">4</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">5</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span 
style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* We hash N integers by summing every successive integer with the integer +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * hashing of the previous sum. Basically: +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Result = hash(hash(hash(int1)+int2)+int3) ... +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This way the same set of integers in a different order will (likely) hash +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * to a different number. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">j</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">j</span> <span style="color:#f92672">&lt;</span> <span style="color:#ae81ff">6</span><span style="color:#111">;</span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">+=</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* For the hashing step we use Tomas Wang&#39;s 64 bit integer hash. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#f92672">~</span><span style="color:#111">hash</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">21</span><span style="color:#111">);</span> <span style="color:#75715e">// hash = (hash &lt;&lt; 21) - hash - 1; +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">24</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">3</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">8</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 265 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span 
style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">14</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">2</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">4</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 21 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">28</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">31</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">hash</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> 
+</span></span></code></pre></div><p>根据迭代器结构中的safe字段,将迭代器分为普通迭代器和安全迭代器:</p> +<ul> +<li>普通迭代器: 只遍历数据</li> +<li>安全迭代器: 遍历的同时删除数据</li> +</ul> +<h4 id="普通迭代器">普通迭代器</h4> +<p>普通迭代器迭代字典中数据时,会对迭代器中fingerprint字段的值作严格的校验,来保证迭代过程中字典结构不发生任何变化,确保读取出的数据不出现重复</p> +<p>当Redis执行部分命令时会使用普通迭代器迭代字典数据,例如sort命令。sort命令主要作用是对给定列表、集合、有序集合的元素进行排序,如果给定的是有序集合,其成员名存储用的是字典,分值存储用的是跳跃表,则执行sort命令读取数据的时候会用到迭代器来遍历整个字典。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">set</span> <span style="color:#f92672">=</span> <span style="color:#111">((</span><span style="color:#111">zset</span><span style="color:#f92672">*</span><span style="color:#111">)</span><span style="color:#111">sortval</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ptr</span><span style="color:#111">)</span><span style="color:#f92672">-&gt;</span><span style="color:#111">dict</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">di</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">setele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sds</span> <span style="color:#111">sdsele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">di</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">set</span><span style="color:#111">);</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">((</span><span style="color:#111">setele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictNext</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">))</span> <span style="color:#f92672">!=</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sdsele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetKey</span><span style="color:#111">(</span><span style="color:#111">setele</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">obj</span> <span style="color:#f92672">=</span> <span style="color:#75af00">createStringObject</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">,</span><span style="color:#75af00">sdslen</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span style="color:#111">score</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span 
style="color:#111">cmpobj</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictReleaseIterator</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">);</span> +</span></span></code></pre></div><ol> +<li> +<p>调用dictGetIterator函数初始化一个普通迭代器,此时会把iter-&gt;safe值置为0,表示初始化的迭代器为普通迭代器</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">void</span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">safe</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">nextEntry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span 
style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">iter</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div></li> +<li> +<p>循环调用dictNext函数依次遍历字典中Hash表的节点,首次遍历时会通过dictFingerprint函数拿到当前字典的指纹值。</p> +</li> +<li></li> +</ol> +<h4 id="安全迭代器">安全迭代器</h4> + diff --git "a/docs/post/redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200/index.html" "b/docs/post/redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200/index.html" index 09be67e..222f8d3 100644 --- "a/docs/post/redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200/index.html" +++ "b/docs/post/redis\346\272\220\347\240\201\345\211\226\346\236\220-\344\270\200/index.html" @@ -188,6 +188,7 @@

更新中

  • 动态字符串 SDS @@ -227,6 +234,9 @@

    更新中

    基础数据结构部分

    动态字符串 SDS

    +

    实现在 sds.h/sds.c。

    +

    设计原则

    +

    为什么不使用c语言原生的字符串操作库? c字符串用'\0'作为终止符,不能满足二进制安全,而且求字符串长度,拼接等操作都要遍历到'\0'来实现,需要自己控制内存使用,操作复杂度高。

    前置知识

    由于我对C语言没有深入了解,有很多知识点会在前面补充。

      @@ -264,7 +274,8 @@

      数据结构

      // 柔性数组,没有分配之前不占内存 char buf[]; }; -

    二进制安全

    +

    记录了已经使用的空间和分配的空间,比C字符串操作效率更高。和 C 语言中的字符串操作相比,SDS 通过记录字符数组的使用长度和分配空间大小,避免了对字符串的遍历操作,降低了操作开销,进一步就可以帮助诸多字符串操作更加高效地完成,比如创建、追加、复制、比较等。

    +

    二进制安全

    什么是二进制安全?通俗地讲,C语言中,用“\0”表示字符串的结束,如果字符串中本身就有“\0”字符,字符串就会被截断,即非二进制安全;若通过某种机制,保证读写字符串时不损害其内容,则是二进制安全。在网络报文中常常需要二进制安全。
     
     sds使用 len 来控制字符串长度,而不是使用"\0",保障了二进制安全。 
    @@ -627,7 +638,7 @@ 

    删除

    }

    压缩列表

    具体的实现在ziplist.h和ziplist.c

    -

    压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是O(1)。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。

    +

    压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是 O(1) 。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。

    // ziplist 结构
     <zlbytes> <zltail> <zllen> <entry> <entry> ... <entry> <zlend>
     

    这里的所有结构都是按照小端存储。

    @@ -704,6 +715,7 @@

    结构

    is, this points to prev-entry-len field. */ } zlentry;

    对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。

    +

    解码操作,主要用宏实现:

    static inline void zipEntry(unsigned char *p, zlentry *e) {
         ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
         ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
    @@ -712,7 +724,22 @@ 

    结构

    e->headersize = e->prevrawlensize + e->lensize; e->p = p; } -

    字典

    +

    这里主要就是对字节的读取,可以去看源代码。

    +

    操作

    +

    创建

    +
    /* Create a new empty ziplist. */
    +// 先申请初始的空间(4+4+2+1),再对zlbytes,zltail,zllen,zlend逐个初始化
    +unsigned char *ziplistNew(void) {
    +    unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE;
    +    unsigned char *zl = zmalloc(bytes);
    +    ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
    +    ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    +    ZIPLIST_LENGTH(zl) = 0;
    +    zl[bytes-1] = ZIP_END;
    +    return zl;
    +}
    +

    插入元素

    +

    字典

    结构

    节点:

    typedef struct dictEntry {
    @@ -733,17 +760,400 @@ 

    结构

    可以看出是使用链表法来解决hash冲突的。

    struct dict {
         dictType *type; // 对应特定类型操作函数
    +    
    +    dictEntry **ht_table[2]; // 哈希表。有两个,一个正常使用,另外一个在rehash时使用
    +    unsigned long ht_used[2]; // 记录每个哈希表被使用的数目。
     
    -    dictEntry **ht_table[2];
    -    unsigned long ht_used[2];
    -
         long rehashidx; /* rehashing not in progress if rehashidx == -1 */
     
         /* Keep small vars at end for optimal (minimal) struct padding */
         int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
    -    signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
    +    // size 的指数(exponent),即 size = 2 的 exp 次幂
    +    signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
     };
    -
    +

    这里可以看到一个dictType 用来对应特定类型的操作函数,这些函数体现了面向对象编程的思想,会在后面合适的时机用到。

    +

    比如这个 hashFunction 用来指定 dict 使用的 hash 函数,默认为 siphash。

    +
    typedef struct dictType {
    +    uint64_t (*hashFunction)(const void *key); // hash function
    +    void *(*keyDup)(dict *d, const void *key); // key duplication function
    +    void *(*valDup)(dict *d, const void *obj); // value duplication function
    +    int (*keyCompare)(dict *d, const void *key1, const void *key2); // key comparison function
    +    void (*keyDestructor)(dict *d, void *key); // key destructor
    +    void (*valDestructor)(dict *d, void *obj); // value destructor
    +    int (*expandAllowed)(size_t moreMem, double usedRatio); // expansion hook; presumably decides whether growing is allowed -- confirm against dict.c
    +    /* Allow a dictEntry to carry extra caller-defined metadata.  The
    +     * extra memory is initialized to 0 when a dictEntry is allocated. */
    +    size_t (*dictEntryMetadataBytes)(dict *d); // returns the size in bytes of that per-entry metadata
    +} dictType;
    +

    创建

    +

    先申请空间,再初始化参数

    +
    /* Reset hash table parameters already initialized with _dictInit().
    + * Puts table `htidx` (0 or 1) of `d` back into the "no table" state;
    + * it does not free any memory the table pointer referred to. */
    +static void _dictReset(dict *d, int htidx)
    +{
    +    d->ht_table[htidx] = NULL; // no bucket array
    +    d->ht_size_exp[htidx] = -1; // -1: no valid size exponent while the table is unset
    +    d->ht_used[htidx] = 0; // no entries
    +}
    +
    +/* Create a new hash table: allocate the dict structure and initialize it
    + * with _dictInit() using the given type callbacks. Returns the new dict.
    + * NOTE(review): the zmalloc() result is used unchecked; presumably zmalloc
    + * aborts on out-of-memory -- confirm. */
    +dict *dictCreate(dictType *type)
    +{
    +    dict *d = zmalloc(sizeof(*d));
    +
    +    _dictInit(d,type);
    +    return d;
    +}
    +
    +/* Initialize the hash table: reset both internal tables, store the type
    + * callbacks and mark the dict as not rehashing. Always returns DICT_OK. */
    +int _dictInit(dict *d, dictType *type)
    +{
    +    _dictReset(d, 0);
    +    _dictReset(d, 1);
    +    d->type = type;
    +    d->rehashidx = -1; // -1 means no rehash in progress
    +    d->pauserehash = 0; // rehashing is not paused
    +    return DICT_OK; // results are reported through status macros
    +}
    +

    增加与扩容

    +

    这里先提前讲一下Rehash的概念,便于理解增加扩容中的一些操作:

    +

    扩容后,字典容量及掩码值会发生改变,同一个键与掩码经位运算后得到的索引值就会发生改变,从而导致根据键查找不到值的情况。解决这个问题的方法是,新扩容的内存放到一个全新的Hash表中(ht[1]),并给字典打上在进行rehash操作中的标识(即rehashidx != -1)。此后,新添加的键值对都往新的Hash表中存储;而修改、删除、查找操作需要在ht[0]、ht[1]中进行检查,然后再决定去对哪个Hash表操作。除此之外,还需要把老Hash表(ht[0])中的数据重新计算索引值后全部迁移插入到新的Hash表(ht[1])中,此迁移过程称作rehash。

    +

    先看增加单个entry的操作:

    +
    /* Add an element to the target hash table.
    + * Creates the entry through dictAddRaw() and then stores the value.
    + * Returns DICT_ERR when dictAddRaw() returns NULL, DICT_OK otherwise. */
    +int dictAdd(dict *d, void *key, void *val)
    +{
    +    dictEntry *entry = dictAddRaw(d,key,NULL);
    +    if (!entry) return DICT_ERR;
    +    dictSetVal(d, entry, val);
    +    return DICT_OK;
    +}
    +
    +dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) /* Low-level add: returns the new entry with only its key set, or NULL when _dictKeyIndex() reports -1 (key already exists). `existing` is just forwarded to _dictKeyIndex() -- presumably filled with the existing entry; confirm. */
    +{
    +    long index;
    +    dictEntry *entry;
    +    int htidx;
    +
    +    // If a rehash is in progress, perform one rehash step on every add; this spreads the cost of a large rehash over many operations instead of paying it all at once
    +    if (dictIsRehashing(d)) _dictRehashStep(d);
    +
    +    /* Get the index of the new element, or -1 if
    +     * the element already exists. */
    +    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
    +        return NULL;
    +
    +    /* Allocate the memory and store the new entry.
    +     * Insert the element in top, with the assumption that in a database
    +     * system it is more likely that recently added entries are accessed
    +     * more frequently. */
    +    htidx = dictIsRehashing(d) ? 1 : 0; // while rehashing, new entries go into table 1
    +    size_t metasize = dictMetadataSize(d);
    +    entry = zmalloc(sizeof(*entry) + metasize);
    +    if (metasize > 0) {
    +        memset(dictMetadata(entry), 0, metasize);
    +    }
    +    // Insert at the head of the bucket chain (temporal locality: recent entries are expected to be accessed more often)
    +    entry->next = d->ht_table[htidx][index];
    +    d->ht_table[htidx][index] = entry;
    +    d->ht_used[htidx]++;
    +
    +    /* Set the hash entry fields. */
    +    dictSetKey(d, entry, key);
    +    return entry;
    +}
    +

    可以看出,dictAdd调用了一个底层的dictAddRaw函数。dictAddRaw首先使用_dictKeyIndex来查找一个合适的插入位置,如果这个key已经存在就退出add操作。然后确定是否在rehash,上面我们讲过如果在rehash那么新添加的键值对都往新的Hash表中存储。后面就申请空间,在相应bucket的链表头部插入,这是利用了访问的时间局部性:最近添加的数据更可能被频繁访问。

    +

    这里看一下dictSetKey和dictSetVal:

    +
    #define dictSetKey(d, entry, _key_) do { /* store _key_ in entry, duplicated through type->keyDup when that callback is set */ \
    +    if ((d)->type->keyDup) \
    +        (entry)->key = (d)->type->keyDup((d), _key_); \
    +    else \
    +        (entry)->key = (_key_); \
    +} while(0)
    +
    +#define dictSetVal(d, entry, _val_) do { /* store _val_ in entry->v.val, duplicated through type->valDup when that callback is set */ \
    +    if ((d)->type->valDup) \
    +        (entry)->v.val = (d)->type->valDup((d), _val_); \
    +    else \
    +        (entry)->v.val = (_val_); \
    +} while(0)
    +

    可以看出是用宏的形式调用dict的dicttype函数,也就是说这些操作是可以调整的。

    +

    扩容操作:

    +
    /* Expand (or create) the hash table of d so that it has at least `size`
    + * buckets; the real bucket count is 1 << _dictNextExp(size), i.e. a power of two.
    + * Returns DICT_OK on success and DICT_ERR when a rehash is in progress, when
    + * size is smaller than the number of stored elements, on overflow, or when
    + * the resulting size equals the current one. If malloc_failed is non-NULL the
    + * allocation uses ztrycalloc() and *malloc_failed reports whether it failed. */
    +int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
    +{
    +    if (malloc_failed) *malloc_failed = 0;
    +
    +    /* the size is invalid if it is smaller than the number of
    +     * elements already inside the hash table */
    +    if (dictIsRehashing(d) || d->ht_used[0] > size)
    +        return DICT_ERR;
    +
    +    /* the new hash table */
    +    dictEntry **new_ht_table;
    +    unsigned long new_ht_used;
    +    signed char new_ht_size_exp = _dictNextExp(size);
    +
    +    /* Detect overflows */
    +    size_t newsize = 1ul<<new_ht_size_exp;
    +    // When does the second test hold? When newsize * sizeof(dictEntry*) wraps around size_t, making the byte count smaller than newsize
    +    if (newsize < size || newsize * sizeof(dictEntry*) < newsize)
    +        return DICT_ERR;
    +
    +    /* Rehashing to the same table size is not useful. */
    +    if (new_ht_size_exp == d->ht_size_exp[0]) return DICT_ERR;
    +
    +    /* Allocate the new hash table and initialize all pointers to NULL */
    +    if (malloc_failed) {
    +        new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*));
    +        *malloc_failed = new_ht_table == NULL;
    +        if (*malloc_failed)
    +            return DICT_ERR;
    +    } else
    +        new_ht_table = zcalloc(newsize*sizeof(dictEntry*));
    +
    +    // number of entries used in the new hash table
    +    new_ht_used = 0;
    +
    +    /* Is this the first initialization? If so it's not really a rehashing
    +     * we just set the first hash table so that it can accept keys. */
    +    if (d->ht_table[0] == NULL) {
    +        d->ht_size_exp[0] = new_ht_size_exp;
    +        d->ht_used[0] = new_ht_used;
    +        d->ht_table[0] = new_ht_table;
    +        return DICT_OK;
    +    }
    +
    +    /* Prepare a second hash table for incremental rehashing */
    +    d->ht_size_exp[1] = new_ht_size_exp;
    +    d->ht_used[1] = new_ht_used;
    +    d->ht_table[1] = new_ht_table;
    +    d->rehashidx = 0; // rehashidx != -1 marks the rehash as in progress, starting at bucket 0
    +    return DICT_OK;
    +}
    +

    首先判断是否在rehash,在rehash中不能扩容。然后创建一个新的hash table,这个newsize是2的n次幂。expand操作在刚开始初始化时会使用,也会在这里做一个判断。更常用的是在扩容后进行rehash操作。

    +

    获得size的函数:

    +
    /* Keep the hash table capacity a power of two: return the smallest exponent e,
    + * starting from DICT_HT_INITIAL_EXP, such that (1 << e) >= size. For
    + * size >= LONG_MAX the result is capped at 8*sizeof(long)-1. */
    +static signed char _dictNextExp(unsigned long size)
    +{
    +    unsigned char e = DICT_HT_INITIAL_EXP;
    +
    +    if (size >= LONG_MAX) return (8*sizeof(long)-1);
    +    // 1 << e == 1 * 2^e  
    +    // find the first 2^e that is greater than or equal to size
    +    while(1) {
    +        if (((unsigned long)1<<e) >= size)
    +            return e;
    +        e++;
    +    }
    +}
    +

    渐进式Rehash

    +

    直接看函数:

    +
    int dictRehash(dict *d, int n) { /* Migrate up to n non-empty buckets from table 0 to table 1. Returns 1 while work remains, 0 when the rehash completed or none is in progress. */
    +    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    +    if (!dictIsRehashing(d)) return 0;
    +
    +    while(n-- && d->ht_used[0] != 0) { // stop after n buckets or once table 0 is empty
    +        dictEntry *de, *nextde;
    +
    +        /* Note that rehashidx can't overflow as we are sure there are more
    +         * elements because ht[0].used != 0 */
    +        assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
    +        while(d->ht_table[0][d->rehashidx] == NULL) {
    +            d->rehashidx++;
    +            if (--empty_visits == 0) return 1; // too many empty buckets for this call; more work remains
    +        }
    +        de = d->ht_table[0][d->rehashidx];
    +        /* Move all the keys in this bucket from the old to the new hash HT */
    +        while(de) {
    +            uint64_t h;
    +
    +            nextde = de->next; // saved because de->next is overwritten below
    +            /* Get the index in the new hash table */
    +            h = dictHashKey(d, de->key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
    +            de->next = d->ht_table[1][h]; // push onto the head of the destination bucket
    +            d->ht_table[1][h] = de;
    +            d->ht_used[0]--;
    +            d->ht_used[1]++;
    +            de = nextde;
    +        }
    +        d->ht_table[0][d->rehashidx] = NULL;
    +        d->rehashidx++;
    +    }
    +
    +    /* Check if we already rehashed the whole table... */
    +    if (d->ht_used[0] == 0) {
    +        zfree(d->ht_table[0]);
    +        /* Copy the new ht onto the old one */
    +        d->ht_table[0] = d->ht_table[1];
    +        d->ht_used[0] = d->ht_used[1];
    +        d->ht_size_exp[0] = d->ht_size_exp[1];
    +        _dictReset(d, 1);
    +        d->rehashidx = -1; // rehash finished
    +        return 0;
    +    }
    +
    +    /* More to rehash... */
    +    return 1;
    +}
    +

    rehash除了扩容时会触发,缩容时也会触发。Redis整个rehash的实现,主要分为如下几步完成。

    +
      +
    1. 给Hash表ht[1]申请足够的空间;扩容时空间大小为当前容量*2,即d->ht[0].used*2;当使用量不到总空间10%时,则进行缩容。缩容时空间大小则为能恰好包含d->ht[0].used个节点的2^N次方幂整数,并把字典中字段rehashidx标识为0
    2. +
    3. 进行rehash操作调用的是dictRehash函数,重新计算ht[0]中每个键的Hash值与索引值(重新计算就叫rehash),依次添加到新的Hash表ht[1],并把老Hash表中该键值对删除。把字典中字段rehashidx字段修改为Hash表ht[0]中正在进行rehash操作节点的索引值.
    4. +
    5. rehash操作后,清空ht[0],然后对调一下ht[1]与ht[0]的值,并把字典中rehashidx字段标识为-1。
    6. +
    +

    我们知道,Redis可以提供高性能的线上服务,而且是单进程模式,当数据库中键值对数量达到了百万、千万、亿级别时,整个rehash过程将非常缓慢,如果不优化rehash过程,可能会造成很严重的服务不可用现象。Redis优化的思想很巧妙,利用分而治之的思想来进行rehash操作,大致的步骤如下。

    +

    执行插入、删除、查找、修改等操作前,都先判断当前字典rehash操作是否在进行中,进行中则调用dictRehashStep函数进行rehash操作(每次只对1个节点进行rehash操作,共执行1次)。除这些操作之外,当服务空闲时,如果当前字典也需要进行rehash操作,则会调用incrementallyRehash函数进行批量rehash操作(每次对100个节点进行rehash操作,共执行1毫秒)。在经历N次rehash操作后,整个ht[0]的数据都会迁移到ht[1]中,这样做的好处就是把本应集中处理的时间分散到了上百万、千万、亿次操作中,所以其耗时可忽略不计。

    +
    /* This function performs just a step of rehashing, and only if hashing has
    + * not been paused for our hash table. When we have iterators in the
    + * middle of a rehashing we can't mess with the two hash tables otherwise
    + * some elements can be missed or duplicated.
    + *
    + * This function is called by common lookup or update operations in the
    + * dictionary so that the hash table automatically migrates from H1 to H2
    + * while it is actively used. */
    +static void _dictRehashStep(dict *d) {
    +    if (d->pauserehash == 0) dictRehash(d,1); // one bucket per call (n = 1), skipped while rehashing is paused
    +}
    +

    删除

    +
    static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { /* Unlink the entry matching key; returns it, or NULL if the dict is empty or the key is not found. NOTE(review): when nofree is 0 the returned pointer has already been passed to dictFreeUnlinkedEntry(), so it presumably must not be dereferenced -- confirm. */
    +    uint64_t h, idx;
    +    dictEntry *he, *prevHe;
    +    int table;
    +
    +    /* dict is empty */
    +    if (dictSize(d) == 0) return NULL;
    +
    +    if (dictIsRehashing(d)) _dictRehashStep(d);
    +    h = dictHashKey(d, key);
    +
    +    for (table = 0; table <= 1; table++) {
    +        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
    +        he = d->ht_table[table][idx];
    +        prevHe = NULL;
    +        // search the bucket chain for the key
    +        while(he) {
    +            if (key==he->key || dictCompareKeys(d, key, he->key)) {
    +                /* Unlink the element from the list */
    +                if (prevHe)
    +                    prevHe->next = he->next;
    +                else // the entry is the bucket head: make the bucket point at its successor
    +                    d->ht_table[table][idx] = he->next;
    +                if (!nofree) {
    +                    dictFreeUnlinkedEntry(d, he);
    +                }
    +                d->ht_used[table]--;
    +                return he;
    +            }
    +            prevHe = he;
    +            he = he->next;
    +        }
    +        if (!dictIsRehashing(d)) break; // table 1 is only searched while a rehash is in progress
    +    }
    +    return NULL; /* not found */
    +}
    +

    遍历

    +

    遍历Redis整个数据库主要有两种方式:全遍历(例如keys命令)、间断遍历(hscan命令):

    +
      +
    • 全遍历: 一次命令执行就遍历完整个数据库。
    • +
    • 间断遍历: 每次命令执行只取部分数据,分多次遍历。
    • +
    +

    迭代器——可在容器(容器可为字典、链表等数据结构)上遍访的接口,设计人员无须关心容器的内容,调用迭代器固定的接口就可遍历数据,在很多高级语言中都有实现。

    +

    字典迭代器主要用于迭代字典这个数据结构中的数据,既然是迭代字典中的数据,必然会出现一个问题,迭代过程中,如果发生了数据增删,则可能导致字典触发rehash操作,或迭代开始时字典正在进行rehash操作,从而导致一条数据可能多次遍历到。

    +
    typedef struct dictIterator {
    +    dict *d;
    +    long index; // index of the bucket currently being iterated in the hash table
    +    // safe == 1 marks a safe iterator, which may be used while operations such as add/find (which can trigger rehash steps) are performed
    +    int table, safe;
    +    // entry is the node currently being read; nextEntry is the next field of that node
    +    dictEntry *entry, *nextEntry;
    +    /* unsafe iterator fingerprint for misuse detection. */
    +    unsigned long long fingerprint;// dict fingerprint; it changes when the dict changes
    +} dictIterator;
    +

    fingerprint字段是一个64位的整数,表示在给定时间内字典的状态。在这里称其为字典的指纹,因为该字段的值为字典(dict结构体)中所有字段值组合在一起生成的Hash值,所以当字典中数据发生任何变化时,其值都会不同,生成算法可参见源码dict.c文件中的dictFingerprint函数。

    +
    /* A fingerprint is a 64 bit number that represents the state of the dictionary
    + * at a given time, it's just a few dict properties xored together.
    + * When an unsafe iterator is initialized, we get the dict fingerprint, and check
    + * the fingerprint again when the iterator is released.
    + * If the two fingerprints are different it means that the user of the iterator
    + * performed forbidden operations against the dictionary while iterating. */
    +unsigned long long dictFingerprint(dict *d) {
    +    unsigned long long integers[6], hash = 0;
    +    int j;
    +
    +    integers[0] = (long) d->ht_table[0]; // table pointer folded in as an integer; NOTE(review): long may be narrower than a pointer on some ABIs
    +    integers[1] = d->ht_size_exp[0];
    +    integers[2] = d->ht_used[0];
    +    integers[3] = (long) d->ht_table[1];
    +    integers[4] = d->ht_size_exp[1];
    +    integers[5] = d->ht_used[1];
    +
    +    /* We hash N integers by summing every successive integer with the integer
    +     * hashing of the previous sum. Basically:
    +     *
    +     * Result = hash(hash(hash(int1)+int2)+int3) ...
    +     *
    +     * This way the same set of integers in a different order will (likely) hash
    +     * to a different number. */
    +    for (j = 0; j < 6; j++) {
    +        hash += integers[j];
    +        /* For the hashing step we use Thomas Wang's 64 bit integer hash. */
    +        hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
    +        hash = hash ^ (hash >> 24);
    +        hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
    +        hash = hash ^ (hash >> 14);
    +        hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
    +        hash = hash ^ (hash >> 28);
    +        hash = hash + (hash << 31);
    +    }
    +    return hash;
    +}
    +

    根据迭代器结构中的safe字段,将迭代器分为普通迭代器和安全迭代器:

    +
      +
    • 普通迭代器: 只遍历数据
    • +
    • 安全迭代器: 遍历的同时删除数据
    • +
    +

    普通迭代器

    +

    普通迭代器迭代字典中数据时,会对迭代器中fingerprint字段的值作严格的校验,来保证迭代过程中字典结构不发生任何变化,确保读取出的数据不出现重复

    +

    当Redis执行部分命令时会使用普通迭代器迭代字典数据,例如sort命令。sort命令主要作用是对给定列表、集合、有序集合的元素进行排序,如果给定的是有序集合,其成员名存储用的是字典,分值存储用的是跳跃表,则执行sort命令读取数据的时候会用到迭代器来遍历整个字典。

    +
         dict *set = ((zset*)sortval->ptr)->dict;
    +        dictIterator *di;
    +        dictEntry *setele;
    +        sds sdsele;
    +        di = dictGetIterator(set);
    +        while((setele = dictNext(di)) != NULL) {
    +            sdsele =  dictGetKey(setele);
    +            vector[j].obj = createStringObject(sdsele,sdslen(sdsele));
    +            vector[j].u.score = 0;
    +            vector[j].u.cmpobj = NULL;
    +            j++;
    +        }
    +        dictReleaseIterator(di);
    +
      +
    1. +

      调用dictGetIterator函数初始化一个普通迭代器,此时会把iter->safe值置为0,表示初始化的迭代器为普通迭代器

      +
      void dictInitIterator(dictIterator *iter, dict *d) /* Set up a caller-provided iterator over d as an unsafe (plain) iterator positioned before the first bucket. */
      +{
      +    iter->d = d;
      +    iter->table = 0; // start with table 0
      +    iter->index = -1; // no bucket visited yet
      +    iter->safe = 0; // 0 = plain (unsafe) iterator
      +    iter->entry = NULL;
      +    iter->nextEntry = NULL;
      +}
      +
      +dictIterator *dictGetIterator(dict *d) /* Allocate a new iterator with zmalloc() and initialize it through dictInitIterator(); returns the iterator. */
      +{
      +  dictIterator *iter = zmalloc(sizeof(*iter));
      +  dictInitIterator(iter, d); // leaves iter->safe == 0, i.e. a plain iterator
      +  return iter;
      +}
      +
    2. +
    3. +

      循环调用dictNext函数依次遍历字典中Hash表的节点,首次遍历时会通过dictFingerprint函数拿到当前字典的指纹值。

      +
    4. +
    5. +
    +

    安全迭代器

    +
    @@ -826,7 +1236,7 @@

    结构


    - Built with Hugo and theme Tokiwa. 6397 words in this page. + Built with Hugo and theme Tokiwa. 10960 words in this page.
    diff --git "a/docs/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273/index.html" "b/docs/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273/index.html" index a12cf30..ee53fdf 100644 --- "a/docs/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273/index.html" +++ "b/docs/post/\345\214\272\345\235\227\351\223\276\347\231\275\347\232\256\344\271\246\350\247\243\350\257\273/index.html" @@ -189,6 +189,11 @@

    时不时更新

  • 比特币白皮书
  • 以太坊白皮书
  • Tangle 白皮书
  • +
  • Fabric 白皮书 + +
@@ -210,6 +215,25 @@

Tangle 白皮书

详细的内容见 Tangle白皮书中文版

tangleIOTA 所用的技术,为物联网和小额支付提供支持。不同于常见的区块链,它使用一个DAG(有向无环图)作为结构,这里称为Tangle。

传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。

+

Fabric 白皮书

+

Hyperledger Fabric 是 Linux 基金会 的 一个项目,是Hyperledger下面的一个子项目。作为一个开源联盟链,被很多项目应用。

+

它的主要特点是模块化的共识机制,相对高性能,和可以使用常规语言编写智能合约(golang)。

+

概念

+

联盟链

+

文中划分联盟链和公链的标准是: 是否发币和节点身份是否可知

+

状态机复制(SMR)是建设弹性应用众所周知的方式,但是如果我们把运行在区块链上的智能合约看作一种分布式应用,与传统的SMR区别在于:

+
    +
  • 许多应用并发运行
  • +
  • 这些应用可以被任何人动态地部署
  • +
  • 这些应用的代码是不被信任的,可能有恶意
  • +
+

order-execute

+

现有的大部分可以运行智能合约的区块链遵循SMR实现一种order-execute的架构: 节点先将交易排序再将它们广播给其他节点,然后每个节点顺序执行。

+

+

所有节点对所有交易的顺序执行限制了性能,并且需要采取复杂的措施来防止源自不受信任的合约(例如在以太坊中使用“gas”计算运行时)的针对平台的拒绝服务攻击;智能合约很难做到并发。

+

最大的限制是交易必须是确定的,这就使得不能使用常规编程语言来实现,必须使用特定的语言。

+

execute-order-validate

+

@@ -293,7 +317,7 @@

Tangle 白皮书


- Built with Hugo and theme Tokiwa. 152 words in this page. + Built with Hugo and theme Tokiwa. 603 words in this page.
diff --git "a/docs/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273/index.html" "b/docs/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273/index.html" index 574b69f..f7e5b22 100644 --- "a/docs/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273/index.html" +++ "b/docs/post/\350\201\224\351\202\246\345\255\246\344\271\240\347\233\270\345\205\263\350\256\272\346\226\207\351\230\205\350\257\273/index.html" @@ -210,6 +210,11 @@

持续更新

  • 模型
  • +
  • IOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》 + +
  • @@ -387,6 +392,8 @@

    Discriminative Aggregation

    算法流程

    每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。

    +

    IOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》

    +

    背景

    @@ -470,7 +477,7 @@

    算法流程


    - Built with Hugo and theme Tokiwa. 3547 words in this page. + Built with Hugo and theme Tokiwa. 3575 words in this page.
    diff --git a/docs/series/index.html b/docs/series/index.html index 35b701a..a4e4653 100644 --- a/docs/series/index.html +++ b/docs/series/index.html @@ -295,7 +295,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/tags/index.html b/docs/tags/index.html index 6772f99..f7afdc4 100644 --- a/docs/tags/index.html +++ b/docs/tags/index.html @@ -484,7 +484,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/tags/raft/index.html b/docs/tags/raft/index.html index 651ceb5..e48f961 100644 --- a/docs/tags/raft/index.html +++ b/docs/tags/raft/index.html @@ -334,7 +334,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/tags/redis/index.html b/docs/tags/redis/index.html index b310916..0b7f7b9 100644 --- a/docs/tags/redis/index.html +++ b/docs/tags/redis/index.html @@ -331,7 +331,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git a/docs/tags/redis/index.xml b/docs/tags/redis/index.xml index e19a1e3..b36a1b4 100644 --- a/docs/tags/redis/index.xml +++ b/docs/tags/redis/index.xml @@ -19,6 +19,9 @@ https://chi-kai.github.io/post/redis%E6%BA%90%E7%A0%81%E5%89%96%E6%9E%90-%E4%B8%80/ <h1 id="基础数据结构部分">基础数据结构部分</h1> <h2 id="动态字符串-sds">动态字符串 SDS</h2> +<p>实现在 sds.h/sds.c。</p> +<h3 id="设计原则">设计原则</h3> +<p>为什么不使用c语言原生的字符串操作库? c字符串用'\0'作为终止符,不能满足二进制安全,而且求字符串长度,拼接等操作都要遍历到'\0'来实现,需要自己控制内存使用,操作复杂度高。</p> <h3 id="前置知识">前置知识</h3> <p>由于我对C语言没有深入了解,有很多知识点会在前面补充。</p> <ul> @@ -56,7 +59,8 @@ </span></span><span style="display:flex;"><span> <span style="color:#75715e">// 柔性数组,没有分配之前不占内存 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">char</span> <span style="color:#111">buf</span><span style="color:#111">[];</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div><h4 id="二进制安全">二进制安全</h4> +</span></span></code></pre></div><p>记录了已经使用的空间和分配的空间,比C字符串操作效率更高。和 C 语言中的字符串操作相比,SDS 通过记录字符数组的使用长度和分配空间大小,避免了对字符串的遍历操作,降低了操作开销,进一步就可以帮助诸多字符串操作更加高效地完成,比如创建、追加、复制、比较等。</p> +<h4 id="二进制安全">二进制安全</h4> <pre><code>什么是二进制安全?通俗地讲,C语言中,用“\0”表示字符串的结束,如果字符串中本身就有“\0”字符,字符串就会被截断,即非二进制安全;若通过某种机制,保证读写字符串时不损害其内容,则是二进制安全。在网络报文中常常需要二进制安全。 sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;,保障了二进制安全。 @@ -419,7 +423,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span><span style="color:#111">}</span> </span></span></code></pre></div><h2 id="压缩列表">压缩列表</h2> <p>具体的实现在ziplist.h和ziplist.c</p> -<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是O(1)。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> +<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是 <strong>O(1)</strong> 
。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// ziplist 结构 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#f92672">&lt;</span><span style="color:#111">zlbytes</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zltail</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zllen</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#111">...</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zlend</span><span style="color:#f92672">&gt;</span> </span></span></code></pre></div><p>这里的所有结构都是按照小端存储。</p> @@ -496,6 +500,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></span><span style="display:flex;"><span><span style="color:#75715e"> is, this points to prev-entry-len field. 
*/</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> <span style="color:#111">zlentry</span><span style="color:#111">;</span> </span></span></code></pre></div><p>对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。</p> +<p>解码操作,主要用宏实现:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">inline</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">zipEntry</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">zlentry</span> <span style="color:#f92672">*</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#111">{</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_DECODE_PREVLEN</span><span style="color:#111">(</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlen</span><span style="color:#111">);</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_ENTRY_ENCODING</span><span style="color:#111">(</span><span style="color:#111">p</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">encoding</span><span style="color:#111">);</span> @@ -504,7 +509,22 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">headersize</span> <span style="color:#f92672">=</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">lensize</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">p</span> <span style="color:#f92672">=</span> <span style="color:#111">p</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> -</span></span></code></pre></div><h2 id="字典">字典</h2> +</span></span></code></pre></div><p>这里主要就是对字节的读取,可以去看源代码。</p> +<h3 id="操作">操作</h3> +<h4 id="创建-2">创建</h4> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Create a new empty ziplist. 
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">// 先申请初始的空间(4+4+2+1),再对zlbytes,zltail,zllen,zlend逐个初始化 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#75af00">ziplistNew</span><span style="color:#111">(</span><span style="color:#00a8c8">void</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">int</span> <span style="color:#111">bytes</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#f92672">+</span><span style="color:#111">ZIPLIST_END_SIZE</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">zl</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_BYTES</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_TAIL_OFFSET</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span 
style="color:#111">(</span><span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_LENGTH</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">zl</span><span style="color:#111">[</span><span style="color:#111">bytes</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIP_END</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">zl</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h4 id="插入元素">插入元素</h4> +<h2 id="字典">字典</h2> <h3 id="结构-1">结构</h3> <p>节点:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictEntry</span> <span style="color:#111">{</span> @@ -525,17 +545,400 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></code></pre></div><p>可以看出是使用链表法来解决hash冲突的。</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">struct</span> <span style="color:#111">dict</span> <span style="color:#111">{</span> </span></span><span style="display:flex;"><span> 
<span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">;</span> <span style="color:#75715e">// 对应特定类型操作函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 哈希表。有两个,一个正常使用,另外一个在rehash时使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 记录每个哈希表被使用的数目。 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> -</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">rehashidx</span><span style="color:#111">;</span> <span style="color:#75715e">/* rehashing not in progress if rehashidx == -1 */</span> </span></span><span style="display:flex;"><span> </span></span><span 
style="display:flex;"><span> <span style="color:#75715e">/* Keep small vars at end for optimal (minimal) struct padding */</span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int16_t</span> <span style="color:#111">pauserehash</span><span style="color:#111">;</span> <span style="color:#75715e">/* If &gt;0 rehashing is paused (&lt;0 indicates coding error) */</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. (size = 1&lt;&lt;exp) */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// size 的 系数,size 是2 的N次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. 
(size = 1&lt;&lt;exp) */</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div> +</span></span></code></pre></div><p>这里可以看到一个dictType 用来对应特定类型的操作函数,这些函数体现了面向对象编程的思想,会在后面合适的时机用到。</p> +<p>比如这个hashFunction 用来控制dict使用的hash函数,默认为siphash。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-go" data-lang="go"><span style="display:flex;"><span><span style="color:#75af00">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#75af00">dictType</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">uint64_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">hashFunction</span><span style="color:#111">)(</span><span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// hash函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// key的 复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span
style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 的复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyCompare</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key1</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key2</span><span style="color:#111">);</span> <span style="color:#75715e">// key 对比函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span 
style="color:#75715e">// key 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">expandAllowed</span><span style="color:#111">)(</span><span style="color:#75af00">size_t</span> <span style="color:#75af00">moreMem</span><span style="color:#111">,</span> <span style="color:#75af00">double</span> <span style="color:#75af00">usedRatio</span><span style="color:#111">);</span> <span style="color:#75715e">//扩展函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">/* Allow a dictEntry to carry extra caller-defined metadata. The +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * extra memory is initialized to 0 when a dictEntry is allocated. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">size_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">dictEntryMetadataBytes</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">);</span> <span style="color:#75715e">// 元数据 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#75af00">dictType</span><span style="color:#111">;</span> +</span></span></code></pre></div><h3 id="创建-3">创建</h3> +<p>先申请空间,再初始化参数</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Reset hash table parameters already initialized with _dictInit()*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Create a new hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictCreate</span><span style="color:#111">(</span><span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">type</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Initialize the hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">0</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">type</span> <span style="color:#f92672">=</span> <span style="color:#111">type</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> <span style="color:#75715e">// 使用一些宏来反馈结果 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="增加与扩容">增加与扩容</h3> +<p>这里先提前讲一下Rehash的概念,便于理解增加扩容中的一些操作:</p> +<p>扩容后,字典容量及掩码值会发生改变,同一个键与掩码经位运算后得到的索引值就会发生改变,从而导致根据键查找不到值的情况。解决这个问题的方法是,<strong>新扩容的内存放到一个全新的Hash表中(ht[1]),并给字典打上在进行rehash操作中的标识(即rehashidx! 
=-1)</strong>。此后,新添加的键值对都往新的Hash表中存储;而修改、删除、查找操作需要在ht[0]、ht[1]中进行检查,然后再决定去对哪个Hash表操作。除此之外,还需要把老Hash表(ht[0])中的数据重新计算索引值后全部迁移插入到新的Hash表(ht[1])中,此迁移过程称作rehash。</p> +<p>先看增加单个entry的操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Add an element to the target hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictAdd</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">val</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">,</span><span style="color:#111">NULL</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#75af00">dictSetVal</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">val</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">existing</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 
如果正在rehash,在add时进行一步rehash,这里是将大范围的rehash分散来减小资源集中消耗 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index of the new element, or -1 if +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the element already exists. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">((</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#75af00">_dictKeyIndex</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">),</span> <span style="color:#111">existing</span><span style="color:#111">))</span> <span style="color:#f92672">==</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the memory and store the new 
entry. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Insert the element in top, with the assumption that in a database +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * system it is more likely that recently added entries are accessed +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * more frequently. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">htidx</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">?</span> <span style="color:#ae81ff">1</span> <span style="color:#f92672">:</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">metasize</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictMetadataSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">metasize</span> <span style="color:#f92672">&gt;</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">memset</span><span style="color:#111">(</span><span style="color:#75af00">dictMetadata</span><span style="color:#111">(</span><span style="color:#111">entry</span><span style="color:#111">),</span> <span style="color:#ae81ff">0</span><span style="color:#111">,</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 插入在顶部:根据时间局部性 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">entry</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span>
+</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Set the hash entry fields. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictSetKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>可以看出,dictAdd调用了一个底层的dictAddRaw函数。dictAddRaw首先使用_dictKeyIndex来查找一个合适的插入位置,如果这个key已经存在就退出add操作。然后确定是否在rehash,上面我们讲过如果在rehash那么 <strong>新添加的键值对都往新的Hash表中存储</strong>。后面就申请空间在相应位置顶部插入,这是数据库访问时间局部性的体现。</p> +<p>这里看一下dictSetKey和dictSetVal:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">#define dictSetKey(d, entry, _key_) do { \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;keyDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (d)-&gt;type-&gt;keyDup((d), _key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (_key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">#define dictSetVal(d, entry, _val_) do
{ \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;valDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (d)-&gt;type-&gt;valDup((d), _val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (_val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span></code></pre></div><p>可以看出是用宏的形式调用dict的dicttype函数,也就是说这些操作是可以调整的。</p> +<p>扩容操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 将d扩容到不小于size的2的幂大小 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictExpand</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span><span style="color:#f92672">*</span> <span style="color:#111">malloc_failed</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span
style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the size is invalid if it is smaller than the number of +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * elements already inside the hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">||</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">=</span> <span 
style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#111">size</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Detect overflows */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">newsize</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">1ul</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 后者判断在什么时候成立? +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">newsize</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">size</span> <span style="color:#f92672">||</span> <span style="color:#111">newsize</span> <span style="color:#f92672">*</span> <span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">)</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">newsize</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Rehashing to the same table size is not useful. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">==</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the new hash table and initialize all pointers to NULL */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">ztrycalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span><span 
style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> <span style="color:#00a8c8">else</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zcalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 新的hash表被使用的数量 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">new_ht_used</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Is this the first initialization? If so it&#39;s not really a rehashing +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * we just set the first hash table so that it can accept keys. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75715e">/* Prepare a second hash table for incremental rehashing */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>首先判断是否在rehash,在rehash中不能扩容。然后创建一个新的hash table,这个newsize是2的n次幂。expand操作在刚开始初始化时会使用,也会在这里做一个判断。更常用的是在扩容后进行rehash操作。</p> +<p>获得size的函数:</p> +<div class="highlight"><pre tabindex="0" 
style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 确保hash cap 为2的N次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#111">e</span> <span style="color:#f92672">=</span> <span style="color:#111">DICT_HT_INITIAL_EXP</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">size</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">LONG_MAX</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">(</span><span style="color:#ae81ff">8</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 1 &lt;&lt; e == 1 * 2^e +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// 找到一个大于size 的2^e 
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(((</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#ae81ff">1</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">e</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="渐进式rehash">渐进式Rehash</h3> +<p>直接看函数:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">n</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#00a8c8">int</span> <span style="color:#111">empty_visits</span> <span style="color:#f92672">=</span> <span style="color:#111">n</span><span style="color:#f92672">*</span><span style="color:#ae81ff">10</span><span style="color:#111">;</span> <span style="color:#75715e">/* Max number of empty buckets to visit. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">n</span><span style="color:#f92672">--</span> <span style="color:#f92672">&amp;&amp;</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">!=</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">de</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Note that rehashidx can&#39;t overflow as we are sure there are more +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * 
elements because ht[0].used != 0 */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">assert</span><span style="color:#111">(</span><span style="color:#75af00">DICTHT_SIZE</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">--</span><span style="color:#111">empty_visits</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Move all the keys in this bucket from the old to the new hash HT */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">de</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">nextde</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index in the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">de</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">)</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span><span 
style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Check if we already rehashed the whole table... 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">zfree</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Copy the new ht onto the old one */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* More to rehash... */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>rehash除了扩容时会触发,缩容时也会触发。Redis整个rehash的实现,主要分为如下几步完成。</p> +<ol> +<li>给Hash表ht[1]申请足够的空间;扩容时空间大小为当前容量*2,即d-&gt;ht[0]. 
used*2;当使用量不到总空间10%时,则进行缩容。缩容时空间大小则为能恰好包含d-&gt;ht[0].used个节点的2^N次方幂整数,并把字典中字段rehashidx标识为0</li> +<li>进行rehash操作调用的是dictRehash函数,重新计算ht[0]中每个键的Hash值与索引值(重新计算就叫rehash),依次添加到新的Hash表ht[1],并把老Hash表中该键值对删除。把字典中rehashidx字段修改为Hash表ht[0]中正在进行rehash操作节点的索引值。</li> +<li>rehash操作后,清空ht[0],然后对调一下ht[1]与ht[0]的值,并把字典中rehashidx字段标识为-1。</li> +</ol> +<p>我们知道,Redis可以提供高性能的线上服务,而且是单进程模式,当数据库中键值对数量达到了百万、千万、亿级别时,整个rehash过程将非常缓慢,如果不优化rehash过程,可能会造成很严重的服务不可用现象。Redis优化的思想很巧妙,利用分而治之的思想进行rehash操作,大致的步骤如下。</p> +<p>执行插入、删除、查找、修改等操作前,都先判断当前字典rehash操作是否在进行中,进行中则调用_dictRehashStep函数进行rehash操作(每次只对1个节点进行rehash操作,共执行1次)。除这些操作之外,当服务空闲时,如果当前字典也需要进行rehash操作,则会调用incrementallyRehash函数进行批量rehash操作(每次对100个节点进行rehash操作,共执行1毫秒)。在经历N次rehash操作后,整个ht[0]的数据都会迁移到ht[1]中,这样做的好处就是把本应集中处理的时间分散到了上百万、千万、亿次操作中,所以其耗时可忽略不计。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* This function performs just a step of rehashing, and only if hashing has +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * not been paused for our hash table. When we have iterators in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * middle of a rehashing we can&#39;t mess with the two hash tables otherwise +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * some elements can be missed or duplicated. 
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This function is called by common lookup or update operations in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * dictionary so that the hash table automatically migrates from H1 to H2 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * while it is actively used. */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="删除-1">删除</h3> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGenericDelete</span><span 
style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">,</span> <span style="color:#111">idx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">he</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">prevHe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* dict is empty */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span 
style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">table</span> <span style="color:#f92672">&lt;=</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> <span style="color:#111">table</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">idx</span> <span style="color:#f92672">=</span> <span style="color:#111">h</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 查找 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">he</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">key</span><span style="color:#f92672">==</span><span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span> <span style="color:#f92672">||</span> <span style="color:#75af00">dictCompareKeys</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">))</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Unlink the element from the list */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">prevHe</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span 
style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">else</span> <span style="color:#75715e">// 在bucket顶部,直接略过 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictFreeUnlinkedEntry</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">he</span><span style="color:#111">;</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">break</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> <span style="color:#75715e">/* not found */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="遍历">遍历</h3> +<p>遍历Redis整个数据库主要有两种方式:全遍历(例如keys命令)、间断遍历(hscan命令):</p> +<ul> +<li>全遍历: 一次命令执行就遍历完整个数据库。</li> +<li>间断遍历: 每次命令执行只取部分数据,分多次遍历。</li> +</ul> +<p>迭代器——可在容器(容器可为字典、链表等数据结构)上遍访的接口,设计人员无须关心容器的内容,调用迭代器固定的接口就可遍历数据,在很多高级语言中都有实现。</p> +<p>字典迭代器主要用于迭代字典这个数据结构中的数据,既然是迭代字典中的数据,必然会出现一个问题,迭代过程中,如果发生了数据增删,则可能导致字典触发rehash操作,或迭代开始时字典正在进行rehash操作,从而导致一条数据可能多次遍历到。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span 
style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictIterator</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> <span style="color:#75715e">// 迭代hash中的索引值 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// safe 为1表示是安全迭代器,可以在add,find等rehash场景中使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">,</span> <span style="color:#111">safe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// entry 当前读取节点,nextEntry entry 节点的next字段 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextEntry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* unsafe iterator fingerprint for misuse detection. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">fingerprint</span><span style="color:#111">;</span><span style="color:#75715e">// 字典指纹,字典发生改变随之改变 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#111">dictIterator</span><span style="color:#111">;</span> +</span></span></code></pre></div><p>fingerprint字段是一个64位的整数,表示在给定时间内字典的状态。在这里称其为字典的指纹,因为该字段的值为字典(dict结构体)中所有字段值组合在一起生成的Hash值,所以当字典中数据发生任何变化时,其值都会不同,生成算法可参见源码dict.c文件中的dictFingerprint函数。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* A fingerprint is a 64 bit number that represents the state of the dictionary +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * at a given time, it&#39;s just a few dict properties xored together. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * When an unsafe iterator is initialized, we get the dict fingerprint, and check +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the fingerprint again when the iterator is released. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * If the two fingerprints are different it means that the user of the iterator +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * performed forbidden operations against the dictionary while iterating. 
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#75af00">dictFingerprint</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">6</span><span style="color:#111">],</span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">j</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">3</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">4</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">5</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span 
style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* We hash N integers by summing every successive integer with the integer +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * hashing of the previous sum. Basically: +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Result = hash(hash(hash(int1)+int2)+int3) ... +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This way the same set of integers in a different order will (likely) hash +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * to a different number. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">j</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">j</span> <span style="color:#f92672">&lt;</span> <span style="color:#ae81ff">6</span><span style="color:#111">;</span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">+=</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* For the hashing step we use Tomas Wang&#39;s 64 bit integer hash. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#f92672">~</span><span style="color:#111">hash</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">21</span><span style="color:#111">);</span> <span style="color:#75715e">// hash = (hash &lt;&lt; 21) - hash - 1; +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">24</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">3</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">8</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 265 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span 
style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">14</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">2</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">4</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 21 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">28</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">31</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">hash</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> 
+</span></span></code></pre></div><p>根据迭代器结构中的safe字段,将迭代器分为普通迭代器和安全迭代器:</p> +<ul> +<li>普通迭代器: 只遍历数据</li> +<li>安全迭代器: 遍历的同时删除数据</li> +</ul> +<h4 id="普通迭代器">普通迭代器</h4> +<p>普通迭代器迭代字典中数据时,会对迭代器中fingerprint字段的值作严格的校验,来保证迭代过程中字典结构不发生任何变化,确保读取出的数据不出现重复</p> +<p>当Redis执行部分命令时会使用普通迭代器迭代字典数据,例如sort命令。sort命令主要作用是对给定列表、集合、有序集合的元素进行排序,如果给定的是有序集合,其成员名存储用的是字典,分值存储用的是跳跃表,则执行sort命令读取数据的时候会用到迭代器来遍历整个字典。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">set</span> <span style="color:#f92672">=</span> <span style="color:#111">((</span><span style="color:#111">zset</span><span style="color:#f92672">*</span><span style="color:#111">)</span><span style="color:#111">sortval</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ptr</span><span style="color:#111">)</span><span style="color:#f92672">-&gt;</span><span style="color:#111">dict</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">di</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">setele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sds</span> <span style="color:#111">sdsele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">di</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">set</span><span style="color:#111">);</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">((</span><span style="color:#111">setele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictNext</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">))</span> <span style="color:#f92672">!=</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sdsele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetKey</span><span style="color:#111">(</span><span style="color:#111">setele</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">obj</span> <span style="color:#f92672">=</span> <span style="color:#75af00">createStringObject</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">,</span><span style="color:#75af00">sdslen</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span style="color:#111">score</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span 
style="color:#111">cmpobj</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictReleaseIterator</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">);</span> +</span></span></code></pre></div><ol> +<li> +<p>调用dictGetIterator函数初始化一个普通迭代器,此时会把iter-&gt;safe值置为0,表示初始化的迭代器为普通迭代器</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">void</span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">safe</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">nextEntry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span 
style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">iter</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div></li> +<li> +<p>循环调用dictNext函数依次遍历字典中Hash表的节点,首次遍历时会通过dictFingerprint函数拿到当前字典的指纹值。</p> +</li> +<li></li> +</ol> +<h4 id="安全迭代器">安全迭代器</h4> + diff --git "a/docs/tags/\345\210\206\345\270\203\345\274\217/index.html" "b/docs/tags/\345\210\206\345\270\203\345\274\217/index.html" index 40c678f..fdf2e57 100644 --- "a/docs/tags/\345\210\206\345\270\203\345\274\217/index.html" +++ "b/docs/tags/\345\210\206\345\270\203\345\274\217/index.html" @@ -334,7 +334,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git "a/docs/tags/\345\214\272\345\235\227\351\223\276/index.html" "b/docs/tags/\345\214\272\345\235\227\351\223\276/index.html" index 0d3fd7f..8ecdc1c 100644 --- "a/docs/tags/\345\214\272\345\235\227\351\223\276/index.html" +++ "b/docs/tags/\345\214\272\345\235\227\351\223\276/index.html" @@ -409,7 +409,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git "a/docs/tags/\345\214\272\345\235\227\351\223\276/index.xml" "b/docs/tags/\345\214\272\345\235\227\351\223\276/index.xml" index 1d2fedf..60c68a3 100644 --- "a/docs/tags/\345\214\272\345\235\227\351\223\276/index.xml" +++ "b/docs/tags/\345\214\272\345\235\227\351\223\276/index.xml" @@ -23,6 +23,25 @@ <p>详细的内容见 <a href="https://lzphi.cn/2020/12/20/2020-12-17-Tangle-%E7%99%BD%E7%9A%AE%E4%B9%A6/">Tangle白皮书中文版</a></p> <p><strong>tangle</strong> 是 <strong>IOTA</strong> 所用的技术,为物联网和小额支付提供支持。不同于常见的区块链,它使用一个DAG(有向无环图)作为结构,这里称为Tangle。</p> <p>传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。</p> +<h2 id="fabric-白皮书">Fabric 白皮书</h2> +<p>Hyperledger Fabric 是 Linux 基金会 的 一个项目,是Hyperledger下面的一个子项目。作为一个开源联盟链,被很多项目应用。</p> +<p>它的主要特点是模块化的共识机制,相对高性能,和可以使用常规语言编写智能合约(golang)。</p> +<h3 id="概念">概念</h3> +<h4 id="联盟链">联盟链</h4> +<p>文中划分联盟链和公链的标准是: <strong>是否发币和节点身份是否可知</strong></p> +<p>状态机复制(SMR)是建设弹性应用众所周知的方式,但是如果我们把运行在区块链上的智能合约看作一种分布式应用,与传统的SMR区别在于:</p> +<ul> +<li>许多应用并发运行</li> +<li>这些应用可以被任何人动态地部署</li> +<li>这些应用的代码是不被信任的,可能有恶意</li> +</ul> +<h4 id="order-execute">order-execute</h4> +<p>现有的大部分可以运行智能合约的区块链遵循SMR实现一种order-execute的架构: 节点先将交易排序再将它们广播给其他节点,然后每个节点顺序执行。</p> +<p><img src="https://s2.loli.net/2022/12/06/B4Ns3GZAKl8dIXT.png" alt=""></p> +<p>所有节点对所有交易的顺序执行限制了性能,并且需要采取复杂的措施来防止源自不受信任的合约(例如在以太坊中使用“gas”计算运行时)的针对平台的拒绝服务攻击;智能合约很难做到并发。</p> +<p>最大的限制是交易必须是确定的,这就使得不能使用常规编程语言来实现,必须使用特定的语言。</p> +<h4 id="execute-order-validate">execute-order-validate</h4> +<p><img src="https://s2.loli.net/2022/12/07/jDBxcLmYrfXSnbl.png" alt=""></p> diff --git "a/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.html" "b/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.html" index 0d7e0b4..3c714e1 100644 --- "a/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.html" +++ "b/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.html" @@ -331,7 +331,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git "a/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.xml" "b/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.xml" index 6cb0c0e..f2c863c 100644 --- "a/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.xml" +++ "b/docs/tags/\346\272\220\347\240\201\345\211\226\346\236\220/index.xml" @@ -19,6 +19,9 @@ https://chi-kai.github.io/post/redis%E6%BA%90%E7%A0%81%E5%89%96%E6%9E%90-%E4%B8%80/ <h1 id="基础数据结构部分">基础数据结构部分</h1> <h2 id="动态字符串-sds">动态字符串 SDS</h2> +<p>实现在 sds.h/sds.c。</p> +<h3 id="设计原则">设计原则</h3> +<p>为什么不使用c语言原生的字符串操作库? c字符串用'\0'作为终止符,不能满足二进制安全,而且求字符串长度,拼接等操作都要遍历到'\0'来实现,需要自己控制内存使用,操作复杂度高。</p> <h3 id="前置知识">前置知识</h3> <p>由于我对C语言没有深入了解,有很多知识点会在前面补充。</p> <ul> @@ -56,7 +59,8 @@ </span></span><span style="display:flex;"><span> <span style="color:#75715e">// 柔性数组,没有分配之前不占内存 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">char</span> <span style="color:#111">buf</span><span style="color:#111">[];</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div><h4 id="二进制安全">二进制安全</h4> +</span></span></code></pre></div><p>记录了已经使用的空间和分配的空间,比C字符串操作效率更高。和 C 语言中的字符串操作相比,SDS 通过记录字符数组的使用长度和分配空间大小,避免了对字符串的遍历操作,降低了操作开销,进一步就可以帮助诸多字符串操作更加高效地完成,比如创建、追加、复制、比较等。</p> +<h4 id="二进制安全">二进制安全</h4> <pre><code>什么是二进制安全?通俗地讲,C语言中,用“\0”表示字符串的结束,如果字符串中本身就有“\0”字符,字符串就会被截断,即非二进制安全;若通过某种机制,保证读写字符串时不损害其内容,则是二进制安全。在网络报文中常常需要二进制安全。 sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;,保障了二进制安全。 @@ -419,7 +423,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span><span style="color:#111">}</span> </span></span></code></pre></div><h2 id="压缩列表">压缩列表</h2> <p>具体的实现在ziplist.h和ziplist.c</p> 
-<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是O(1)。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> +<p>压缩列表是Redis中一种高效利用内存的数据结构,用来储存字符串和数字,它的push和pop操作都是 <strong>O(1)</strong> 。Redis的有序集合、散列和列表都直接或者间接使用了压缩列表。当有序集合或散列表的元素个数比较少,且元素都是短字符串时,Redis便使用压缩列表作为其底层数据存储结构。列表使用快速链表(quicklist)数据结构存储,而快速链表就是双向链表与压缩列表的组合。</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// ziplist 结构 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#f92672">&lt;</span><span style="color:#111">zlbytes</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zltail</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zllen</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#111">...</span> <span style="color:#f92672">&lt;</span><span style="color:#111">entry</span><span style="color:#f92672">&gt;</span> <span style="color:#f92672">&lt;</span><span style="color:#111">zlend</span><span style="color:#f92672">&gt;</span> </span></span></code></pre></div><p>这里的所有结构都是按照小端存储。</p> @@ -496,6 +500,7 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></span><span style="display:flex;"><span><span style="color:#75715e"> is, this points to prev-entry-len field. 
*/</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> <span style="color:#111">zlentry</span><span style="color:#111">;</span> </span></span></code></pre></div><p>对于压缩列表的任意元素,获取前一个元素的长度、判断存储的数据类型、获取数据内容都需要经过复杂的解码运算。解码后的结果应该被缓存起来,为此定义了结构体zlentry,用于表示解码后的压缩列表元素。</p> +<p>解码操作,主要用宏实现:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">inline</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">zipEntry</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">zlentry</span> <span style="color:#f92672">*</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#111">{</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_DECODE_PREVLEN</span><span style="color:#111">(</span><span style="color:#111">p</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlen</span><span style="color:#111">);</span> </span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIP_ENTRY_ENCODING</span><span style="color:#111">(</span><span style="color:#111">p</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span><span style="color:#111">,</span> <span style="color:#111">e</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">encoding</span><span style="color:#111">);</span> @@ -504,7 +509,22 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">headersize</span> <span style="color:#f92672">=</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">prevrawlensize</span> <span style="color:#f92672">+</span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">lensize</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">-&gt;</span><span style="color:#111">p</span> <span style="color:#f92672">=</span> <span style="color:#111">p</span><span style="color:#111">;</span> </span></span><span style="display:flex;"><span><span style="color:#111">}</span> -</span></span></code></pre></div><h2 id="字典">字典</h2> +</span></span></code></pre></div><p>这里主要就是对字节的读取,可以去看源代码。</p> +<h3 id="操作">操作</h3> +<h4 id="创建-2">创建</h4> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Create a new empty ziplist. 
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">// 先申请初始的空间(4+4+2+1),再对zlbytes,zltail,zllen,zlend逐个初始化 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#75af00">ziplistNew</span><span style="color:#111">(</span><span style="color:#00a8c8">void</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">int</span> <span style="color:#111">bytes</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#f92672">+</span><span style="color:#111">ZIPLIST_END_SIZE</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#f92672">*</span><span style="color:#111">zl</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_BYTES</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span style="color:#111">(</span><span style="color:#111">bytes</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_TAIL_OFFSET</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#75af00">intrev32ifbe</span><span 
style="color:#111">(</span><span style="color:#111">ZIPLIST_HEADER_SIZE</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">ZIPLIST_LENGTH</span><span style="color:#111">(</span><span style="color:#111">zl</span><span style="color:#111">)</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">zl</span><span style="color:#111">[</span><span style="color:#111">bytes</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">ZIP_END</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">zl</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h4 id="插入元素">插入元素</h4> +<h2 id="字典">字典</h2> <h3 id="结构-1">结构</h3> <p>节点:</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictEntry</span> <span style="color:#111">{</span> @@ -525,17 +545,400 @@ sds使用 len 来控制字符串长度,而不是使用&quot;\0&quot;, </span></span></code></pre></div><p>可以看出是使用链表法来解决hash冲突的。</p> <div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">struct</span> <span style="color:#111">dict</span> <span style="color:#111">{</span> </span></span><span style="display:flex;"><span> 
<span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">;</span> <span style="color:#75715e">// 对应特定类型操作函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 哈希表。有两个,一个正常使用,另外一个在rehash时使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">// 记录每个哈希表被使用的数目。 </span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> -</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> -</span></span><span style="display:flex;"><span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">rehashidx</span><span style="color:#111">;</span> <span style="color:#75715e">/* rehashing not in progress if rehashidx == -1 */</span> </span></span><span style="display:flex;"><span> </span></span><span 
style="display:flex;"><span> <span style="color:#75715e">/* Keep small vars at end for optimal (minimal) struct padding */</span> </span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int16_t</span> <span style="color:#111">pauserehash</span><span style="color:#111">;</span> <span style="color:#75715e">/* If &gt;0 rehashing is paused (&lt;0 indicates coding error) */</span> -</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. (size = 1&lt;&lt;exp) */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// size 的指数,size 是2 的N次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">];</span> <span style="color:#75715e">/* exponent of size. 
(size = 1&lt;&lt;exp) */</span> </span></span><span style="display:flex;"><span><span style="color:#111">};</span> -</span></span></code></pre></div> +</span></span></code></pre></div><p>这里可以看到一个dictType 用来对应特定类型的操作函数,这些函数体现了面向对象编程的思想,会在后面合适的时机用到。</p> +<p>比如这个hashFunction 用来控制dict使用的hash函数,默认为siphash。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-go" data-lang="go"><span style="display:flex;"><span><span style="color:#75af00">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#75af00">dictType</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">uint64_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">hashFunction</span><span style="color:#111">)(</span><span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// hash函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span style="color:#75715e">// key的 复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span 
style="color:#f92672">*</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDup</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 的复制函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyCompare</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key1</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key2</span><span style="color:#111">);</span> <span style="color:#75715e">// key 对比函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">keyDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">key</span><span style="color:#111">);</span> <span 
style="color:#75715e">// key 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75af00">void</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">valDestructor</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">,</span> <span style="color:#75af00">void</span> <span style="color:#f92672">*</span><span style="color:#75af00">obj</span><span style="color:#111">);</span> <span style="color:#75715e">// val 销毁函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">expandAllowed</span><span style="color:#111">)(</span><span style="color:#75af00">size_t</span> <span style="color:#75af00">moreMem</span><span style="color:#111">,</span> <span style="color:#75af00">double</span> <span style="color:#75af00">usedRatio</span><span style="color:#111">);</span> <span style="color:#75715e">//扩展函数 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">/* Allow a dictEntry to carry extra caller-defined metadata. The +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * extra memory is initialized to 0 when a dictEntry is allocated. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">size_t</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#75af00">dictEntryMetadataBytes</span><span style="color:#111">)(</span><span style="color:#75af00">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">d</span><span style="color:#111">);</span> <span style="color:#75715e">// 元数据 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#75af00">dictType</span><span style="color:#111">;</span> +</span></span></code></pre></div><h3 id="创建-3">创建</h3> +<p>先申请空间,再初始化参数</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Reset hash table parameters already initialized with _dictInit()*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Create a new hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictCreate</span><span style="color:#111">(</span><span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">type</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">/* Initialize the hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictInit</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">dictType</span> <span style="color:#f92672">*</span><span style="color:#111">type</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">0</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">type</span> <span style="color:#f92672">=</span> <span style="color:#111">type</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
<span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> <span style="color:#75715e">// 使用一些宏来反馈结果 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="增加与扩容">增加与扩容</h3> +<p>这里先提前讲一下Rehash的概念,便于理解增加扩容中的一些操作:</p> +<p>扩容后,字典容量及掩码值会发生改变,同一个键与掩码经位运算后得到的索引值就会发生改变,从而导致根据键查找不到值的情况。解决这个问题的方法是,<strong>新扩容的内存放到一个全新的Hash表中(ht[1]),并给字典打上在进行rehash操作中的标识(即rehashidx! 
=-1)</strong>。此后,新添加的键值对都往新的Hash表中存储;而修改、删除、查找操作需要在ht[0]、ht[1]中进行检查,然后再决定去对哪个Hash表操作。除此之外,还需要把老Hash表(ht[0])中的数据重新计算索引值后全部迁移插入到新的Hash表(ht[1])中,此迁移过程称作rehash。</p> +<p>先看增加单个entry的操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* Add an element to the target hash table */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictAdd</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">val</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">,</span><span style="color:#111">NULL</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#75af00">dictSetVal</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">val</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictAddRaw</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">existing</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">htidx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 
如果正在rehash,在add时进行一步rehash,这里是将大范围的rehash分散来减小资源集中消耗 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index of the new element, or -1 if +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the element already exists. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">((</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#75af00">_dictKeyIndex</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#111">key</span><span style="color:#111">),</span> <span style="color:#111">existing</span><span style="color:#111">))</span> <span style="color:#f92672">==</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the memory and store the new 
entry. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Insert the element in top, with the assumption that in a database +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * system it is more likely that recently added entries are accessed +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * more frequently. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">htidx</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">?</span> <span style="color:#ae81ff">1</span> <span style="color:#f92672">:</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">metasize</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictMetadataSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">metasize</span> <span style="color:#f92672">&gt;</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">memset</span><span style="color:#111">(</span><span style="color:#75af00">dictMetadata</span><span style="color:#111">(</span><span style="color:#111">entry</span><span style="color:#111">),</span> <span style="color:#ae81ff">0</span><span style="color:#111">,</span> <span style="color:#111">metasize</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 插入在顶部:根据时空局限性 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">entry</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">][</span><span style="color:#111">index</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">htidx</span><span style="color:#111">]</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> 
+</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Set the hash entry fields. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictSetKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">entry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>可以看出,add调用了一个底层的addraw函数。addraw首先使用dictkeyindex来查找一个合适的插入位置,如果这个key已经存在就退出add操作。然后确定是否在rehash,上面我们讲过如果在rehash那么 <strong>新添加的键值对都往新的Hash表中存储</strong>。后面就申请空间,并把新节点插入到对应桶链表的头部,这利用了时间局部性:在数据库系统中,最近添加的条目更有可能被频繁访问。</p> +<p>这里看一下dictSetKey和dictSetVal:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">#define dictSetKey(d, entry, _key_) do { \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;keyDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (d)-&gt;type-&gt;keyDup((d), _key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;key = (_key_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> +</span></span><span style="display:flex;"><span><span style="color:#75715e">#define dictSetVal(d, entry, _val_) do 
{ \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> if ((d)-&gt;type-&gt;valDup) \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (d)-&gt;type-&gt;valDup((d), _val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> else \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> (entry)-&gt;v.val = (_val_); \ +</span></span></span><span style="display:flex;"><span><span style="color:#75715e">} while(0) +</span></span></span></code></pre></div><p>可以看出是用宏的形式调用dict的dicttype函数,也就是说这些操作是可以调整的。</p> +<p>扩容操作:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 将d扩容到2^size的大小 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">int</span> <span style="color:#75af00">_dictExpand</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span><span style="color:#f92672">*</span> <span style="color:#111">malloc_failed</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span 
style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the size is invalid if it is smaller than the number of +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * elements already inside the hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">||</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">**</span><span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">=</span> <span 
style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#111">size</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Detect overflows */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">size_t</span> <span style="color:#111">newsize</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">1ul</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 后者判断在什么时候成立? +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">newsize</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">size</span> <span style="color:#f92672">||</span> <span style="color:#111">newsize</span> <span style="color:#f92672">*</span> <span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">)</span> <span style="color:#f92672">&lt;</span> <span style="color:#111">newsize</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Rehashing to the same table size is not useful. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">new_ht_size_exp</span> <span style="color:#f92672">==</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Allocate the new hash table and initialize all pointers to NULL */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">malloc_failed</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">ztrycalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">*</span><span style="color:#111">malloc_failed</span><span 
style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_ERR</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> <span style="color:#00a8c8">else</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">new_ht_table</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zcalloc</span><span style="color:#111">(</span><span style="color:#111">newsize</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#111">dictEntry</span><span style="color:#f92672">*</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 新的hash表被使用的数量 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">new_ht_used</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Is this the first initialization? If so it&#39;s not really a rehashing +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * we just set the first hash table so that it can accept keys. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span 
style="color:#75715e">/* Prepare a second hash table for incremental rehashing */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_size_exp</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_used</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">new_ht_table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">DICT_OK</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>首先判断是否在rehash,在rehash中不能扩容。然后创建一个新的hash table,这个newsize是2的n次幂。expand操作在刚开始初始化时会使用,也会在这里做一个判断。更常用的是在扩容后进行rehash操作。</p> +<p>获得size的函数:</p> +<div class="highlight"><pre tabindex="0" 
style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">// 确保hash cap 为2的N次幂 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">signed</span> <span style="color:#00a8c8">char</span> <span style="color:#75af00">_dictNextExp</span><span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">char</span> <span style="color:#111">e</span> <span style="color:#f92672">=</span> <span style="color:#111">DICT_HT_INITIAL_EXP</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">size</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">LONG_MAX</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">(</span><span style="color:#ae81ff">8</span><span style="color:#f92672">*</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 1 &lt;&lt; e == 1 * 2^e +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// 找到一个大于size 的2^e 
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#ae81ff">1</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(((</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#ae81ff">1</span><span style="color:#f92672">&lt;&lt;</span><span style="color:#111">e</span><span style="color:#111">)</span> <span style="color:#f92672">&gt;=</span> <span style="color:#111">size</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">e</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">e</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="渐进式rehash">渐进式Rehash</h3> +<p>直接看函数:</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">int</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">n</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#00a8c8">int</span> <span style="color:#111">empty_visits</span> <span style="color:#f92672">=</span> <span style="color:#111">n</span><span style="color:#f92672">*</span><span style="color:#ae81ff">10</span><span style="color:#111">;</span> <span style="color:#75715e">/* Max number of empty buckets to visit. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">n</span><span style="color:#f92672">--</span> <span style="color:#f92672">&amp;&amp;</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">!=</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">de</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Note that rehashidx can&#39;t overflow as we are sure there are more +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * 
elements because ht[0].used != 0 */</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">assert</span><span style="color:#111">(</span><span style="color:#75af00">DICTHT_SIZE</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">])</span> <span style="color:#f92672">&gt;</span> <span style="color:#111">(</span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span><span style="color:#111">)</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">--</span><span style="color:#111">empty_visits</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span 
style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Move all the keys in this bucket from the old to the new hash HT */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">de</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">nextde</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Get the index in the new hash table */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">de</span><span 
style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">)</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">][</span><span style="color:#111">h</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">de</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span><span 
style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">de</span> <span style="color:#f92672">=</span> <span style="color:#111">nextde</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">][</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Check if we already rehashed the whole table... 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">zfree</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Copy the new ht onto the old one */</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">_dictReset</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">rehashidx</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* More to rehash... */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><p>rehash除了扩容时会触发,缩容时也会触发。Redis整个rehash的实现,主要分为如下几步完成。</p> +<ol> +<li>给Hash表ht[1]申请足够的空间;扩容时空间大小为当前容量<em>2,即d-&gt;ht[0]. 
used</em>2;当使用量不到总空间10%时,则进行缩容。缩容时空间大小则为能恰好包含d-&gt;ht[0].used个节点的2^N次方幂整数,并把字典中字段rehashidx标识为0</li> +<li>进行rehash操作调用的是dictRehash函数,重新计算ht[0]中每个键的Hash值与索引值(重新计算就叫rehash),依次添加到新的Hash表ht[1],并把老Hash表中该键值对删除。把字典中rehashidx字段修改为Hash表ht[0]中正在进行rehash操作节点的索引值。</li> +<li>rehash操作后,清空ht[0],然后对调一下ht[1]与ht[0]的值,并把字典中rehashidx字段标识为-1。</li> +</ol> +<p>我们知道,Redis可以提供高性能的线上服务,而且是单进程模式,当数据库中键值对数量达到了百万、千万、亿级别时,整个rehash过程将非常缓慢,如果不优化rehash过程,可能会造成很严重的服务不可用现象。Redis优化的思想很巧妙,利用分而治之的思想来进行rehash操作,大致的步骤如下。</p> +<p>执行插入、删除、查找、修改等操作前,都先判断当前字典rehash操作是否在进行中,进行中则调用dictRehashStep函数进行rehash操作(每次只对1个节点进行rehash操作,共执行1次)。除这些操作之外,当服务空闲时,如果当前字典也需要进行rehash操作,则会调用incrementallyRehash函数进行批量rehash操作(每次对100个节点进行rehash操作,共执行1毫秒)。在经历N次rehash操作后,整个ht[0]的数据都会迁移到ht[1]中,这样做的好处就是把本应集中处理的时间分散到了上百万、千万、亿次操作中,所以其耗时可忽略不计。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* This function performs just a step of rehashing, and only if hashing has +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * not been paused for our hash table. When we have iterators in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * middle of a rehashing we can&#39;t mess with the two hash tables otherwise +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * some elements can be missed or duplicated.
+</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This function is called by common lookup or update operations in the +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * dictionary so that the hash table automatically migrates from H1 to H2 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * while it is actively used. */</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#00a8c8">void</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">pauserehash</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#75af00">dictRehash</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span><span style="color:#ae81ff">1</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="删除-1">删除</h3> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">static</span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGenericDelete</span><span 
style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#00a8c8">const</span> <span style="color:#00a8c8">void</span> <span style="color:#f92672">*</span><span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#00a8c8">int</span> <span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">uint64_t</span> <span style="color:#111">h</span><span style="color:#111">,</span> <span style="color:#111">idx</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">he</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">prevHe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* dict is empty */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#75af00">dictSize</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#f92672">==</span> <span style="color:#ae81ff">0</span><span style="color:#111">)</span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span 
style="color:#111">(</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#75af00">_dictRehashStep</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">h</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictHashKey</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">table</span> <span style="color:#f92672">&lt;=</span> <span style="color:#ae81ff">1</span><span style="color:#111">;</span> <span style="color:#111">table</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">idx</span> <span style="color:#f92672">=</span> <span style="color:#111">h</span> <span style="color:#f92672">&amp;</span> <span style="color:#75af00">DICTHT_SIZE_MASK</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// 查找 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">while</span><span style="color:#111">(</span><span style="color:#111">he</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">key</span><span style="color:#f92672">==</span><span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span> <span style="color:#f92672">||</span> <span style="color:#75af00">dictCompareKeys</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">key</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">key</span><span style="color:#111">))</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* Unlink the element from the list */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#111">prevHe</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span> <span 
style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">else</span> <span style="color:#75715e">// 在bucket顶部,直接略过 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">][</span><span style="color:#111">idx</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#111">nofree</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictFreeUnlinkedEntry</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">,</span> <span style="color:#111">he</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#111">table</span><span style="color:#111">]</span><span style="color:#f92672">--</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">he</span><span style="color:#111">;</span> 
+</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">prevHe</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">he</span> <span style="color:#f92672">=</span> <span style="color:#111">he</span><span style="color:#f92672">-&gt;</span><span style="color:#111">next</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">if</span> <span style="color:#111">(</span><span style="color:#f92672">!</span><span style="color:#75af00">dictIsRehashing</span><span style="color:#111">(</span><span style="color:#111">d</span><span style="color:#111">))</span> <span style="color:#00a8c8">break</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> <span style="color:#75715e">/* not found */</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div><h3 id="遍历">遍历</h3> +<p>遍历Redis整个数据库主要有两种方式:全遍历(例如keys命令)、间断遍历(hscan命令):</p> +<ul> +<li>全遍历: 一次命令执行就遍历完整个数据库。</li> +<li>间断遍历: 每次命令执行只取部分数据,分多次遍历。</li> +</ul> +<p>迭代器——可在容器(容器可为字典、链表等数据结构)上遍访的接口,设计人员无须关心容器的内容,调用迭代器固定的接口就可遍历数据,在很多高级语言中都有实现。</p> +<p>字典迭代器主要用于迭代字典这个数据结构中的数据,既然是迭代字典中的数据,必然会出现一个问题,迭代过程中,如果发生了数据增删,则可能导致字典触发rehash操作,或迭代开始时字典正在进行rehash操作,从而导致一条数据可能多次遍历到。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span 
style="display:flex;"><span><span style="color:#00a8c8">typedef</span> <span style="color:#00a8c8">struct</span> <span style="color:#111">dictIterator</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">long</span> <span style="color:#111">index</span><span style="color:#111">;</span> <span style="color:#75715e">// 迭代hash中的索引值 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#75715e">// safe 为1表示是安全迭代器,可以在add,find等rehash场景中使用 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#00a8c8">int</span> <span style="color:#111">table</span><span style="color:#111">,</span> <span style="color:#111">safe</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">// entry 当前读取节点,nextEntry entry 节点的next字段 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">entry</span><span style="color:#111">,</span> <span style="color:#f92672">*</span><span style="color:#111">nextEntry</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* unsafe iterator fingerprint for misuse detection. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">fingerprint</span><span style="color:#111">;</span><span style="color:#75715e">// 字典指纹,字典发生改变随之改变 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span><span style="color:#111">}</span> <span style="color:#111">dictIterator</span><span style="color:#111">;</span> +</span></span></code></pre></div><p>fingerprint字段是一个64位的整数,表示在给定时间内字典的状态。在这里称其为字典的指纹,因为该字段的值为字典(dict结构体)中所有字段值组合在一起生成的Hash值,所以当字典中数据发生任何变化时,其值都会不同,生成算法可参见源码dict.c文件中的dictFingerprint函数。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#75715e">/* A fingerprint is a 64 bit number that represents the state of the dictionary +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * at a given time, it&#39;s just a few dict properties xored together. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * When an unsafe iterator is initialized, we get the dict fingerprint, and check +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * the fingerprint again when the iterator is released. +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * If the two fingerprints are different it means that the user of the iterator +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * performed forbidden operations against the dictionary while iterating. 
*/</span> +</span></span><span style="display:flex;"><span><span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#75af00">dictFingerprint</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">unsigned</span> <span style="color:#00a8c8">long</span> <span style="color:#00a8c8">long</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">6</span><span style="color:#111">],</span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">int</span> <span style="color:#111">j</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span 
style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">2</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span style="color:#ae81ff">0</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">3</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#00a8c8">long</span><span style="color:#111">)</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_table</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">4</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_size_exp</span><span style="color:#111">[</span><span style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#ae81ff">5</span><span style="color:#111">]</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ht_used</span><span style="color:#111">[</span><span 
style="color:#ae81ff">1</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* We hash N integers by summing every successive integer with the integer +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * hashing of the previous sum. Basically: +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * Result = hash(hash(hash(int1)+int2)+int3) ... +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * This way the same set of integers in a different order will (likely) hash +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"> * to a different number. */</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">for</span> <span style="color:#111">(</span><span style="color:#111">j</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> <span style="color:#111">j</span> <span style="color:#f92672">&lt;</span> <span style="color:#ae81ff">6</span><span style="color:#111">;</span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">+=</span> <span style="color:#111">integers</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">];</span> +</span></span><span style="display:flex;"><span> <span style="color:#75715e">/* For the hashing step we use Tomas Wang&#39;s 64 bit integer hash. 
*/</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#f92672">~</span><span style="color:#111">hash</span><span style="color:#111">)</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">21</span><span style="color:#111">);</span> <span style="color:#75715e">// hash = (hash &lt;&lt; 21) - hash - 1; +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">24</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">3</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">8</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 265 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span 
style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">14</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">2</span><span style="color:#111">))</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">4</span><span style="color:#111">);</span> <span style="color:#75715e">// hash * 21 +</span></span></span><span style="display:flex;"><span><span style="color:#75715e"></span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">^</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&gt;&gt;</span> <span style="color:#ae81ff">28</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">hash</span> <span style="color:#f92672">=</span> <span style="color:#111">hash</span> <span style="color:#f92672">+</span> <span style="color:#111">(</span><span style="color:#111">hash</span> <span style="color:#f92672">&lt;&lt;</span> <span style="color:#ae81ff">31</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">hash</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> 
+</span></span></code></pre></div><p>根据迭代器结构中的safe字段,将迭代器分为普通迭代器和安全迭代器:</p> +<ul> +<li>普通迭代器: 只遍历数据</li> +<li>安全迭代器: 遍历的同时删除数据</li> +</ul> +<h4 id="普通迭代器">普通迭代器</h4> +<p>普通迭代器迭代字典中数据时,会对迭代器中fingerprint字段的值作严格的校验,来保证迭代过程中字典结构不发生任何变化,确保读取出的数据不出现重复</p> +<p>当Redis执行部分命令时会使用普通迭代器迭代字典数据,例如sort命令。sort命令主要作用是对给定列表、集合、有序集合的元素进行排序,如果给定的是有序集合,其成员名存储用的是字典,分值存储用的是跳跃表,则执行sort命令读取数据的时候会用到迭代器来遍历整个字典。</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">set</span> <span style="color:#f92672">=</span> <span style="color:#111">((</span><span style="color:#111">zset</span><span style="color:#f92672">*</span><span style="color:#111">)</span><span style="color:#111">sortval</span><span style="color:#f92672">-&gt;</span><span style="color:#111">ptr</span><span style="color:#111">)</span><span style="color:#f92672">-&gt;</span><span style="color:#111">dict</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">di</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictEntry</span> <span style="color:#f92672">*</span><span style="color:#111">setele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sds</span> <span style="color:#111">sdsele</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">di</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">set</span><span style="color:#111">);</span> +</span></span><span 
style="display:flex;"><span> <span style="color:#00a8c8">while</span><span style="color:#111">((</span><span style="color:#111">setele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictNext</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">))</span> <span style="color:#f92672">!=</span> <span style="color:#111">NULL</span><span style="color:#111">)</span> <span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">sdsele</span> <span style="color:#f92672">=</span> <span style="color:#75af00">dictGetKey</span><span style="color:#111">(</span><span style="color:#111">setele</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">obj</span> <span style="color:#f92672">=</span> <span style="color:#75af00">createStringObject</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">,</span><span style="color:#75af00">sdslen</span><span style="color:#111">(</span><span style="color:#111">sdsele</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span style="color:#111">score</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">vector</span><span style="color:#111">[</span><span style="color:#111">j</span><span style="color:#111">].</span><span style="color:#111">u</span><span style="color:#111">.</span><span 
style="color:#111">cmpobj</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">j</span><span style="color:#f92672">++</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictReleaseIterator</span><span style="color:#111">(</span><span style="color:#111">di</span><span style="color:#111">);</span> +</span></span></code></pre></div><ol> +<li> +<p>调用dictGetIterator函数初始化一个普通迭代器,此时会把iter-&gt;safe值置为0,表示初始化的迭代器为普通迭代器</p> +<div class="highlight"><pre tabindex="0" style="color:#272822;background-color:#fafafa;-moz-tab-size:4;-o-tab-size:4;tab-size:4;"><code class="language-c" data-lang="c"><span style="display:flex;"><span><span style="color:#00a8c8">void</span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">d</span> <span style="color:#f92672">=</span> <span style="color:#111">d</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">table</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span 
style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">index</span> <span style="color:#f92672">=</span> <span style="color:#f92672">-</span><span style="color:#ae81ff">1</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">safe</span> <span style="color:#f92672">=</span> <span style="color:#ae81ff">0</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">entry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">iter</span><span style="color:#f92672">-&gt;</span><span style="color:#111">nextEntry</span> <span style="color:#f92672">=</span> <span style="color:#111">NULL</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span><span style="display:flex;"><span> +</span></span><span style="display:flex;"><span><span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#75af00">dictGetIterator</span><span style="color:#111">(</span><span style="color:#111">dict</span> <span style="color:#f92672">*</span><span style="color:#111">d</span><span style="color:#111">)</span> +</span></span><span style="display:flex;"><span><span style="color:#111">{</span> +</span></span><span style="display:flex;"><span> <span style="color:#111">dictIterator</span> <span style="color:#f92672">*</span><span style="color:#111">iter</span> <span style="color:#f92672">=</span> <span style="color:#75af00">zmalloc</span><span style="color:#111">(</span><span style="color:#00a8c8">sizeof</span><span style="color:#111">(</span><span 
style="color:#f92672">*</span><span style="color:#111">iter</span><span style="color:#111">));</span> +</span></span><span style="display:flex;"><span> <span style="color:#75af00">dictInitIterator</span><span style="color:#111">(</span><span style="color:#111">iter</span><span style="color:#111">,</span> <span style="color:#111">d</span><span style="color:#111">);</span> +</span></span><span style="display:flex;"><span> <span style="color:#00a8c8">return</span> <span style="color:#111">iter</span><span style="color:#111">;</span> +</span></span><span style="display:flex;"><span><span style="color:#111">}</span> +</span></span></code></pre></div></li> +<li> +<p>循环调用dictNext函数依次遍历字典中Hash表的节点,首次遍历时会通过dictFingerprint函数拿到当前字典的指纹值。</p> +</li> +<li></li> +</ol> +<h4 id="安全迭代器">安全迭代器</h4> + diff --git "a/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.html" "b/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.html" index a2d6af3..b0f8168 100644 --- "a/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.html" +++ "b/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.html" @@ -370,7 +370,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git "a/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.xml" "b/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.xml" index 0f35df0..5851e88 100644 --- "a/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.xml" +++ "b/docs/tags/\350\201\224\351\202\246\345\255\246\344\271\240/index.xml" @@ -179,6 +179,8 @@ discriminative aggregation (<strong>判别聚合</strong>)</p> <h4 id="算法流程">算法流程</h4> <p><img src="https://s2.loli.net/2022/11/15/kxSZgVusbUM29hY.png" alt=""></p> <p>每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。</p> +<h2 id="iot-22a-blockchain-based-model-migration-approach-for-secure-and-sustainable-federated-learning-in-iot-systems">IOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》</h2> +<h3 id="背景-2">背景</h3> diff --git "a/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.html" "b/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.html" index 2b5ad13..6e13439 100644 --- "a/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.html" +++ "b/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.html" @@ -529,7 +529,7 @@

    Built with Hugo and theme Tokiwa.
    - 19 pages, 18926 words in total. + 19 pages, 23968 words in total. diff --git "a/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.xml" "b/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.xml" index 6f25958..b15572d 100644 --- "a/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.xml" +++ "b/docs/tags/\350\256\272\346\226\207\351\230\205\350\257\273/index.xml" @@ -23,6 +23,25 @@ <p>详细的内容见 <a href="https://lzphi.cn/2020/12/20/2020-12-17-Tangle-%E7%99%BD%E7%9A%AE%E4%B9%A6/">Tangle白皮书中文版</a></p> <p><strong>tangle</strong> 是 <strong>IOTA</strong> 所用的技术,为物联网和小额支付提供支持。不同于常见的区块链,它使用一个DAG(有向无环图)作为结构,这里称为Tangle。</p> <p>传统区块链系统的单链结构在交易认证,吞吐量,资源消耗等方面存在缺陷,DAG结构的区块链是一个有效的解决方案。</p> +<h2 id="fabric-白皮书">Fabric 白皮书</h2> +<p>Hyperledger Fabric 是 Linux 基金会 的 一个项目,是Hyperledger下面的一个子项目。作为一个开源联盟链,被很多项目应用。</p> +<p>它的主要特点是模块化的共识机制,相对高性能,和可以使用常规语言编写智能合约(golang)。</p> +<h3 id="概念">概念</h3> +<h4 id="联盟链">联盟链</h4> +<p>文中划分联盟链和公链的标准是: <strong>是否发币和节点身份是否可知</strong></p> +<p>状态机复制(SMR)是建设弹性应用众所周知的方式,但是如果我们把运行在区块链上的智能合约看作一种分布式应用,与传统的SMR区别在于:</p> +<ul> +<li>许多应用并发运行</li> +<li>这些应用可以被任何人动态地部署</li> +<li>这些应用的代码是不被信任的,可能有恶意</li> +</ul> +<h4 id="order-execute">order-execute</h4> +<p>现有的大部分可以运行智能合约的区块链遵循SMR实现一种order-execute的架构: 节点先将交易排序再将它们广播给其他节点,然后每个节点顺序执行。</p> +<p><img src="https://s2.loli.net/2022/12/06/B4Ns3GZAKl8dIXT.png" alt=""></p> +<p>所有节点对所有交易的顺序执行限制了性能,并且需要采取复杂的措施来防止源自不受信任的合约(例如在以太坊中使用“gas”计算运行时)的针对平台的拒绝服务攻击;智能合约很难做到并发。</p> +<p>最大的限制是交易必须是确定的,这就使得不能使用常规编程语言来实现,必须使用特定的语言。</p> +<h4 id="execute-order-validate">execute-order-validate</h4> +<p><img src="https://s2.loli.net/2022/12/07/jDBxcLmYrfXSnbl.png" alt=""></p> @@ -398,6 +417,8 @@ discriminative aggregation (<strong>判别聚合</strong>)</p> <h4 id="算法流程">算法流程</h4> <p><img src="https://s2.loli.net/2022/11/15/kxSZgVusbUM29hY.png" alt=""></p> <p>每轮开始时,服务器先检查客户端的模型,根据给定的超参数$\tau$和滞后容忍算法来分配模型。服务器收集上传的更新,错过上次更新的节点会被优先采集。等采集到的更新满足预先设置的标准后,执行三步合并,然后更新缓存状态。</p> +<h2 
id="iot-22a-blockchain-based-model-migration-approach-for-secure-and-sustainable-federated-learning-in-iot-systems">IOT ‘22《A Blockchain-based Model Migration Approach for Secure and Sustainable Federated Learning in IoT Systems》</h2> +<h3 id="背景-2">背景</h3>