/* The size of a ziplist header: two 32 bit integers for the total * bytes count and last item offset. One 16 bit integer for the number * of items field. */ #define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
/* Size of the "end of ziplist" entry. Just one byte. */ #define ZIPLIST_END_SIZE (sizeof(uint8_t))
/* We use this function to receive information about a ziplist entry. * Note that this is not how the data is actually encoded, is just what we * get filled by a function in order to operate more easily. */ typedefstructzlentry { /* prevrawlen编码后的长度 */ unsignedint prevrawlensize; /* Bytes used to encode the previous entry len*/ /* prevrawlen的值 */ unsignedint prevrawlen; /* Previous entry len. */ /* encoding的长度 */ unsignedint lensize; /* Bytes used to encode this entry type/len. For example strings have a 1, 2 or 5 bytes header. Integers always use a single byte.*/ /* 数据编码后的长度 */ unsignedint len; /* Bytes used to represent the actual entry. For strings this is just the string length while for integers it is 1, 2, 3, 4, 8 or 0 (for 4 bit immediate) depending on the number range. */ unsignedint headersize; /* prevrawlensize + lensize. */ unsignedchar encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on the entry encoding. However for 4 bits immediate integers this can assume a range of values and must be range-checked. */ unsignedchar *p; /* Pointer to the very start of the entry, that is, this points to prev-entry-len field. */ } zlentry;
/* Encode the length of the previous entry and write it to "p". This only * uses the larger encoding (required in __ziplistCascadeUpdate). */ intzipStorePrevEntryLengthLarge(unsignedchar *p, unsignedint len){ uint32_t u32; if (p != NULL) { p[0] = ZIP_BIG_PREVLEN; u32 = len; memcpy(p + 1, &u32, sizeof(u32)); memrev32ifbe(p + 1); } return1 + sizeof(uint32_t); }
/* Encode the length of the previous entry and write it to "p". Return the * number of bytes needed to encode this length if "p" is NULL. */ unsignedintzipStorePrevEntryLength(unsignedchar *p, unsignedint len){ if (p == NULL) { return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(uint32_t) + 1; } else { if (len < ZIP_BIG_PREVLEN) { p[0] = len; return1; } else { return zipStorePrevEntryLengthLarge(p, len); } } }
/* Insert item at "p". */ /* zl: 被插入的ziplist对象 * p: 插入的地址 * s: 插入的值 * slen: s的长度 * 首先获取s编码成entry的的信息: prevlen、总长度reqlen * 然后获取要将后续entry往后移动的距离,将其复制过去,并进行级联更新 * 最后填入entry的信息 */ unsignedchar *__ziplistInsert(unsignedchar *zl, unsignedchar *p, unsignedchar *s, unsignedint slen) { size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen; unsignedint prevlensize, prevlen = 0; size_t offset; int nextdiff = 0; unsignedchar encoding = 0; longlong value = 123456789; /* initialized to avoid warning. Using a value that is easy to see if for some reason we use it uninitialized. */ zlentry tail;
/* Find out prevlen for the entry that is inserted. */ if (p[0] != ZIP_END) { /* 获取p指向entry的prevlen */ ZIP_DECODE_PREVLEN(p, prevlensize, prevlen); } else { /* 获取最后一个entry的len */ unsignedchar *ptail = ZIPLIST_ENTRY_TAIL(zl); if (ptail[0] != ZIP_END) { prevlen = zipRawEntryLengthSafe(zl, curlen, ptail); } }
/* 获取reqlen,先获取数据所需编码长度 */ /* See if the entry can be encoded * 尝试转成数字编码,成功则value和encoding转成对应值*/ if (zipTryEncoding(s, slen, &value, &encoding)) { /* 'encoding' is set to the appropriate integer encoding */ reqlen = zipIntSize(encoding); } else { /* 'encoding' is untouched, however zipStoreEntryEncoding will use the * string length to figure out how to encode it. */ reqlen = slen; } /* We need space for both the length of the previous entry and * the length of the payload. */ /* 加上prevlen的长度 */ reqlen += zipStorePrevEntryLength(NULL, prevlen); /* 加上encoding的长度 */ reqlen += zipStoreEntryEncoding(NULL, encoding, slen);
/* When the insert position is not equal to the tail, we need to * make sure that the next entry can hold this entry's length in * its prevlen field. */ int forcelarge = 0; /* zipPrevLenByteDiff 得到表示reqlen所需的prevlenszie - p.prevlenszie*/ nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p, reqlen) : 0; if (nextdiff == -4 && reqlen < 4) { nextdiff = 0; forcelarge = 1; }
/* Store offset because a realloc may change the address of zl. */ offset = p - zl; newlen = curlen + reqlen + nextdiff; /* 调整分配的空间 */ zl = ziplistResize(zl, newlen); p = zl + offset;
/* Apply memory move when necessary and update tail offset. */ if (p[0] != ZIP_END) { /* Subtract one because of the ZIP_END bytes */ /* 移动后续entry的内容 */ memmove(p + reqlen, p - nextdiff, curlen - offset - 1 + nextdiff);
/* Encode this entry's raw length in the next entry. */ if (forcelarge) zipStorePrevEntryLengthLarge(p + reqlen, reqlen); else zipStorePrevEntryLength(p + reqlen, reqlen);
/* When the tail contains more than one entry, we need to take * "nextdiff" in account as well. Otherwise, a change in the * size of prevlen doesn't have an effect on the *tail* offset. */ assert(zipEntrySafe(zl, newlen, p + reqlen, &tail, 1)); if (p[reqlen + tail.headersize + tail.len] != ZIP_END) { ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) + nextdiff); } } else { /* This element will be the new tail. */ ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p - zl); }
/* When nextdiff != 0, the raw length of the next entry has changed, so * we need to cascade the update throughout the ziplist */ if (nextdiff != 0) { offset = p - zl; zl = __ziplistCascadeUpdate(zl, p + reqlen); p = zl + offset; }
/* Write the entry */ p += zipStorePrevEntryLength(p, prevlen); p += zipStoreEntryEncoding(p, encoding, slen); if (ZIP_IS_STR(encoding)) { memcpy(p, s, slen); } else { zipSaveInteger(p, value, encoding); } ZIPLIST_INCR_LENGTH(zl, 1); return zl; }
/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */ /* 在zl指向的ziplist中,从p指向的entry开始删除num个entry */ unsignedchar *__ziplistDelete(unsignedchar *zl, unsignedchar *p, unsignedint num) { unsignedint i, totlen, deleted = 0; size_t offset; int nextdiff = 0; zlentry first, tail; size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl)); /* 将p指向的entry信息填入first */ zipEntry(p, &first); /* no need for "safe" variant since the input pointer validated by the function that returned it. */ /* 将p指向删除m个entry之后的entry地址 */ for (i = 0; p[0] != ZIP_END && i < num; i++) { p += zipRawEntryLengthSafe(zl, zlbytes, p); deleted++; }
assert(p >= first.p); /* 需要删除的长度 */ totlen = p - first.p; if (totlen > 0) { uint32_t set_tail; if (p[0] != ZIP_END) { /* Storing `prevrawlen` in this entry may increase or decrease the * number of bytes required compare to the current `prevrawlen`. * There always is room to store this, because it was previously * stored by an entry that is now being deleted. */ /* nextdiff = first.prevrawlensize - zlentry(p).prevrawlensize */ nextdiff = zipPrevLenByteDiff(p, first.prevrawlen);
/* Note that there is always space when p jumps backward: if * the new previous entry is large, one of the deleted elements * had a 5 bytes prevlen header, so there is for sure at least * 5 bytes free and we need just 4. */ p -= nextdiff; assert(p >= first.p && p < zl + zlbytes - 1); /* 更新p的prevlen */ zipStorePrevEntryLength(p, first.prevrawlen);
/* When the tail contains more than one entry, we need to take * "nextdiff" in account as well. Otherwise, a change in the * size of prevlen doesn't have an effect on the *tail* offset. */ assert(zipEntrySafe(zl, zlbytes, p, &tail, 1)); if (p[tail.headersize + tail.len] != ZIP_END) { set_tail = set_tail + nextdiff; }
/* Move tail to the front of the ziplist */ /* since we asserted that p >= first.p. we know totlen >= 0, * so we know that p > first.p and this is guaranteed not to reach * beyond the allocation, even if the entries lens are corrupted. */ /* 获取需要移动的字节数 */ size_t bytes_to_move = zlbytes - (p - zl) - 1; memmove(first.p, p, bytes_to_move); } else { /* The entire tail was deleted. No need to move memory. */ set_tail = (first.p - zl) - first.prevrawlen; }
/* Update record count */ ZIPLIST_INCR_LENGTH(zl, -deleted);
/* Set the tail offset computed above */ assert(set_tail <= zlbytes - ZIPLIST_END_SIZE); ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(set_tail);
/* When nextdiff != 0, the raw length of the next entry has changed, so * we need to cascade the update throughout the ziplist */ if (nextdiff != 0) zl = __ziplistCascadeUpdate(zl, p); } return zl; }
/* The pointer "p" points to the first entry that does NOT need to be * updated, i.e. consecutive fields MAY need an update. */ unsignedchar *__ziplistCascadeUpdate(unsignedchar *zl, unsignedchar *p) { zlentry cur; size_t prevlen, prevlensize, prevoffset; /* Informat of the last changed entry. */ size_t firstentrylen; /* Used to handle insert at head. */ size_t rawlen, curlen = intrev32ifbe(ZIPLIST_BYTES(zl)); size_t extra = 0, cnt = 0, offset; size_t delta = 4; /* Extra bytes needed to update a entry's prevlen (5-1). */ unsignedchar *tail = zl + intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl));
/* Empty ziplist */ if (p[0] == ZIP_END) return zl; /* 获取p指向entry的信息 */ zipEntry(p, &cur); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */ firstentrylen = prevlen = cur.headersize + cur.len; /* 获取prevlen占的字节数 */ prevlensize = zipStorePrevEntryLength(NULL, prevlen); prevoffset = p - zl; /* 指向第一个需要更新的entry */ p += prevlen;
/* Iterate ziplist to find out how many extra bytes do we need to update it. */ while (p[0] != ZIP_END) { assert(zipEntrySafe(zl, curlen, p, &cur, 0));
/* Abort when "prevlen" has not changed. */ if (cur.prevrawlen == prevlen) break;
/* Abort when entry's "prevlensize" is big enough. */ if (cur.prevrawlensize >= prevlensize) { /* (5,5) (1,1)*/ if (cur.prevrawlensize == prevlensize) { zipStorePrevEntryLength(p, prevlen); } else { /* This would result in shrinking, which we want to avoid. * So, set "prevlen" in the available bytes. */ /* 为了防止继续更新下去,将1个字节的prevlen存成5个字节 */ /* (5,1) */ zipStorePrevEntryLengthLarge(p, prevlen); } break; }
/* cur.prevrawlen means cur is the former head entry. */ /* prevrawlensize只剩下(1,5)了,也就是从1增到5导致的级联更新 */ assert(cur.prevrawlen == 0 || cur.prevrawlen + delta == prevlen);
/* Update prev entry's info and advance the cursor. */ /* 为下一轮循环更新数据 */ rawlen = cur.headersize + cur.len; prevlen = rawlen + delta; prevlensize = zipStorePrevEntryLength(NULL, prevlen); prevoffset = p - zl; p += rawlen; /* extra存放增长的字节数 */ extra += delta; cnt++; }
/* Extra bytes is zero all update has been done(or no need to update). */ /* 不是由prevrawlensize增长导致的级联更新已经结束了 */ if (extra == 0) return zl;
/* Update tail offset after loop. */ if (tail == zl + prevoffset) { /* When the the last entry we need to update is also the tail, update * tail offset unless this is the only entry that was updated (so the * tail offset didn't change). */ /* tail不关心最后一个entry的长度 */ if (extra - delta != 0) { ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe( intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) + extra - delta); } } else { /* Update the tail offset in cases where the last entry we updated is not the tail. */ ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) + extra); }
/* Now "p" points at the first unchanged byte in original ziplist, * move data after that to new ziplist. */ offset = p - zl; zl = ziplistResize(zl, curlen + extra); p = zl + offset; memmove(p + extra, p, curlen - offset - 1); p += extra;
/* Iterate all entries that need to be updated tail to head. */ /* 由后往前复制entry,p指向的是第一个不需要更改的entry,prevoffset当前需要修改的entry的偏移位置 */ while (cnt) { zipEntry(zl + prevoffset, &cur); /* no need for "safe" variant since we already iterated on all these entries above. */ rawlen = cur.headersize + cur.len; /* Move entry to tail and reset prevlen. */ /* 减cur.prevrawlensize是因为prevlen不需要复制过去,是需要修改的 */ memmove(p - (rawlen - cur.prevrawlensize), zl + prevoffset + cur.prevrawlensize, rawlen - cur.prevrawlensize); p -= (rawlen + delta); if (cur.prevrawlen == 0) { /* "cur" is the previous head entry, update its prevlen with * firstentrylen. */ zipStorePrevEntryLength(p, firstentrylen); } else { /* An entry's prevlen can only increment 4 bytes. */ zipStorePrevEntryLength(p, cur.prevrawlen + delta); } /* Foward to previous entry. */ prevoffset -= cur.prevrawlen; cnt--; } return zl; }
/* quicklistNode is a 32 byte struct describing a ziplist for a quicklist. * We use bit fields keep the quicklistNode at 32 bytes. * count: 16 bits, max 65536 (max zl bytes is 65k, so max count actually < 32k). * encoding: 2 bits, RAW=1, LZF=2. * container: 2 bits, NONE=1, ZIPLIST=2. * recompress: 1 bit, bool, true if node is temporary decompressed for usage. * attempted_compress: 1 bit, boolean, used for verifying during testing. * extra: 10 bits, free for future use; pads out the remainder of 32 bits */ typedef struct quicklistNode { struct quicklistNode *prev; struct quicklistNode *next; unsigned char *zl; unsigned int sz; /* ziplist size in bytes */ unsigned int count : 16; /* count of items in ziplist */ unsigned int encoding : 2; /* RAW==1 or LZF==2 */ unsigned int container : 2; /* NONE==1 or ZIPLIST==2 */ unsigned int recompress : 1; /* was this node previous compressed? */ unsigned int attempted_compress : 1; /* node can't compress; too small */ unsigned int extra : 10; /* more bits to steal for future usage */ } quicklistNode;
#if UINTPTR_MAX == 0xffffffff /* 32-bit */ # define QL_FILL_BITS 14 # define QL_COMP_BITS 14 # define QL_BM_BITS 4 #elif UINTPTR_MAX == 0xffffffffffffffff /* 64-bit */ # define QL_FILL_BITS 16 # define QL_COMP_BITS 16 # define QL_BM_BITS 4 /* we can encode more, but we rather limit the user since they cause performance degradation. */ #else # error unknown arch bits count #endif
/* quicklist is a 40 byte struct (on 64-bit systems) describing a quicklist. * 'count' is the number of total entries. * 'len' is the number of quicklist nodes. * 'compress' is: 0 if compression disabled, otherwise it's the number * of quicklistNodes to leave uncompressed at ends of quicklist. * 'fill' is the user-requested (or default) fill factor. * 'bookmakrs are an optional feature that is used by realloc this struct, * so that they don't consume memory when not used. */ typedef struct quicklist { quicklistNode *head; quicklistNode *tail; unsigned long count; /* total count of all entries in all ziplists */ unsigned long len; /* number of quicklistNodes */ int fill : QL_FILL_BITS; /* fill factor for individual nodes */ unsigned int compress : QL_COMP_BITS; /* depth of end nodes not to compress;0=off */ unsigned int bookmark_count: QL_BM_BITS; quicklistBookmark bookmarks[]; } quicklist;
/* pop from quicklist and return result in 'data' ptr. Value of 'data' * is the return value of 'saver' function pointer if the data is NOT a number. * * If the quicklist element is a long long, then the return value is returned in * 'sval'. * * Return value of 0 means no elements available. * Return value of 1 means check 'data' and 'sval' for values. * If 'data' is set, use 'data' and 'sz'. Otherwise, use 'sval'. */ int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data, unsigned int *sz, long long *sval, void *(*saver)(unsigned char *data, unsigned int sz)) { unsigned char *p; unsigned char *vstr; unsigned int vlen; long long vlong; int pos = (where == QUICKLIST_HEAD) ? 0 : -1;
if (quicklist->count == 0) return 0;
if (data) *data = NULL; if (sz) *sz = 0; if (sval) *sval = -123456789;
/* 根据pos去遍历ziplist,获取pos地址 */ p = ziplistIndex(node->zl, pos); /* 根据p的地址,获取值,如果是字符串encoding,则存入vstr,并将长度存入vlen,否则将数字存入vlong */ if (ziplistGet(p, &vstr, &vlen, &vlong)) { if (vstr) { if (data) *data = saver(vstr, vlen); if (sz) *sz = vlen; } else { if (data) *data = NULL; if (sval) *sval = vlong; } /* 将该节点删除 */ quicklistDelIndex(quicklist, node, &p); return 1; } return 0; }
/* Return a malloc'd copy of data passed in */ REDIS_STATIC void *_quicklistSaver(unsigned char *data, unsigned int sz) { unsigned char *vstr; if (data) { vstr = zmalloc(sz); memcpy(vstr, data, sz); return vstr; } return NULL; }
/* Default pop function * * Returns malloc'd value from quicklist */ int quicklistPop(quicklist *quicklist, int where, unsigned char **data, unsigned int *sz, long long *slong) { unsigned char *vstr; unsigned int vlen; long long vlong; if (quicklist->count == 0) return 0; int ret = quicklistPopCustom(quicklist, where, &vstr, &vlen, &vlong, _quicklistSaver); if (data) *data = vstr; if (slong) *slong = vlong; if (sz) *sz = vlen; return ret; }