/* $NetBSD: rbt.c,v 1.1.2.2 2024/02/24 13:07:00 martin Exp $ */ /* * Copyright (C) Internet Systems Consortium, Inc. ("ISC") * * SPDX-License-Identifier: MPL-2.0 * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, you can obtain one at https://mozilla.org/MPL/2.0/. * * See the COPYRIGHT file distributed with this work for additional * information regarding copyright ownership. */ /*! \file */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*% * This define is so dns/name.h (included by dns/fixedname.h) uses more * efficient macro calls instead of functions for a few operations. */ #define DNS_NAME_USEINLINE 1 #define ALIGNMENT_SIZE 8U /* see lib/isc/mem.c */ #include #include #include #include #include #include #define CHECK(x) \ do { \ result = (x); \ if (result != ISC_R_SUCCESS) \ goto cleanup; \ } while (0) #define RBT_MAGIC ISC_MAGIC('R', 'B', 'T', '+') #define VALID_RBT(rbt) ISC_MAGIC_VALID(rbt, RBT_MAGIC) /* * XXXDCL Since parent pointers were added in again, I could remove all of the * chain junk, and replace with dns_rbt_firstnode, _previousnode, _nextnode, * _lastnode. This would involve pretty major change to the API. */ #define CHAIN_MAGIC ISC_MAGIC('0', '-', '0', '-') #define VALID_CHAIN(chain) ISC_MAGIC_VALID(chain, CHAIN_MAGIC) #define RBT_HASH_MIN_BITS 4 #define RBT_HASH_MAX_BITS 32 #define RBT_HASH_OVERCOMMIT 3 #define RBT_HASH_BUCKETSIZE 4096 /* FIXME: What would be a good value here? */ #ifdef RBT_MEM_TEST #undef RBT_HASH_SIZE #define RBT_HASH_SIZE 2 /*%< To give the reallocation code a workout. */ #endif /* ifdef RBT_MEM_TEST */ #define GOLDEN_RATIO_32 0x61C88647 #define HASHSIZE(bits) (UINT64_C(1) << (bits)) static uint32_t hash_32(uint32_t val, unsigned int bits) { REQUIRE(bits <= RBT_HASH_MAX_BITS); /* High bits are more random. */ return (val * GOLDEN_RATIO_32 >> (32 - bits)); } struct dns_rbt { unsigned int magic; isc_mem_t *mctx; dns_rbtnode_t *root; void (*data_deleter)(void *, void *); void *deleter_arg; unsigned int nodecount; uint16_t hashbits; uint16_t maxhashbits; dns_rbtnode_t **hashtable; void *mmap_location; }; #define RED 0 #define BLACK 1 /* * This is the header for map-format RBT images. It is populated, * and then written, as the LAST thing done to the file before returning. * Writing this last (with zeros in the header area initially) will ensure * that the header is only valid when the RBT image is also valid. */ typedef struct file_header file_header_t; /* Pad to 32 bytes */ static char FILE_VERSION[32] = "\0"; /* Header length, always the same size regardless of structure size */ #define HEADER_LENGTH 1024 struct file_header { char version1[32]; uint64_t first_node_offset; /* usually 1024 */ /* * information about the system on which the map file was generated * will be used to tell if we can load the map file or not */ uint32_t ptrsize; unsigned int bigendian : 1; /* big or little endian system */ unsigned int rdataset_fixed : 1; /* compiled with * --enable-rrset-fixed */ unsigned int nodecount; /* shadow from rbt structure */ uint64_t crc; char version2[32]; /* repeated; must match version1 */ }; /* * The following declarations are for the serialization of an RBT: * * step one: write out a zeroed header of 1024 bytes * step two: walk the tree in a depth-first, left-right-down order, writing * out the nodes, reserving space as we go, correcting addresses to point * at the proper offset in the file, and setting a flag for each pointer to * indicate that it is a reference to a location in the file, rather than in * memory. * step three: write out the header, adding the information that will be * needed to re-create the tree object itself. * * The RBTDB object will do this three times, once for each of the three * RBT objects it contains. * * Note: 'file' must point an actual open file that can be mmapped * and fseeked, not to a pipe or stream */ static isc_result_t dns_rbt_zero_header(FILE *file); static isc_result_t write_header(FILE *file, dns_rbt_t *rbt, uint64_t first_node_offset, uint64_t crc); static bool match_header_version(file_header_t *header); static isc_result_t serialize_node(FILE *file, dns_rbtnode_t *node, uintptr_t left, uintptr_t right, uintptr_t down, uintptr_t parent, uintptr_t data, uint64_t *crc); static isc_result_t serialize_nodes(FILE *file, dns_rbtnode_t *node, uintptr_t parent, dns_rbtdatawriter_t datawriter, void *writer_arg, uintptr_t *where, uint64_t *crc); #define ADJUST_ADDRESS(address, relative, header) \ if (address != NULL && header != NULL) { \ address += relative * (uintptr_t)header; \ } /* * The following functions allow you to get the actual address of a pointer * without having to use an if statement to check to see if that address is * relative or not */ static dns_rbtnode_t * getparent(dns_rbtnode_t *node, file_header_t *header) { char *adjusted_address = (char *)(node->parent); ADJUST_ADDRESS(adjusted_address, node->parent_is_relative, header); return ((dns_rbtnode_t *)adjusted_address); } static dns_rbtnode_t * getleft(dns_rbtnode_t *node, file_header_t *header) { char *adjusted_address = (char *)(node->left); ADJUST_ADDRESS(adjusted_address, node->left_is_relative, header); return ((dns_rbtnode_t *)adjusted_address); } static dns_rbtnode_t * getright(dns_rbtnode_t *node, file_header_t *header) { char *adjusted_address = (char *)(node->right); ADJUST_ADDRESS(adjusted_address, node->right_is_relative, header); return ((dns_rbtnode_t *)adjusted_address); } static dns_rbtnode_t * getdown(dns_rbtnode_t *node, file_header_t *header) { char *adjusted_address = (char *)(node->down); ADJUST_ADDRESS(adjusted_address, node->down_is_relative, header); return ((dns_rbtnode_t *)adjusted_address); } static dns_rbtnode_t * getdata(dns_rbtnode_t *node, file_header_t *header) { char *adjusted_address = (char *)(node->data); ADJUST_ADDRESS(adjusted_address, node->data_is_relative, header); return ((dns_rbtnode_t *)adjusted_address); } /*% * Elements of the rbtnode structure. */ #define PARENT(node) ((node)->parent) #define LEFT(node) ((node)->left) #define RIGHT(node) ((node)->right) #define DOWN(node) ((node)->down) #define UPPERNODE(node) ((node)->uppernode) #define DATA(node) ((node)->data) #define IS_EMPTY(node) ((node)->data == NULL) #define HASHNEXT(node) ((node)->hashnext) #define HASHVAL(node) ((node)->hashval) #define COLOR(node) ((node)->color) #define NAMELEN(node) ((node)->namelen) #define OLDNAMELEN(node) ((node)->oldnamelen) #define OFFSETLEN(node) ((node)->offsetlen) #define ATTRS(node) ((node)->attributes) #define IS_ROOT(node) ((node)->is_root) #define FINDCALLBACK(node) ((node)->find_callback) #define WANTEMPTYDATA_OR_DATA(options, node) \ ((options & DNS_RBTFIND_EMPTYDATA) != 0 || DATA(node) != NULL) /*% * Structure elements from the rbtdb.c, not * used as part of the rbt.c algorithms. */ #define DIRTY(node) ((node)->dirty) #define WILD(node) ((node)->wild) #define LOCKNUM(node) ((node)->locknum) /*% * The variable length stuff stored after the node has the following * structure. * * <name_data>{1..255}<oldoffsetlen>{1}<offsets>{1..128} * * <name_data> contains the name of the node when it was created. * <oldoffsetlen> contains the length of <offsets> when the node * was created. * <offsets> contains the offsets into name for each label when the node * was created. */ #define NAME(node) ((unsigned char *)((node) + 1)) #define OFFSETS(node) (NAME(node) + OLDNAMELEN(node) + 1) #define OLDOFFSETLEN(node) (OFFSETS(node)[-1]) #define NODE_SIZE(node) \ (sizeof(*node) + OLDNAMELEN(node) + OLDOFFSETLEN(node) + 1) /*% * Color management. */ #define IS_RED(node) ((node) != NULL && (node)->color == RED) #define IS_BLACK(node) ((node) == NULL || (node)->color == BLACK) #define MAKE_RED(node) ((node)->color = RED) #define MAKE_BLACK(node) ((node)->color = BLACK) /*% * Chain management. * * The "ancestors" member of chains were removed, with their job now * being wholly handled by parent pointers (which didn't exist, because * of memory concerns, when chains were first implemented). */ #define ADD_LEVEL(chain, node) \ do { \ INSIST((chain)->level_count < DNS_RBT_LEVELBLOCK); \ (chain)->levels[(chain)->level_count++] = (node); \ } while (0) /*% * The following macros directly access normally private name variables. * These macros are used to avoid a lot of function calls in the critical * path of the tree traversal code. */ static void NODENAME(dns_rbtnode_t *node, dns_name_t *name) { name->length = NAMELEN(node); name->labels = OFFSETLEN(node); name->ndata = NAME(node); name->offsets = OFFSETS(node); name->attributes = ATTRS(node); name->attributes |= DNS_NAMEATTR_READONLY; } #ifdef DEBUG #define inline /* * A little something to help out in GDB. */ dns_name_t Name(dns_rbtnode_t *node); dns_name_t Name(dns_rbtnode_t *node) { dns_name_t name; dns_name_init(&name, NULL); if (node != NULL) { NODENAME(node, &name); } return (name); } static void hexdump(const char *desc, void *blob, size_t size) { char hexdump[BUFSIZ * 2 + 1]; isc_buffer_t b; isc_region_t r; isc_result_t result; size_t bytes; uint8_t *data = blob; fprintf(stderr, "%s: ", desc); do { isc_buffer_init(&b, hexdump, sizeof(hexdump)); r.base = data; r.length = bytes = (size > BUFSIZ) ? BUFSIZ : size; result = isc_hex_totext(&r, 0, "", &b); RUNTIME_CHECK(result == ISC_R_SUCCESS); isc_buffer_putuint8(&b, 0); fprintf(stderr, "%s", hexdump); data += bytes; size -= bytes; } while (size > 0); fprintf(stderr, "\n"); } #endif /* DEBUG */ /* * Upper node is the parent of the root of the passed node's * subtree. The passed node must not be NULL. */ static dns_rbtnode_t * get_upper_node(dns_rbtnode_t *node) { return (UPPERNODE(node)); } static void fixup_uppernodes_helper(dns_rbtnode_t *node, dns_rbtnode_t *uppernode) { if (node == NULL) { return; } UPPERNODE(node) = uppernode; fixup_uppernodes_helper(LEFT(node), uppernode); fixup_uppernodes_helper(RIGHT(node), uppernode); fixup_uppernodes_helper(DOWN(node), node); } /* * This function is used to fixup uppernode members of all dns_rbtnodes * after deserialization. */ static void fixup_uppernodes(dns_rbt_t *rbt) { fixup_uppernodes_helper(rbt->root, NULL); } size_t dns__rbtnode_getdistance(dns_rbtnode_t *node) { size_t nodes = 1; while (node != NULL) { if (IS_ROOT(node)) { break; } nodes++; node = PARENT(node); } return (nodes); } /* * Forward declarations. */ static isc_result_t create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep); static isc_result_t inithash(dns_rbt_t *rbt); static void hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name); static void unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node); static uint32_t rehash_bits(dns_rbt_t *rbt, size_t newcount); static void rehash(dns_rbt_t *rbt, uint32_t newbits); static void maybe_rehash(dns_rbt_t *rbt, size_t size); static void rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp); static void rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp); static void addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order, dns_rbtnode_t **rootp); static void deletefromlevel(dns_rbtnode_t *item, dns_rbtnode_t **rootp); static isc_result_t treefix(dns_rbt_t *rbt, void *base, size_t size, dns_rbtnode_t *n, const dns_name_t *name, dns_rbtdatafixer_t datafixer, void *fixer_arg, uint64_t *crc); static void deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, bool unhash, dns_rbtnode_t **nodep); static void printnodename(dns_rbtnode_t *node, bool quoted, FILE *f); static void freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep); static isc_result_t dns_rbt_zero_header(FILE *file) { /* * Write out a zeroed header as a placeholder. Doing this ensures * that the file will not read while it is partially written, should * writing fail or be interrupted. */ char buffer[HEADER_LENGTH]; isc_result_t result; memset(buffer, 0, HEADER_LENGTH); result = isc_stdio_write(buffer, 1, HEADER_LENGTH, file, NULL); if (result != ISC_R_SUCCESS) { return (result); } result = fflush(file); if (result != ISC_R_SUCCESS) { return (result); } return (ISC_R_SUCCESS); } static isc_once_t once = ISC_ONCE_INIT; static void init_file_version(void) { int n; memset(FILE_VERSION, 0, sizeof(FILE_VERSION)); n = snprintf(FILE_VERSION, sizeof(FILE_VERSION), "RBT Image %s %s", dns_major, dns_mapapi); INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION)); } /* * Write out the real header, including NodeDump version information * and the offset of the first node. * * Any information stored in the rbt object itself should be stored * here. */ static isc_result_t write_header(FILE *file, dns_rbt_t *rbt, uint64_t first_node_offset, uint64_t crc) { file_header_t header; isc_result_t result; off_t location; RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS); memset(&header, 0, sizeof(file_header_t)); memmove(header.version1, FILE_VERSION, sizeof(header.version1)); memmove(header.version2, FILE_VERSION, sizeof(header.version2)); header.first_node_offset = first_node_offset; header.ptrsize = (uint32_t)sizeof(void *); header.bigendian = (1 == htonl(1)) ? 1 : 0; #ifdef DNS_RDATASET_FIXED header.rdataset_fixed = 1; #else /* ifdef DNS_RDATASET_FIXED */ header.rdataset_fixed = 0; #endif /* ifdef DNS_RDATASET_FIXED */ header.nodecount = rbt->nodecount; header.crc = crc; CHECK(isc_stdio_tell(file, &location)); location = dns_rbt_serialize_align(location); CHECK(isc_stdio_seek(file, location, SEEK_SET)); CHECK(isc_stdio_write(&header, 1, sizeof(file_header_t), file, NULL)); CHECK(fflush(file)); /* Ensure we are always at the end of the file. */ CHECK(isc_stdio_seek(file, 0, SEEK_END)); cleanup: return (result); } static bool match_header_version(file_header_t *header) { RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS); if (memcmp(header->version1, FILE_VERSION, sizeof(header->version1)) != 0 || memcmp(header->version2, FILE_VERSION, sizeof(header->version1)) != 0) { return (false); } return (true); } unsigned int dns__rbtnode_namelen(dns_rbtnode_t *node) { dns_name_t current; unsigned int len = 0; REQUIRE(DNS_RBTNODE_VALID(node)); dns_name_init(¤t, NULL); do { if (node != NULL) { NODENAME(node, ¤t); len += current.length; } else { len += 1; break; } node = get_upper_node(node); } while (!dns_name_isabsolute(¤t)); return (len); } static isc_result_t serialize_node(FILE *file, dns_rbtnode_t *node, uintptr_t left, uintptr_t right, uintptr_t down, uintptr_t parent, uintptr_t data, uint64_t *crc) { isc_result_t result; dns_rbtnode_t temp_node; off_t file_position; unsigned char *node_data = NULL; size_t datasize; #ifdef DEBUG dns_name_t nodename; #endif /* ifdef DEBUG */ INSIST(node != NULL); CHECK(isc_stdio_tell(file, &file_position)); file_position = dns_rbt_serialize_align(file_position); CHECK(isc_stdio_seek(file, file_position, SEEK_SET)); temp_node = *node; temp_node.down_is_relative = 0; temp_node.left_is_relative = 0; temp_node.right_is_relative = 0; temp_node.parent_is_relative = 0; temp_node.data_is_relative = 0; temp_node.is_mmapped = 1; /* * If the next node is not NULL, calculate the next node's location * in the file. Note that this will have to change when the data * structure changes, and it also assumes that we always write the * nodes out in list order (which we currently do.) */ if (temp_node.parent != NULL) { temp_node.parent = (dns_rbtnode_t *)(parent); temp_node.parent_is_relative = 1; } if (temp_node.left != NULL) { temp_node.left = (dns_rbtnode_t *)(left); temp_node.left_is_relative = 1; } if (temp_node.right != NULL) { temp_node.right = (dns_rbtnode_t *)(right); temp_node.right_is_relative = 1; } if (temp_node.down != NULL) { temp_node.down = (dns_rbtnode_t *)(down); temp_node.down_is_relative = 1; } if (temp_node.data != NULL) { temp_node.data = (dns_rbtnode_t *)(data); temp_node.data_is_relative = 1; } temp_node.fullnamelen = dns__rbtnode_namelen(node); node_data = (unsigned char *)node + sizeof(dns_rbtnode_t); datasize = NODE_SIZE(node) - sizeof(dns_rbtnode_t); CHECK(isc_stdio_write(&temp_node, 1, sizeof(dns_rbtnode_t), file, NULL)); CHECK(isc_stdio_write(node_data, 1, datasize, file, NULL)); #ifdef DEBUG dns_name_init(&nodename, NULL); NODENAME(node, &nodename); fprintf(stderr, "serialize "); dns_name_print(&nodename, stderr); fprintf(stderr, "\n"); hexdump("node header", (unsigned char *)&temp_node, sizeof(dns_rbtnode_t)); hexdump("node data", node_data, datasize); #endif /* ifdef DEBUG */ isc_crc64_update(crc, (const uint8_t *)&temp_node, sizeof(dns_rbtnode_t)); isc_crc64_update(crc, (const uint8_t *)node_data, datasize); cleanup: return (result); } static isc_result_t serialize_nodes(FILE *file, dns_rbtnode_t *node, uintptr_t parent, dns_rbtdatawriter_t datawriter, void *writer_arg, uintptr_t *where, uint64_t *crc) { uintptr_t left = 0, right = 0, down = 0, data = 0; off_t location = 0, offset_adjust; isc_result_t result; if (node == NULL) { if (where != NULL) { *where = 0; } return (ISC_R_SUCCESS); } /* Reserve space for current node. */ CHECK(isc_stdio_tell(file, &location)); location = dns_rbt_serialize_align(location); CHECK(isc_stdio_seek(file, location, SEEK_SET)); offset_adjust = dns_rbt_serialize_align(location + NODE_SIZE(node)); CHECK(isc_stdio_seek(file, offset_adjust, SEEK_SET)); /* * Serialize the rest of the tree. * * WARNING: A change in the order (from left, right, down) * will break the way the crc hash is computed. */ CHECK(serialize_nodes(file, getleft(node, NULL), location, datawriter, writer_arg, &left, crc)); CHECK(serialize_nodes(file, getright(node, NULL), location, datawriter, writer_arg, &right, crc)); CHECK(serialize_nodes(file, getdown(node, NULL), location, datawriter, writer_arg, &down, crc)); if (node->data != NULL) { off_t ret; CHECK(isc_stdio_tell(file, &ret)); ret = dns_rbt_serialize_align(ret); CHECK(isc_stdio_seek(file, ret, SEEK_SET)); data = ret; datawriter(file, node->data, writer_arg, crc); } /* Seek back to reserved space. */ CHECK(isc_stdio_seek(file, location, SEEK_SET)); /* Serialize the current node. */ CHECK(serialize_node(file, node, left, right, down, parent, data, crc)); /* Ensure we are always at the end of the file. */ CHECK(isc_stdio_seek(file, 0, SEEK_END)); if (where != NULL) { *where = (uintptr_t)location; } cleanup: return (result); } off_t dns_rbt_serialize_align(off_t target) { off_t offset = target % 8; if (offset == 0) { return (target); } else { return (target + 8 - offset); } } isc_result_t dns_rbt_serialize_tree(FILE *file, dns_rbt_t *rbt, dns_rbtdatawriter_t datawriter, void *writer_arg, off_t *offset) { isc_result_t result; off_t header_position, node_position, end_position; uint64_t crc; REQUIRE(file != NULL); CHECK(isc_file_isplainfilefd(fileno(file))); isc_crc64_init(&crc); CHECK(isc_stdio_tell(file, &header_position)); /* Write dummy header */ CHECK(dns_rbt_zero_header(file)); /* Serialize nodes */ CHECK(isc_stdio_tell(file, &node_position)); CHECK(serialize_nodes(file, rbt->root, 0, datawriter, writer_arg, NULL, &crc)); CHECK(isc_stdio_tell(file, &end_position)); if (node_position == end_position) { CHECK(isc_stdio_seek(file, header_position, SEEK_SET)); *offset = 0; return (ISC_R_SUCCESS); } isc_crc64_final(&crc); #ifdef DEBUG hexdump("serializing CRC", (unsigned char *)&crc, sizeof(crc)); #endif /* ifdef DEBUG */ /* Serialize header */ CHECK(isc_stdio_seek(file, header_position, SEEK_SET)); CHECK(write_header(file, rbt, HEADER_LENGTH, crc)); /* Ensure we are always at the end of the file. */ CHECK(isc_stdio_seek(file, 0, SEEK_END)); *offset = dns_rbt_serialize_align(header_position); cleanup: return (result); } #define CONFIRM(a) \ do { \ if (!(a)) { \ result = ISC_R_INVALIDFILE; \ goto cleanup; \ } \ } while (0) static isc_result_t treefix(dns_rbt_t *rbt, void *base, size_t filesize, dns_rbtnode_t *n, const dns_name_t *name, dns_rbtdatafixer_t datafixer, void *fixer_arg, uint64_t *crc) { isc_result_t result = ISC_R_SUCCESS; dns_fixedname_t fixed; dns_name_t nodename, *fullname = NULL; unsigned char *node_data = NULL; dns_rbtnode_t header; size_t nodemax = filesize - sizeof(dns_rbtnode_t); size_t datasize; if (n == NULL) { return (ISC_R_SUCCESS); } #define CHECK_ALIGNMENT(n) \ (((uintptr_t)n & ~((uintptr_t)ALIGNMENT_SIZE - 1)) == (uintptr_t)n) CONFIRM((void *)n >= base); CONFIRM((size_t)((char *)n - (char *)base) <= nodemax); CONFIRM(CHECK_ALIGNMENT(n)); CONFIRM(DNS_RBTNODE_VALID(n)); dns_name_init(&nodename, NULL); NODENAME(n, &nodename); fullname = &nodename; CONFIRM(dns_name_isvalid(fullname)); if (!dns_name_isabsolute(&nodename)) { fullname = dns_fixedname_initname(&fixed); CHECK(dns_name_concatenate(&nodename, name, fullname, NULL)); } /* memorize header contents prior to fixup */ memmove(&header, n, sizeof(header)); if (n->left_is_relative) { CONFIRM(n->left <= (dns_rbtnode_t *)nodemax); n->left = getleft(n, rbt->mmap_location); n->left_is_relative = 0; CONFIRM(CHECK_ALIGNMENT(n->left)); CONFIRM(DNS_RBTNODE_VALID(n->left)); } else { CONFIRM(n->left == NULL); } if (n->right_is_relative) { CONFIRM(n->right <= (dns_rbtnode_t *)nodemax); n->right = getright(n, rbt->mmap_location); n->right_is_relative = 0; CONFIRM(CHECK_ALIGNMENT(n->right)); CONFIRM(DNS_RBTNODE_VALID(n->right)); } else { CONFIRM(n->right == NULL); } if (n->down_is_relative) { CONFIRM(n->down <= (dns_rbtnode_t *)nodemax); n->down = getdown(n, rbt->mmap_location); n->down_is_relative = 0; CONFIRM(n->down > (dns_rbtnode_t *)n); CONFIRM(CHECK_ALIGNMENT(n->down)); CONFIRM(DNS_RBTNODE_VALID(n->down)); } else { CONFIRM(n->down == NULL); } if (n->parent_is_relative) { CONFIRM(n->parent <= (dns_rbtnode_t *)nodemax); n->parent = getparent(n, rbt->mmap_location); n->parent_is_relative = 0; CONFIRM(n->parent < (dns_rbtnode_t *)n); CONFIRM(CHECK_ALIGNMENT(n->parent)); CONFIRM(DNS_RBTNODE_VALID(n->parent)); } else { CONFIRM(n->parent == NULL); } if (n->data_is_relative) { CONFIRM(n->data <= (void *)filesize); n->data = getdata(n, rbt->mmap_location); n->data_is_relative = 0; CONFIRM(n->data > (void *)n); CONFIRM(CHECK_ALIGNMENT(n->data)); } else { CONFIRM(n->data == NULL); } hash_node(rbt, n, fullname); /* a change in the order (from left, right, down) will break hashing*/ if (n->left != NULL) { CHECK(treefix(rbt, base, filesize, n->left, name, datafixer, fixer_arg, crc)); } if (n->right != NULL) { CHECK(treefix(rbt, base, filesize, n->right, name, datafixer, fixer_arg, crc)); } if (n->down != NULL) { CHECK(treefix(rbt, base, filesize, n->down, fullname, datafixer, fixer_arg, crc)); } if (datafixer != NULL && n->data != NULL) { CHECK(datafixer(n, base, filesize, fixer_arg, crc)); } rbt->nodecount++; node_data = (unsigned char *)n + sizeof(dns_rbtnode_t); datasize = NODE_SIZE(n) - sizeof(dns_rbtnode_t); #ifdef DEBUG fprintf(stderr, "deserialize "); dns_name_print(&nodename, stderr); fprintf(stderr, "\n"); hexdump("node header", (unsigned char *)&header, sizeof(dns_rbtnode_t)); hexdump("node data", node_data, datasize); #endif /* ifdef DEBUG */ isc_crc64_update(crc, (const uint8_t *)&header, sizeof(dns_rbtnode_t)); isc_crc64_update(crc, (const uint8_t *)node_data, datasize); cleanup: return (result); } isc_result_t dns_rbt_deserialize_tree(void *base_address, size_t filesize, off_t header_offset, isc_mem_t *mctx, dns_rbtdeleter_t deleter, void *deleter_arg, dns_rbtdatafixer_t datafixer, void *fixer_arg, dns_rbtnode_t **originp, dns_rbt_t **rbtp) { isc_result_t result = ISC_R_SUCCESS; file_header_t *header; dns_rbt_t *rbt = NULL; uint64_t crc; unsigned int host_big_endian; REQUIRE(originp == NULL || *originp == NULL); REQUIRE(rbtp != NULL && *rbtp == NULL); isc_crc64_init(&crc); CHECK(dns_rbt_create(mctx, deleter, deleter_arg, &rbt)); rbt->mmap_location = base_address; header = (file_header_t *)((char *)base_address + header_offset); if (!match_header_version(header)) { result = ISC_R_INVALIDFILE; goto cleanup; } #ifdef DNS_RDATASET_FIXED if (header->rdataset_fixed != 1) { result = ISC_R_INVALIDFILE; goto cleanup; } #else /* ifdef DNS_RDATASET_FIXED */ if (header->rdataset_fixed != 0) { result = ISC_R_INVALIDFILE; goto cleanup; } #endif /* ifdef DNS_RDATASET_FIXED */ if (header->ptrsize != (uint32_t)sizeof(void *)) { result = ISC_R_INVALIDFILE; goto cleanup; } host_big_endian = (1 == htonl(1)); if (header->bigendian != host_big_endian) { result = ISC_R_INVALIDFILE; goto cleanup; } /* Copy other data items from the header into our rbt. */ rbt->root = (dns_rbtnode_t *)((char *)base_address + header_offset + header->first_node_offset); if ((header->nodecount * sizeof(dns_rbtnode_t)) > filesize) { result = ISC_R_INVALIDFILE; goto cleanup; } maybe_rehash(rbt, header->nodecount); CHECK(treefix(rbt, base_address, filesize, rbt->root, dns_rootname, datafixer, fixer_arg, &crc)); isc_crc64_final(&crc); #ifdef DEBUG hexdump("deserializing CRC", (unsigned char *)&crc, sizeof(crc)); #endif /* ifdef DEBUG */ /* Check file hash */ if (header->crc != crc) { result = ISC_R_INVALIDFILE; goto cleanup; } if (header->nodecount != rbt->nodecount) { result = ISC_R_INVALIDFILE; goto cleanup; } fixup_uppernodes(rbt); *rbtp = rbt; if (originp != NULL) { *originp = rbt->root; } cleanup: if (result != ISC_R_SUCCESS && rbt != NULL) { rbt->root = NULL; rbt->nodecount = 0; dns_rbt_destroy(&rbt); } return (result); } /* * Initialize a red/black tree of trees. */ isc_result_t dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter, void *deleter_arg, dns_rbt_t **rbtp) { isc_result_t result; dns_rbt_t *rbt; REQUIRE(mctx != NULL); REQUIRE(rbtp != NULL && *rbtp == NULL); REQUIRE(deleter == NULL ? deleter_arg == NULL : 1); rbt = isc_mem_get(mctx, sizeof(*rbt)); rbt->mctx = NULL; isc_mem_attach(mctx, &rbt->mctx); rbt->data_deleter = deleter; rbt->deleter_arg = deleter_arg; rbt->root = NULL; rbt->nodecount = 0; rbt->hashtable = NULL; rbt->hashbits = 0; rbt->maxhashbits = RBT_HASH_MAX_BITS; rbt->mmap_location = NULL; result = inithash(rbt); if (result != ISC_R_SUCCESS) { isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt)); return (result); } rbt->magic = RBT_MAGIC; *rbtp = rbt; return (ISC_R_SUCCESS); } /* * Deallocate a red/black tree of trees. */ void dns_rbt_destroy(dns_rbt_t **rbtp) { RUNTIME_CHECK(dns_rbt_destroy2(rbtp, 0) == ISC_R_SUCCESS); } isc_result_t dns_rbt_destroy2(dns_rbt_t **rbtp, unsigned int quantum) { dns_rbt_t *rbt; REQUIRE(rbtp != NULL && VALID_RBT(*rbtp)); rbt = *rbtp; deletetreeflat(rbt, quantum, false, &rbt->root); if (rbt->root != NULL) { return (ISC_R_QUOTA); } *rbtp = NULL; INSIST(rbt->nodecount == 0); rbt->mmap_location = NULL; if (rbt->hashtable != NULL) { size_t size = HASHSIZE(rbt->hashbits) * sizeof(dns_rbtnode_t *); isc_mem_put(rbt->mctx, rbt->hashtable, size); } rbt->magic = 0; isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt)); return (ISC_R_SUCCESS); } unsigned int dns_rbt_nodecount(dns_rbt_t *rbt) { REQUIRE(VALID_RBT(rbt)); return (rbt->nodecount); } size_t dns_rbt_hashsize(dns_rbt_t *rbt) { REQUIRE(VALID_RBT(rbt)); return (1 << rbt->hashbits); } isc_result_t dns_rbt_adjusthashsize(dns_rbt_t *rbt, size_t size) { REQUIRE(VALID_RBT(rbt)); if (size > 0) { /* * Setting a new, finite size limit was requested for the RBT. * Estimate how many hash table slots are needed for the * requested size and how many bits would be needed to index * those hash table slots, then rehash the RBT if necessary. * Note that the hash table can only grow, it is not shrunk if * the requested size limit is lower than the current one. */ size_t newsize = size / RBT_HASH_BUCKETSIZE; rbt->maxhashbits = rehash_bits(rbt, newsize); maybe_rehash(rbt, newsize); } else { /* * Setting an infinite size limit was requested for the RBT. * Increase the maximum allowed number of hash table slots to * 2^32, which enables the hash table to grow as nodes are * added to the RBT without immediately preallocating 2^32 hash * table slots. */ rbt->maxhashbits = RBT_HASH_MAX_BITS; } return (ISC_R_SUCCESS); } static isc_result_t chain_name(dns_rbtnodechain_t *chain, dns_name_t *name, bool include_chain_end) { dns_name_t nodename; isc_result_t result = ISC_R_SUCCESS; int i; dns_name_init(&nodename, NULL); if (include_chain_end && chain->end != NULL) { NODENAME(chain->end, &nodename); dns_name_copynf(&nodename, name); } else { dns_name_reset(name); } for (i = (int)chain->level_count - 1; i >= 0; i--) { NODENAME(chain->levels[i], &nodename); result = dns_name_concatenate(name, &nodename, name, NULL); if (result != ISC_R_SUCCESS) { return (result); } } return (result); } static isc_result_t move_chain_to_last(dns_rbtnodechain_t *chain, dns_rbtnode_t *node) { do { /* * Go as far right and then down as much as possible, * as long as the rightmost node has a down pointer. */ while (RIGHT(node) != NULL) { node = RIGHT(node); } if (DOWN(node) == NULL) { break; } ADD_LEVEL(chain, node); node = DOWN(node); } while (1); chain->end = node; return (ISC_R_SUCCESS); } /* * Add 'name' to tree, initializing its data pointer with 'data'. */ isc_result_t dns_rbt_addnode(dns_rbt_t *rbt, const dns_name_t *name, dns_rbtnode_t **nodep) { /* * Does this thing have too many variables or what? */ dns_rbtnode_t **root, *parent, *child, *current, *new_current; dns_name_t *add_name, *new_name, current_name, *prefix, *suffix; dns_fixedname_t fixedcopy, fixedprefix, fixedsuffix, fnewname; dns_offsets_t current_offsets; dns_namereln_t compared; isc_result_t result = ISC_R_SUCCESS; unsigned int level_count; unsigned int common_labels; unsigned int nlabels, hlabels; int order; REQUIRE(VALID_RBT(rbt)); REQUIRE(dns_name_isabsolute(name)); REQUIRE(nodep != NULL && *nodep == NULL); /* * Dear future BIND developer, * * After you have tried attempting to optimize this routine by * using the hashtable and have realized your folly, please * append another cross ("X") below as a warning to the next * future BIND developer: * * Number of victim developers: X * * I wish the past developer had included such a notice. * * Long form: Unlike dns_rbt_findnode(), this function does not * lend itself to be optimized using the hashtable: * * 1. In the subtree where the insertion occurs, this function * needs to have the insertion point and the order where the * lookup terminated (i.e., at the insertion point where left or * right child is NULL). This cannot be determined from the * hashtable, so at least in that subtree, a BST O(log N) lookup * is necessary. * * 2. Our RBT nodes contain not only single labels but label * sequences to optimize space usage. So at every level, we have * to look for a match in the hashtable for all superdomains in * the rest of the name we're searching. This is an O(N) * operation at least, here N being the label size of name, each * of which is a hashtable lookup involving dns_name_equal() * comparisons. */ /* * Create a copy of the name so the original name structure is * not modified. */ add_name = dns_fixedname_initname(&fixedcopy); INSIST(add_name != NULL); dns_name_clone(name, add_name); if (ISC_UNLIKELY(rbt->root == NULL)) { result = create_node(rbt->mctx, add_name, &new_current); if (result == ISC_R_SUCCESS) { rbt->nodecount++; new_current->is_root = 1; UPPERNODE(new_current) = NULL; rbt->root = new_current; *nodep = new_current; hash_node(rbt, new_current, name); } return (result); } level_count = 0; prefix = dns_fixedname_initname(&fixedprefix); suffix = dns_fixedname_initname(&fixedsuffix); INSIST(prefix != NULL); INSIST(suffix != NULL); root = &rbt->root; INSIST(IS_ROOT(*root)); parent = NULL; current = NULL; child = *root; dns_name_init(¤t_name, current_offsets); new_name = dns_fixedname_initname(&fnewname); nlabels = dns_name_countlabels(name); hlabels = 0; do { current = child; NODENAME(current, ¤t_name); compared = dns_name_fullcompare(add_name, ¤t_name, &order, &common_labels); if (compared == dns_namereln_equal) { *nodep = current; result = ISC_R_EXISTS; break; } if (compared == dns_namereln_none) { if (order < 0) { parent = current; child = LEFT(current); } else if (order > 0) { parent = current; child = RIGHT(current); } } else { /* * This name has some suffix in common with the * name at the current node. If the name at * the current node is shorter, that means the * new name should be in a subtree. If the * name at the current node is longer, that means * the down pointer to this tree should point * to a new tree that has the common suffix, and * the non-common parts of these two names should * start a new tree. */ hlabels += common_labels; if (compared == dns_namereln_subdomain) { /* * All of the existing labels are in common, * so the new name is in a subtree. * Whack off the common labels for the * not-in-common part to be searched for * in the next level. */ dns_name_split(add_name, common_labels, add_name, NULL); /* * Follow the down pointer (possibly NULL). */ root = &DOWN(current); INSIST(*root == NULL || (IS_ROOT(*root) && PARENT(*root) == current)); parent = NULL; child = DOWN(current); INSIST(level_count < DNS_RBT_LEVELBLOCK); level_count++; } else { /* * The number of labels in common is fewer * than the number of labels at the current * node, so the current node must be adjusted * to have just the common suffix, and a down * pointer made to a new tree. */ INSIST(compared == dns_namereln_commonancestor || compared == dns_namereln_contains); /* * Ensure the number of levels in the tree * does not exceed the number of logical * levels allowed by DNSSEC. * * XXXDCL need a better error result? */ if (level_count >= DNS_RBT_LEVELBLOCK) { result = ISC_R_NOSPACE; break; } /* * Split the name into two parts, a prefix * which is the not-in-common parts of the * two names and a suffix that is the common * parts of them. */ dns_name_split(¤t_name, common_labels, prefix, suffix); result = create_node(rbt->mctx, suffix, &new_current); if (result != ISC_R_SUCCESS) { break; } /* * Reproduce the tree attributes of the * current node. */ new_current->is_root = current->is_root; if (current->nsec == DNS_RBT_NSEC_HAS_NSEC) { new_current->nsec = DNS_RBT_NSEC_NORMAL; } else { new_current->nsec = current->nsec; } PARENT(new_current) = PARENT(current); LEFT(new_current) = LEFT(current); RIGHT(new_current) = RIGHT(current); COLOR(new_current) = COLOR(current); /* * Fix pointers that were to the current node. */ if (parent != NULL) { if (LEFT(parent) == current) { LEFT(parent) = new_current; } else { RIGHT(parent) = new_current; } } if (LEFT(new_current) != NULL) { PARENT(LEFT(new_current)) = new_current; } if (RIGHT(new_current) != NULL) { PARENT(RIGHT(new_current)) = new_current; } if (*root == current) { *root = new_current; } NAMELEN(current) = prefix->length; OFFSETLEN(current) = prefix->labels; /* * Set up the new root of the next level. * By definition it will not be the top * level tree, so clear DNS_NAMEATTR_ABSOLUTE. */ current->is_root = 1; PARENT(current) = new_current; DOWN(new_current) = current; root = &DOWN(new_current); UPPERNODE(new_current) = UPPERNODE(current); UPPERNODE(current) = new_current; INSIST(level_count < DNS_RBT_LEVELBLOCK); level_count++; LEFT(current) = NULL; RIGHT(current) = NULL; MAKE_BLACK(current); ATTRS(current) &= ~DNS_NAMEATTR_ABSOLUTE; rbt->nodecount++; dns_name_getlabelsequence(name, nlabels - hlabels, hlabels, new_name); hash_node(rbt, new_current, new_name); if (common_labels == dns_name_countlabels(add_name)) { /* * The name has been added by pushing * the not-in-common parts down to * a new level. */ *nodep = new_current; return (ISC_R_SUCCESS); } else { /* * The current node has no data, * because it is just a placeholder. * Its data pointer is already NULL * from create_node()), so there's * nothing more to do to it. */ /* * The not-in-common parts of the new * name will be inserted into the new * level following this loop (unless * result != ISC_R_SUCCESS, which * is tested after the loop ends). */ dns_name_split(add_name, common_labels, add_name, NULL); break; } } } } while (ISC_LIKELY(child != NULL)); if (ISC_LIKELY(result == ISC_R_SUCCESS)) { result = create_node(rbt->mctx, add_name, &new_current); } if (ISC_LIKELY(result == ISC_R_SUCCESS)) { if (*root == NULL) { UPPERNODE(new_current) = current; } else { UPPERNODE(new_current) = PARENT(*root); } addonlevel(new_current, current, order, root); rbt->nodecount++; *nodep = new_current; hash_node(rbt, new_current, name); } return (result); } /* * Add a name to the tree of trees, associating it with some data. */ isc_result_t dns_rbt_addname(dns_rbt_t *rbt, const dns_name_t *name, void *data) { isc_result_t result; dns_rbtnode_t *node; REQUIRE(VALID_RBT(rbt)); REQUIRE(dns_name_isabsolute(name)); node = NULL; result = dns_rbt_addnode(rbt, name, &node); /* * dns_rbt_addnode will report the node exists even when * it does not have data associated with it, but the * dns_rbt_*name functions all behave depending on whether * there is data associated with a node. */ if (result == ISC_R_SUCCESS || (result == ISC_R_EXISTS && DATA(node) == NULL)) { DATA(node) = data; result = ISC_R_SUCCESS; } return (result); } /* * Find the node for "name" in the tree of trees. */ isc_result_t dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname, dns_rbtnode_t **node, dns_rbtnodechain_t *chain, unsigned int options, dns_rbtfindcallback_t callback, void *callback_arg) { dns_rbtnode_t *current, *last_compared; dns_rbtnodechain_t localchain; dns_name_t *search_name, current_name, *callback_name; dns_fixedname_t fixedcallbackname, fixedsearchname; dns_namereln_t compared; isc_result_t result, saved_result; unsigned int common_labels; unsigned int hlabels = 0; int order; REQUIRE(VALID_RBT(rbt)); REQUIRE(dns_name_isabsolute(name)); REQUIRE(node != NULL && *node == NULL); REQUIRE((options & (DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR)) != (DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR)); /* * If there is a chain it needs to appear to be in a sane state, * otherwise a chain is still needed to generate foundname and * callback_name. */ if (chain == NULL) { options |= DNS_RBTFIND_NOPREDECESSOR; chain = &localchain; dns_rbtnodechain_init(chain); } else { dns_rbtnodechain_reset(chain); } if (ISC_UNLIKELY(rbt->root == NULL)) { return (ISC_R_NOTFOUND); } /* * Appease GCC about variables it incorrectly thinks are * possibly used uninitialized. */ compared = dns_namereln_none; last_compared = NULL; order = 0; callback_name = dns_fixedname_initname(&fixedcallbackname); /* * search_name is the name segment being sought in each tree level. * By using a fixedname, the search_name will definitely have offsets * for use by any splitting. * By using dns_name_clone, no name data should be copied thanks to * the lack of bitstring labels. */ search_name = dns_fixedname_initname(&fixedsearchname); INSIST(search_name != NULL); dns_name_clone(name, search_name); dns_name_init(¤t_name, NULL); saved_result = ISC_R_SUCCESS; current = rbt->root; while (ISC_LIKELY(current != NULL)) { NODENAME(current, ¤t_name); compared = dns_name_fullcompare(search_name, ¤t_name, &order, &common_labels); /* * last_compared is used as a shortcut to start (or * continue rather) finding the stop-node of the search * when hashing was used (see much below in this * function). */ last_compared = current; if (compared == dns_namereln_equal) { break; } if (compared == dns_namereln_none) { /* * Here, current is pointing at a subtree root * node. We try to find a matching node using * the hashtable. We can get one of 3 results * here: (a) we locate the matching node, (b) we * find a node to which the current node has a * subdomain relation, (c) we fail to find (a) * or (b). */ dns_name_t hash_name; dns_rbtnode_t *hnode; dns_rbtnode_t *up_current; unsigned int nlabels; unsigned int tlabels = 1; uint32_t hash; /* * The case of current not being a subtree root, * that means a left or right pointer was * followed, only happens when the algorithm * fell through to the traditional binary search * because of a bitstring label. Since we * dropped the bitstring support, this should * not happen. */ INSIST(IS_ROOT(current)); nlabels = dns_name_countlabels(search_name); /* * current is the root of the current level, so * its parent is the same as its "up" pointer. */ up_current = PARENT(current); dns_name_init(&hash_name, NULL); hashagain: /* * Compute the hash over the full absolute * name. Look for the smallest suffix match at * this tree level (hlevel), and then at every * iteration, look for the next smallest suffix * match (add another subdomain label to the * absolute name being hashed). */ dns_name_getlabelsequence(name, nlabels - tlabels, hlabels + tlabels, &hash_name); hash = dns_name_fullhash(&hash_name, false); dns_name_getlabelsequence(search_name, nlabels - tlabels, tlabels, &hash_name); /* * Walk all the nodes in the hash bucket pointed * by the computed hash value. */ for (hnode = rbt->hashtable[hash_32(hash, rbt->hashbits)]; hnode != NULL; hnode = hnode->hashnext) { dns_name_t hnode_name; if (ISC_LIKELY(hash != HASHVAL(hnode))) { continue; } /* * This checks that the hashed label * sequence being looked up is at the * same tree level, so that we don't * match a labelsequence from some other * subdomain. */ if (ISC_LIKELY(get_upper_node(hnode) != up_current)) { continue; } dns_name_init(&hnode_name, NULL); NODENAME(hnode, &hnode_name); if (ISC_LIKELY(dns_name_equal(&hnode_name, &hash_name))) { break; } } if (hnode != NULL) { current = hnode; /* * This is an optimization. If hashing found * the right node, the next call to * dns_name_fullcompare() would obviously * return _equal or _subdomain. Determine * which of those would be the case by * checking if the full name was hashed. Then * make it look like dns_name_fullcompare * was called and jump to the right place. */ if (tlabels == nlabels) { compared = dns_namereln_equal; break; } else { common_labels = tlabels; compared = dns_namereln_subdomain; goto subdomain; } } if (tlabels++ < nlabels) { goto hashagain; } /* * All of the labels have been tried against the hash * table. Since we dropped the support of bitstring * labels, the name isn't in the table. */ current = NULL; continue; } else { /* * The names have some common suffix labels. * * If the number in common are equal in length to * the current node's name length, then follow the * down pointer and search in the new tree. */ if (compared == dns_namereln_subdomain) { subdomain: /* * Whack off the current node's common parts * for the name to search in the next level. */ dns_name_split(search_name, common_labels, search_name, NULL); hlabels += common_labels; /* * This might be the closest enclosing name. */ if (WANTEMPTYDATA_OR_DATA(options, current)) { *node = current; } /* * Point the chain to the next level. This * needs to be done before 'current' is pointed * there because the callback in the next * block of code needs the current 'current', * but in the event the callback requests that * the search be stopped then the * DNS_R_PARTIALMATCH code at the end of this * function needs the chain pointed to the * next level. */ ADD_LEVEL(chain, current); /* * The caller may want to interrupt the * downward search when certain special nodes * are traversed. If this is a special node, * the callback is used to learn what the * caller wants to do. */ if (callback != NULL && FINDCALLBACK(current)) { result = chain_name( chain, callback_name, false); if (result != ISC_R_SUCCESS) { dns_rbtnodechain_reset(chain); return (result); } result = (callback)(current, callback_name, callback_arg); if (result != DNS_R_CONTINUE) { saved_result = result; /* * Treat this node as if it * had no down pointer. */ current = NULL; break; } } /* * Finally, head to the next tree level. */ current = DOWN(current); } else { /* * Though there are labels in common, the * entire name at this node is not common * with the search name so the search * name does not exist in the tree. */ INSIST(compared == dns_namereln_commonancestor || compared == dns_namereln_contains); current = NULL; } } } /* * If current is not NULL, NOEXACT is not disallowing exact matches, * and either the node has data or an empty node is ok, return * ISC_R_SUCCESS to indicate an exact match. */ if (current != NULL && (options & DNS_RBTFIND_NOEXACT) == 0 && WANTEMPTYDATA_OR_DATA(options, current)) { /* * Found an exact match. */ chain->end = current; chain->level_matches = chain->level_count; if (foundname != NULL) { result = chain_name(chain, foundname, true); } else { result = ISC_R_SUCCESS; } if (result == ISC_R_SUCCESS) { *node = current; result = saved_result; } else { *node = NULL; } } else { /* * Did not find an exact match (or did not want one). */ if (*node != NULL) { /* * ... but found a partially matching superdomain. * Unwind the chain to the partial match node * to set level_matches to the level above the node, * and then to derive the name. * * chain->level_count is guaranteed to be at least 1 * here because by definition of finding a superdomain, * the chain is pointed to at least the first subtree. */ chain->level_matches = chain->level_count - 1; while (chain->levels[chain->level_matches] != *node) { INSIST(chain->level_matches > 0); chain->level_matches--; } if (foundname != NULL) { unsigned int saved_count = chain->level_count; chain->level_count = chain->level_matches + 1; result = chain_name(chain, foundname, false); chain->level_count = saved_count; } else { result = ISC_R_SUCCESS; } if (result == ISC_R_SUCCESS) { result = DNS_R_PARTIALMATCH; } } else { result = ISC_R_NOTFOUND; } if (current != NULL) { /* * There was an exact match but either * DNS_RBTFIND_NOEXACT was set, or * DNS_RBTFIND_EMPTYDATA was set and the node had no * data. A policy decision was made to set the * chain to the exact match, but this is subject * to change if it becomes apparent that something * else would be more useful. It is important that * this case is handled here, because the predecessor * setting code below assumes the match was not exact. */ INSIST(((options & DNS_RBTFIND_NOEXACT) != 0) || ((options & DNS_RBTFIND_EMPTYDATA) == 0 && DATA(current) == NULL)); chain->end = current; } else if ((options & DNS_RBTFIND_NOPREDECESSOR) != 0) { /* * Ensure the chain points nowhere. */ chain->end = NULL; } else { /* * Since there was no exact match, the chain argument * needs to be pointed at the DNSSEC predecessor of * the search name. */ if (compared == dns_namereln_subdomain) { /* * Attempted to follow a down pointer that was * NULL, which means the searched for name was * a subdomain of a terminal name in the tree. * Since there are no existing subdomains to * order against, the terminal name is the * predecessor. */ INSIST(chain->level_count > 0); INSIST(chain->level_matches < chain->level_count); chain->end = chain->levels[--chain->level_count]; } else { isc_result_t result2; /* * Point current to the node that stopped * the search. * * With the hashing modification that has been * added to the algorithm, the stop node of a * standard binary search is not known. So it * has to be found. There is probably a more * clever way of doing this. * * The assignment of current to NULL when * the relationship is *not* dns_namereln_none, * even though it later gets set to the same * last_compared anyway, is simply to not push * the while loop in one more level of * indentation. */ if (compared == dns_namereln_none) { current = last_compared; } else { current = NULL; } while (current != NULL) { NODENAME(current, ¤t_name); compared = dns_name_fullcompare( search_name, ¤t_name, &order, &common_labels); POST(compared); last_compared = current; /* * Standard binary search movement. */ if (order < 0) { current = LEFT(current); } else { current = RIGHT(current); } } current = last_compared; /* * Reached a point within a level tree that * positively indicates the name is not * present, but the stop node could be either * less than the desired name (order > 0) or * greater than the desired name (order < 0). * * If the stop node is less, it is not * necessarily the predecessor. If the stop * node has a down pointer, then the real * predecessor is at the end of a level below * (not necessarily the next level). * Move down levels until the rightmost node * does not have a down pointer. * * When the stop node is greater, it is * the successor. All the logic for finding * the predecessor is handily encapsulated * in dns_rbtnodechain_prev. In the event * that the search name is less than anything * else in the tree, the chain is reset. * XXX DCL What is the best way for the caller * to know that the search name has * no predecessor? */ if (order > 0) { if (DOWN(current) != NULL) { ADD_LEVEL(chain, current); result2 = move_chain_to_last( chain, DOWN(current)); if (result2 != ISC_R_SUCCESS) { result = result2; } } else { /* * Ah, the pure and simple * case. The stop node is the * predecessor. */ chain->end = current; } } else { INSIST(order < 0); chain->end = current; result2 = dns_rbtnodechain_prev( chain, NULL, NULL); if (result2 == ISC_R_SUCCESS || result2 == DNS_R_NEWORIGIN) { /* Nothing. */ } else if (result2 == ISC_R_NOMORE) { /* * There is no predecessor. */ dns_rbtnodechain_reset(chain); } else { result = result2; } } } } } ENSURE(*node == NULL || DNS_RBTNODE_VALID(*node)); return (result); } /* * Get the data pointer associated with 'name'. */ isc_result_t dns_rbt_findname(dns_rbt_t *rbt, const dns_name_t *name, unsigned int options, dns_name_t *foundname, void **data) { dns_rbtnode_t *node = NULL; isc_result_t result; REQUIRE(data != NULL && *data == NULL); result = dns_rbt_findnode(rbt, name, foundname, &node, NULL, options, NULL, NULL); if (node != NULL && WANTEMPTYDATA_OR_DATA(options, node)) { *data = DATA(node); } else { result = ISC_R_NOTFOUND; } return (result); } /* * Delete a name from the tree of trees. */ isc_result_t dns_rbt_deletename(dns_rbt_t *rbt, const dns_name_t *name, bool recurse) { dns_rbtnode_t *node = NULL; isc_result_t result; REQUIRE(VALID_RBT(rbt)); REQUIRE(dns_name_isabsolute(name)); /* * First, find the node. * * When searching, the name might not have an exact match: * consider a.b.a.com, b.b.a.com and c.b.a.com as the only * elements of a tree, which would make layer 1 a single * node tree of "b.a.com" and layer 2 a three node tree of * a, b, and c. Deleting a.com would find only a partial depth * match in the first layer. Should it be a requirement that * that the name to be deleted have data? For now, it is. * * ->dirty, ->locknum and ->references are ignored; they are * solely the province of rbtdb.c. */ result = dns_rbt_findnode(rbt, name, NULL, &node, NULL, DNS_RBTFIND_NOOPTIONS, NULL, NULL); if (result == ISC_R_SUCCESS) { if (DATA(node) != NULL) { result = dns_rbt_deletenode(rbt, node, recurse); } else { result = ISC_R_NOTFOUND; } } else if (result == DNS_R_PARTIALMATCH) { result = ISC_R_NOTFOUND; } return (result); } /* * Remove a node from the tree of trees. * * NOTE WELL: deletion is *not* symmetric with addition; that is, reversing * a sequence of additions to be deletions will not generally get the * tree back to the state it started in. For example, if the addition * of "b.c" caused the node "a.b.c" to be split, pushing "a" to its own level, * then the subsequent deletion of "b.c" will not cause "a" to be pulled up, * restoring "a.b.c". The RBT *used* to do this kind of rejoining, but it * turned out to be a bad idea because it could corrupt an active nodechain * that had "b.c" as one of its levels -- and the RBT has no idea what * nodechains are in use by callers, so it can't even *try* to helpfully * fix them up (which would probably be doomed to failure anyway). * * Similarly, it is possible to leave the tree in a state where a supposedly * deleted node still exists. The first case of this is obvious; take * the tree which has "b.c" on one level, pointing to "a". Now deleted "b.c". * It was just established in the previous paragraph why we can't pull "a" * back up to its parent level. But what happens when "a" then gets deleted? * "b.c" is left hanging around without data or children. This condition * is actually pretty easy to detect, but ... should it really be removed? * Is a chain pointing to it? An iterator? Who knows! (Note that the * references structure member cannot be looked at because it is private to * rbtdb.) This is ugly and makes me unhappy, but after hours of trying to * make it more aesthetically proper and getting nowhere, this is the way it * is going to stay until such time as it proves to be a *real* problem. * * Finally, for reference, note that the original routine that did node * joining was called join_nodes(). It has been excised, living now only * in the CVS history, but comments have been left behind that point to it just * in case someone wants to muck with this some more. * * The one positive aspect of all of this is that joining used to have a * case where it might fail. Without trying to join, now this function always * succeeds. It still returns isc_result_t, though, so the API wouldn't change. */ isc_result_t dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, bool recurse) { dns_rbtnode_t *parent; REQUIRE(VALID_RBT(rbt)); REQUIRE(DNS_RBTNODE_VALID(node)); INSIST(rbt->nodecount != 0); if (DOWN(node) != NULL) { if (recurse) { PARENT(DOWN(node)) = NULL; deletetreeflat(rbt, 0, true, &DOWN(node)); } else { if (DATA(node) != NULL && rbt->data_deleter != NULL) { rbt->data_deleter(DATA(node), rbt->deleter_arg); } DATA(node) = NULL; /* * Since there is at least one node below this one and * no recursion was requested, the deletion is * complete. The down node from this node might be all * by itself on a single level, so join_nodes() could * be used to collapse the tree (with all the caveats * of the comment at the start of this function). * But join_nodes() function has now been removed. */ return (ISC_R_SUCCESS); } } /* * Note the node that points to the level of the node * that is being deleted. If the deleted node is the * top level, parent will be set to NULL. */ parent = get_upper_node(node); /* * This node now has no down pointer, so now it needs * to be removed from this level. */ deletefromlevel(node, parent == NULL ? &rbt->root : &DOWN(parent)); if (DATA(node) != NULL && rbt->data_deleter != NULL) { rbt->data_deleter(DATA(node), rbt->deleter_arg); } unhash_node(rbt, node); #if DNS_RBT_USEMAGIC node->magic = 0; #endif /* if DNS_RBT_USEMAGIC */ isc_refcount_destroy(&node->references); freenode(rbt, &node); /* * This function never fails. */ return (ISC_R_SUCCESS); } void dns_rbt_namefromnode(dns_rbtnode_t *node, dns_name_t *name) { REQUIRE(DNS_RBTNODE_VALID(node)); REQUIRE(name != NULL); REQUIRE(name->offsets == NULL); NODENAME(node, name); } isc_result_t dns_rbt_fullnamefromnode(dns_rbtnode_t *node, dns_name_t *name) { dns_name_t current; isc_result_t result; REQUIRE(DNS_RBTNODE_VALID(node)); REQUIRE(name != NULL); REQUIRE(name->buffer != NULL); dns_name_init(¤t, NULL); dns_name_reset(name); do { INSIST(node != NULL); NODENAME(node, ¤t); result = dns_name_concatenate(name, ¤t, name, NULL); if (result != ISC_R_SUCCESS) { break; } node = get_upper_node(node); } while (!dns_name_isabsolute(name)); return (result); } char * dns_rbt_formatnodename(dns_rbtnode_t *node, char *printname, unsigned int size) { dns_fixedname_t fixedname; dns_name_t *name; isc_result_t result; REQUIRE(DNS_RBTNODE_VALID(node)); REQUIRE(printname != NULL); name = dns_fixedname_initname(&fixedname); result = dns_rbt_fullnamefromnode(node, name); if (result == ISC_R_SUCCESS) { dns_name_format(name, printname, size); } else { snprintf(printname, size, "", dns_result_totext(result)); } return (printname); } static isc_result_t create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep) { dns_rbtnode_t *node; isc_region_t region; unsigned int labels; size_t nodelen; REQUIRE(name->offsets != NULL); dns_name_toregion(name, ®ion); labels = dns_name_countlabels(name); ENSURE(labels > 0); /* * Allocate space for the node structure, the name, and the offsets. */ nodelen = sizeof(dns_rbtnode_t) + region.length + labels + 1; node = isc_mem_get(mctx, nodelen); memset(node, 0, nodelen); node->is_root = 0; PARENT(node) = NULL; RIGHT(node) = NULL; LEFT(node) = NULL; DOWN(node) = NULL; DATA(node) = NULL; node->is_mmapped = 0; node->down_is_relative = 0; node->left_is_relative = 0; node->right_is_relative = 0; node->parent_is_relative = 0; node->data_is_relative = 0; node->rpz = 0; HASHNEXT(node) = NULL; HASHVAL(node) = 0; ISC_LINK_INIT(node, deadlink); ISC_LINK_INIT(node, prunelink); LOCKNUM(node) = 0; WILD(node) = 0; DIRTY(node) = 0; isc_refcount_init(&node->references, 0); node->find_callback = 0; node->nsec = DNS_RBT_NSEC_NORMAL; MAKE_BLACK(node); /* * The following is stored to make reconstructing a name from the * stored value in the node easy: the length of the name, the number * of labels, whether the name is absolute or not, the name itself, * and the name's offsets table. * * XXX RTH * The offsets table could be made smaller by eliminating the * first offset, which is always 0. This requires changes to * lib/dns/name.c. * * Note: OLDOFFSETLEN *must* be assigned *after* OLDNAMELEN is assigned * as it uses OLDNAMELEN. */ OLDNAMELEN(node) = NAMELEN(node) = region.length; OLDOFFSETLEN(node) = OFFSETLEN(node) = labels; ATTRS(node) = name->attributes; memmove(NAME(node), region.base, region.length); memmove(OFFSETS(node), name->offsets, labels); #if DNS_RBT_USEMAGIC node->magic = DNS_RBTNODE_MAGIC; #endif /* if DNS_RBT_USEMAGIC */ *nodep = node; return (ISC_R_SUCCESS); } /* * Add a node to the hash table */ static void hash_add_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) { uint32_t hash; REQUIRE(name != NULL); HASHVAL(node) = dns_name_fullhash(name, false); hash = hash_32(HASHVAL(node), rbt->hashbits); HASHNEXT(node) = rbt->hashtable[hash]; rbt->hashtable[hash] = node; } /* * Initialize hash table */ static isc_result_t inithash(dns_rbt_t *rbt) { size_t size; rbt->hashbits = RBT_HASH_MIN_BITS; size = HASHSIZE(rbt->hashbits) * sizeof(dns_rbtnode_t *); rbt->hashtable = isc_mem_get(rbt->mctx, size); memset(rbt->hashtable, 0, size); return (ISC_R_SUCCESS); } static uint32_t rehash_bits(dns_rbt_t *rbt, size_t newcount) { uint32_t newbits = rbt->hashbits; while (newcount >= HASHSIZE(newbits) && newbits < RBT_HASH_MAX_BITS) { newbits += 1; } return (newbits); } /* * Rebuild the hashtable to reduce the load factor */ static void rehash(dns_rbt_t *rbt, uint32_t newbits) { uint32_t oldbits; size_t oldsize; dns_rbtnode_t **oldtable; size_t newsize; REQUIRE(rbt->hashbits <= rbt->maxhashbits); REQUIRE(newbits <= rbt->maxhashbits); oldbits = rbt->hashbits; oldsize = HASHSIZE(oldbits); oldtable = rbt->hashtable; rbt->hashbits = newbits; newsize = HASHSIZE(rbt->hashbits); rbt->hashtable = isc_mem_get(rbt->mctx, newsize * sizeof(dns_rbtnode_t *)); memset(rbt->hashtable, 0, newsize * sizeof(dns_rbtnode_t *)); for (size_t i = 0; i < oldsize; i++) { dns_rbtnode_t *node; dns_rbtnode_t *nextnode; for (node = oldtable[i]; node != NULL; node = nextnode) { uint32_t hash = hash_32(HASHVAL(node), rbt->hashbits); nextnode = HASHNEXT(node); HASHNEXT(node) = rbt->hashtable[hash]; rbt->hashtable[hash] = node; } } isc_mem_put(rbt->mctx, oldtable, oldsize * sizeof(dns_rbtnode_t *)); } static void maybe_rehash(dns_rbt_t *rbt, size_t newcount) { uint32_t newbits = rehash_bits(rbt, newcount); if (rbt->hashbits < newbits && newbits <= rbt->maxhashbits) { rehash(rbt, newbits); } } /* * Add a node to the hash table. Rehash the hashtable if the node count * rises above a critical level. */ static void hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) { REQUIRE(DNS_RBTNODE_VALID(node)); if (rbt->nodecount >= (HASHSIZE(rbt->hashbits) * RBT_HASH_OVERCOMMIT)) { maybe_rehash(rbt, rbt->nodecount); } hash_add_node(rbt, node, name); } /* * Remove a node from the hash table */ static void unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node) { uint32_t bucket; dns_rbtnode_t *bucket_node; REQUIRE(DNS_RBTNODE_VALID(node)); bucket = hash_32(HASHVAL(node), rbt->hashbits); bucket_node = rbt->hashtable[bucket]; if (bucket_node == node) { rbt->hashtable[bucket] = HASHNEXT(node); } else { while (HASHNEXT(bucket_node) != node) { INSIST(HASHNEXT(bucket_node) != NULL); bucket_node = HASHNEXT(bucket_node); } HASHNEXT(bucket_node) = HASHNEXT(node); } } static void rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp) { dns_rbtnode_t *child; REQUIRE(DNS_RBTNODE_VALID(node)); REQUIRE(rootp != NULL); child = RIGHT(node); INSIST(child != NULL); RIGHT(node) = LEFT(child); if (LEFT(child) != NULL) { PARENT(LEFT(child)) = node; } LEFT(child) = node; PARENT(child) = PARENT(node); if (IS_ROOT(node)) { *rootp = child; child->is_root = 1; node->is_root = 0; } else { if (LEFT(PARENT(node)) == node) { LEFT(PARENT(node)) = child; } else { RIGHT(PARENT(node)) = child; } } PARENT(node) = child; } static void rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp) { dns_rbtnode_t *child; REQUIRE(DNS_RBTNODE_VALID(node)); REQUIRE(rootp != NULL); child = LEFT(node); INSIST(child != NULL); LEFT(node) = RIGHT(child); if (RIGHT(child) != NULL) { PARENT(RIGHT(child)) = node; } RIGHT(child) = node; PARENT(child) = PARENT(node); if (IS_ROOT(node)) { *rootp = child; child->is_root = 1; node->is_root = 0; } else { if (LEFT(PARENT(node)) == node) { LEFT(PARENT(node)) = child; } else { RIGHT(PARENT(node)) = child; } } PARENT(node) = child; } /* * This is the real workhorse of the insertion code, because it does the * true red/black tree on a single level. */ static void addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order, dns_rbtnode_t **rootp) { dns_rbtnode_t *child, *root, *parent, *grandparent; dns_name_t add_name, current_name; dns_offsets_t add_offsets, current_offsets; REQUIRE(rootp != NULL); REQUIRE(DNS_RBTNODE_VALID(node) && LEFT(node) == NULL && RIGHT(node) == NULL); REQUIRE(current != NULL); root = *rootp; if (root == NULL) { /* * First node of a level. */ MAKE_BLACK(node); node->is_root = 1; PARENT(node) = current; *rootp = node; return; } child = root; POST(child); dns_name_init(&add_name, add_offsets); NODENAME(node, &add_name); dns_name_init(¤t_name, current_offsets); NODENAME(current, ¤t_name); if (order < 0) { INSIST(LEFT(current) == NULL); LEFT(current) = node; } else { INSIST(RIGHT(current) == NULL); RIGHT(current) = node; } INSIST(PARENT(node) == NULL); PARENT(node) = current; MAKE_RED(node); while (node != root && IS_RED(PARENT(node))) { /* * XXXDCL could do away with separate parent and grandparent * variables. They are vestiges of the days before parent * pointers. However, they make the code a little clearer. */ parent = PARENT(node); grandparent = PARENT(parent); if (parent == LEFT(grandparent)) { child = RIGHT(grandparent); if (child != NULL && IS_RED(child)) { MAKE_BLACK(parent); MAKE_BLACK(child); MAKE_RED(grandparent); node = grandparent; } else { if (node == RIGHT(parent)) { rotate_left(parent, &root); node = parent; parent = PARENT(node); grandparent = PARENT(parent); } MAKE_BLACK(parent); MAKE_RED(grandparent); rotate_right(grandparent, &root); } } else { child = LEFT(grandparent); if (child != NULL && IS_RED(child)) { MAKE_BLACK(parent); MAKE_BLACK(child); MAKE_RED(grandparent); node = grandparent; } else { if (node == LEFT(parent)) { rotate_right(parent, &root); node = parent; parent = PARENT(node); grandparent = PARENT(parent); } MAKE_BLACK(parent); MAKE_RED(grandparent); rotate_left(grandparent, &root); } } } MAKE_BLACK(root); ENSURE(IS_ROOT(root)); *rootp = root; return; } /* * This is the real workhorse of the deletion code, because it does the * true red/black tree on a single level. */ static void deletefromlevel(dns_rbtnode_t *item, dns_rbtnode_t **rootp) { dns_rbtnode_t *child, *sibling, *parent; dns_rbtnode_t *successor; REQUIRE(item != NULL); /* * Verify that the parent history is (apparently) correct. */ INSIST((IS_ROOT(item) && *rootp == item) || (!IS_ROOT(item) && (LEFT(PARENT(item)) == item || RIGHT(PARENT(item)) == item))); child = NULL; if (LEFT(item) == NULL) { if (RIGHT(item) == NULL) { if (IS_ROOT(item)) { /* * This is the only item in the tree. */ *rootp = NULL; return; } } else { /* * This node has one child, on the right. */ child = RIGHT(item); } } else if (RIGHT(item) == NULL) { /* * This node has one child, on the left. */ child = LEFT(item); } else { dns_rbtnode_t *saved_parent, *saved_right; int saved_color; /* * This node has two children, so it cannot be directly * deleted. Find its immediate in-order successor and * move it to this location, then do the deletion at the * old site of the successor. */ successor = RIGHT(item); while (LEFT(successor) != NULL) { successor = LEFT(successor); } /* * The successor cannot possibly have a left child; * if there is any child, it is on the right. */ if (RIGHT(successor) != NULL) { child = RIGHT(successor); } /* * Swap the two nodes; it would be simpler to just replace * the value being deleted with that of the successor, * but this rigamarole is done so the caller has complete * control over the pointers (and memory allocation) of * all of nodes. If just the key value were removed from * the tree, the pointer to the node would be unchanged. */ /* * First, put the successor in the tree location of the * node to be deleted. Save its existing tree pointer * information, which will be needed when linking up * delete to the successor's old location. */ saved_parent = PARENT(successor); saved_right = RIGHT(successor); saved_color = COLOR(successor); if (IS_ROOT(item)) { *rootp = successor; successor->is_root = true; item->is_root = false; } else if (LEFT(PARENT(item)) == item) { LEFT(PARENT(item)) = successor; } else { RIGHT(PARENT(item)) = successor; } PARENT(successor) = PARENT(item); LEFT(successor) = LEFT(item); RIGHT(successor) = RIGHT(item); COLOR(successor) = COLOR(item); if (LEFT(successor) != NULL) { PARENT(LEFT(successor)) = successor; } if (RIGHT(successor) != successor) { PARENT(RIGHT(successor)) = successor; } /* * Now relink the node to be deleted into the * successor's previous tree location. */ INSIST(!IS_ROOT(item)); if (saved_parent == item) { /* * Node being deleted was successor's parent. */ RIGHT(successor) = item; PARENT(item) = successor; } else { LEFT(saved_parent) = item; PARENT(item) = saved_parent; } /* * Original location of successor node has no left. */ LEFT(item) = NULL; RIGHT(item) = saved_right; COLOR(item) = saved_color; } /* * Remove the node by removing the links from its parent. */ if (!IS_ROOT(item)) { if (LEFT(PARENT(item)) == item) { LEFT(PARENT(item)) = child; } else { RIGHT(PARENT(item)) = child; } if (child != NULL) { PARENT(child) = PARENT(item); } } else { /* * This is the root being deleted, and at this point * it is known to have just one child. */ *rootp = child; child->is_root = 1; PARENT(child) = PARENT(item); } /* * Fix color violations. */ if (IS_BLACK(item)) { /* cppcheck-suppress nullPointerRedundantCheck symbolName=item */ parent = PARENT(item); while (child != *rootp && IS_BLACK(child)) { INSIST(child == NULL || !IS_ROOT(child)); if (LEFT(parent) == child) { sibling = RIGHT(parent); if (IS_RED(sibling)) { MAKE_BLACK(sibling); MAKE_RED(parent); rotate_left(parent, rootp); sibling = RIGHT(parent); } INSIST(sibling != NULL); /* cppcheck-suppress nullPointerRedundantCheck * symbolName=sibling */ if (IS_BLACK(LEFT(sibling)) && IS_BLACK(RIGHT(sibling))) { MAKE_RED(sibling); child = parent; } else { if (IS_BLACK(RIGHT(sibling))) { MAKE_BLACK(LEFT(sibling)); MAKE_RED(sibling); rotate_right(sibling, rootp); sibling = RIGHT(parent); } COLOR(sibling) = COLOR(parent); MAKE_BLACK(parent); INSIST(RIGHT(sibling) != NULL); MAKE_BLACK(RIGHT(sibling)); rotate_left(parent, rootp); child = *rootp; } } else { /* * Child is parent's right child. * Everything is done the same as above, * except mirrored. */ sibling = LEFT(parent); if (IS_RED(sibling)) { MAKE_BLACK(sibling); MAKE_RED(parent); rotate_right(parent, rootp); sibling = LEFT(parent); } INSIST(sibling != NULL); /* cppcheck-suppress nullPointerRedundantCheck * symbolName=sibling */ if (IS_BLACK(LEFT(sibling)) && IS_BLACK(RIGHT(sibling))) { MAKE_RED(sibling); child = parent; } else { if (IS_BLACK(LEFT(sibling))) { MAKE_BLACK(RIGHT(sibling)); MAKE_RED(sibling); rotate_left(sibling, rootp); sibling = LEFT(parent); } COLOR(sibling) = COLOR(parent); MAKE_BLACK(parent); INSIST(LEFT(sibling) != NULL); MAKE_BLACK(LEFT(sibling)); rotate_right(parent, rootp); child = *rootp; } } parent = PARENT(child); } if (IS_RED(child)) { MAKE_BLACK(child); } } } static void freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep) { dns_rbtnode_t *node = *nodep; *nodep = NULL; if (node->is_mmapped == 0) { isc_mem_put(rbt->mctx, node, NODE_SIZE(node)); } rbt->nodecount--; } static void deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, bool unhash, dns_rbtnode_t **nodep) { dns_rbtnode_t *root = *nodep; while (root != NULL) { /* * If there is a left, right or down node, walk into it * and iterate. */ if (LEFT(root) != NULL) { dns_rbtnode_t *node = root; root = LEFT(root); LEFT(node) = NULL; } else if (RIGHT(root) != NULL) { dns_rbtnode_t *node = root; root = RIGHT(root); RIGHT(node) = NULL; } else if (DOWN(root) != NULL) { dns_rbtnode_t *node = root; root = DOWN(root); DOWN(node) = NULL; } else { /* * There are no left, right or down nodes, so we * can free this one and go back to its parent. */ dns_rbtnode_t *node = root; root = PARENT(root); if (rbt->data_deleter != NULL && DATA(node) != NULL) { rbt->data_deleter(DATA(node), rbt->deleter_arg); } if (unhash) { unhash_node(rbt, node); } /* * Note: we don't call unhash_node() here as we * are destroying the complete RBT tree. */ #if DNS_RBT_USEMAGIC node->magic = 0; #endif /* if DNS_RBT_USEMAGIC */ freenode(rbt, &node); if (quantum != 0 && --quantum == 0) { break; } } } *nodep = root; } static size_t getheight_helper(dns_rbtnode_t *node) { size_t dl, dr; size_t this_height, down_height; if (node == NULL) { return (0); } dl = getheight_helper(LEFT(node)); dr = getheight_helper(RIGHT(node)); this_height = ISC_MAX(dl + 1, dr + 1); down_height = getheight_helper(DOWN(node)); return (ISC_MAX(this_height, down_height)); } size_t dns__rbt_getheight(dns_rbt_t *rbt) { return (getheight_helper(rbt->root)); } static bool check_properties_helper(dns_rbtnode_t *node) { if (node == NULL) { return (true); } if (IS_RED(node)) { /* Root nodes must be BLACK. */ if (IS_ROOT(node)) { return (false); } /* Both children of RED nodes must be BLACK. */ if (IS_RED(LEFT(node)) || IS_RED(RIGHT(node))) { return (false); } } /* cppcheck-suppress nullPointerRedundantCheck symbolName=node */ if ((DOWN(node) != NULL) && (!IS_ROOT(DOWN(node)))) { return (false); } if (IS_ROOT(node)) { if ((PARENT(node) != NULL) && (DOWN(PARENT(node)) != node)) { return (false); } if (get_upper_node(node) != PARENT(node)) { return (false); } } /* If node is assigned to the down_ pointer of its parent, it is * a subtree root and must have the flag set. */ if (((!PARENT(node)) || (DOWN(PARENT(node)) == node)) && (!IS_ROOT(node))) { return (false); } /* Repeat tests with this node's children. */ return (check_properties_helper(LEFT(node)) && check_properties_helper(RIGHT(node)) && check_properties_helper(DOWN(node))); } static bool check_black_distance_helper(dns_rbtnode_t *node, size_t *distance) { size_t dl, dr, dd; if (node == NULL) { *distance = 1; return (true); } /* cppcheck-suppress nullPointerRedundantCheck symbolName=node */ if (!check_black_distance_helper(LEFT(node), &dl)) { return (false); } /* cppcheck-suppress nullPointerRedundantCheck symbolName=node */ if (!check_black_distance_helper(RIGHT(node), &dr)) { return (false); } /* cppcheck-suppress nullPointerRedundantCheck symbolName=node */ if (!check_black_distance_helper(DOWN(node), &dd)) { return (false); } /* Left and right side black node counts must match. */ if (dl != dr) { return (false); } if (IS_BLACK(node)) { dl++; } *distance = dl; return (true); } bool dns__rbt_checkproperties(dns_rbt_t *rbt) { size_t dd; if (!check_properties_helper(rbt->root)) { return (false); } /* Path from a given node to all its leaves must contain the * same number of BLACK child nodes. This is done separately * here instead of inside check_properties_helper() as * it would take (n log n) complexity otherwise. */ return (check_black_distance_helper(rbt->root, &dd)); } static void dns_rbt_indent(FILE *f, int depth) { int i; fprintf(f, "%4d ", depth); for (i = 0; i < depth; i++) { fprintf(f, "- "); } } void dns_rbt_printnodeinfo(dns_rbtnode_t *n, FILE *f) { if (n == NULL) { fprintf(f, "Null node\n"); return; } fprintf(f, "Node info for nodename: "); printnodename(n, true, f); fprintf(f, "\n"); fprintf(f, "n = %p\n", n); fprintf(f, "Relative pointers: %s%s%s%s%s\n", (n->parent_is_relative == 1 ? " P" : ""), (n->right_is_relative == 1 ? " R" : ""), (n->left_is_relative == 1 ? " L" : ""), (n->down_is_relative == 1 ? " D" : ""), (n->data_is_relative == 1 ? " T" : "")); fprintf(f, "node lock address = %u\n", n->locknum); fprintf(f, "Parent: %p\n", n->parent); fprintf(f, "Right: %p\n", n->right); fprintf(f, "Left: %p\n", n->left); fprintf(f, "Down: %p\n", n->down); fprintf(f, "Data: %p\n", n->data); } static void printnodename(dns_rbtnode_t *node, bool quoted, FILE *f) { isc_region_t r; dns_name_t name; char buffer[DNS_NAME_FORMATSIZE]; dns_offsets_t offsets; r.length = NAMELEN(node); r.base = NAME(node); dns_name_init(&name, offsets); dns_name_fromregion(&name, &r); dns_name_format(&name, buffer, sizeof(buffer)); if (quoted) { fprintf(f, "\"%s\"", buffer); } else { fprintf(f, "%s", buffer); } } static void print_text_helper(dns_rbtnode_t *root, dns_rbtnode_t *parent, int depth, const char *direction, void (*data_printer)(FILE *, void *), FILE *f) { dns_rbt_indent(f, depth); if (root != NULL) { printnodename(root, true, f); /* * Don't use IS_RED(root) as it tests for 'root != NULL' * and cppcheck produces false positives. */ fprintf(f, " (%s, %s", direction, COLOR(root) == RED ? "RED" : "BLACK"); if ((!IS_ROOT(root) && PARENT(root) != parent) || (IS_ROOT(root) && depth > 0 && DOWN(PARENT(root)) != root)) { fprintf(f, " (BAD parent pointer! -> "); if (PARENT(root) != NULL) { printnodename(PARENT(root), true, f); } else { fprintf(f, "NULL"); } fprintf(f, ")"); } fprintf(f, ")"); if (root->data != NULL && data_printer != NULL) { fprintf(f, " data@%p: ", root->data); data_printer(f, root->data); } fprintf(f, "\n"); depth++; /* * Don't use IS_RED(root) as it tests for 'root != NULL' * and cppcheck produces false positives. */ if (COLOR(root) == RED && IS_RED(LEFT(root))) { fprintf(f, "** Red/Red color violation on left\n"); } print_text_helper(LEFT(root), root, depth, "left", data_printer, f); /* * Don't use IS_RED(root) as cppcheck produces false positives. */ if (COLOR(root) == RED && IS_RED(RIGHT(root))) { fprintf(f, "** Red/Red color violation on right\n"); } print_text_helper(RIGHT(root), root, depth, "right", data_printer, f); print_text_helper(DOWN(root), NULL, depth, "down", data_printer, f); } else { fprintf(f, "NULL (%s)\n", direction); } } void dns_rbt_printtext(dns_rbt_t *rbt, void (*data_printer)(FILE *, void *), FILE *f) { REQUIRE(VALID_RBT(rbt)); print_text_helper(rbt->root, NULL, 0, "root", data_printer, f); } static int print_dot_helper(dns_rbtnode_t *node, unsigned int *nodecount, bool show_pointers, FILE *f) { unsigned int l, r, d; if (node == NULL) { return (0); } l = print_dot_helper(LEFT(node), nodecount, show_pointers, f); r = print_dot_helper(RIGHT(node), nodecount, show_pointers, f); d = print_dot_helper(DOWN(node), nodecount, show_pointers, f); *nodecount += 1; fprintf(f, "node%u[label = \" | ", *nodecount); printnodename(node, false, f); fprintf(f, "|"); if (show_pointers) { fprintf(f, "| n=%p| p=%p", node, PARENT(node)); } fprintf(f, "\"] ["); if (IS_RED(node)) { fprintf(f, "color=red"); } else { fprintf(f, "color=black"); } /* XXXMUKS: verify that IS_ROOT() indicates subtree root and not * forest root. */ if (IS_ROOT(node)) { fprintf(f, ",penwidth=3"); } if (IS_EMPTY(node)) { fprintf(f, ",style=filled,fillcolor=lightgrey"); } fprintf(f, "];\n"); if (LEFT(node) != NULL) { fprintf(f, "\"node%u\":f0 -> \"node%u\":f1;\n", *nodecount, l); } if (DOWN(node) != NULL) { fprintf(f, "\"node%u\":f1 -> \"node%u\":f1 [penwidth=5];\n", *nodecount, d); } if (RIGHT(node) != NULL) { fprintf(f, "\"node%u\":f2 -> \"node%u\":f1;\n", *nodecount, r); } return (*nodecount); } void dns_rbt_printdot(dns_rbt_t *rbt, bool show_pointers, FILE *f) { unsigned int nodecount = 0; REQUIRE(VALID_RBT(rbt)); fprintf(f, "digraph g {\n"); fprintf(f, "node [shape = record,height=.1];\n"); print_dot_helper(rbt->root, &nodecount, show_pointers, f); fprintf(f, "}\n"); } /* * Chain Functions */ void dns_rbtnodechain_init(dns_rbtnodechain_t *chain) { REQUIRE(chain != NULL); /* * Initialize 'chain'. */ chain->end = NULL; chain->level_count = 0; chain->level_matches = 0; memset(chain->levels, 0, sizeof(chain->levels)); chain->magic = CHAIN_MAGIC; } isc_result_t dns_rbtnodechain_current(dns_rbtnodechain_t *chain, dns_name_t *name, dns_name_t *origin, dns_rbtnode_t **node) { isc_result_t result = ISC_R_SUCCESS; REQUIRE(VALID_CHAIN(chain)); if (node != NULL) { *node = chain->end; } if (chain->end == NULL) { return (ISC_R_NOTFOUND); } if (name != NULL) { NODENAME(chain->end, name); if (chain->level_count == 0) { /* * Names in the top level tree are all absolute. * Always make 'name' relative. */ INSIST(dns_name_isabsolute(name)); /* * This is cheaper than dns_name_getlabelsequence(). */ name->labels--; name->length--; name->attributes &= ~DNS_NAMEATTR_ABSOLUTE; } } if (origin != NULL) { if (chain->level_count > 0) { result = chain_name(chain, origin, false); } else { dns_name_copynf(dns_rootname, origin); } } return (result); } isc_result_t dns_rbtnodechain_prev(dns_rbtnodechain_t *chain, dns_name_t *name, dns_name_t *origin) { dns_rbtnode_t *current, *previous, *predecessor; isc_result_t result = ISC_R_SUCCESS; bool new_origin = false; REQUIRE(VALID_CHAIN(chain) && chain->end != NULL); predecessor = NULL; current = chain->end; if (LEFT(current) != NULL) { /* * Moving left one then right as far as possible is the * previous node, at least for this level. */ current = LEFT(current); while (RIGHT(current) != NULL) { current = RIGHT(current); } predecessor = current; } else { /* * No left links, so move toward the root. If at any point on * the way there the link from parent to child is a right * link, then the parent is the previous node, at least * for this level. */ while (!IS_ROOT(current)) { previous = current; current = PARENT(current); if (RIGHT(current) == previous) { predecessor = current; break; } } } if (predecessor != NULL) { /* * Found a predecessor node in this level. It might not * really be the predecessor, however. */ if (DOWN(predecessor) != NULL) { /* * The predecessor is really down at least one level. * Go down and as far right as possible, and repeat * as long as the rightmost node has a down pointer. */ do { /* * XXX DCL Need to do something about origins * here. See whether to go down, and if so * whether it is truly what Bob calls a * new origin. */ ADD_LEVEL(chain, predecessor); predecessor = DOWN(predecessor); /* XXX DCL duplicated from above; clever * way to unduplicate? */ while (RIGHT(predecessor) != NULL) { predecessor = RIGHT(predecessor); } } while (DOWN(predecessor) != NULL); /* XXX DCL probably needs work on the concept */ if (origin != NULL) { new_origin = true; } } } else if (chain->level_count > 0) { /* * Dang, didn't find a predecessor in this level. * Got to the root of this level without having traversed * any right links. Ascend the tree one level; the * node that points to this tree is the predecessor. */ INSIST(chain->level_count > 0 && IS_ROOT(current)); predecessor = chain->levels[--chain->level_count]; /* XXX DCL probably needs work on the concept */ /* * Don't declare an origin change when the new origin is "." * at the top level tree, because "." is declared as the origin * for the second level tree. */ if (origin != NULL && (chain->level_count > 0 || OFFSETLEN(predecessor) > 1)) { new_origin = true; } } if (predecessor != NULL) { chain->end = predecessor; if (new_origin) { result = dns_rbtnodechain_current(chain, name, origin, NULL); if (result == ISC_R_SUCCESS) { result = DNS_R_NEWORIGIN; } } else { result = dns_rbtnodechain_current(chain, name, NULL, NULL); } } else { result = ISC_R_NOMORE; } return (result); } isc_result_t dns_rbtnodechain_down(dns_rbtnodechain_t *chain, dns_name_t *name, dns_name_t *origin) { dns_rbtnode_t *current, *successor; isc_result_t result = ISC_R_SUCCESS; bool new_origin = false; REQUIRE(VALID_CHAIN(chain) && chain->end != NULL); successor = NULL; current = chain->end; if (DOWN(current) != NULL) { /* * Don't declare an origin change when the new origin is "." * at the second level tree, because "." is already declared * as the origin for the top level tree. */ if (chain->level_count > 0 || OFFSETLEN(current) > 1) { new_origin = true; } ADD_LEVEL(chain, current); current = DOWN(current); while (LEFT(current) != NULL) { current = LEFT(current); } successor = current; } if (successor != NULL) { chain->end = successor; /* * It is not necessary to use dns_rbtnodechain_current like * the other functions because this function will never * find a node in the topmost level. This is because the * root level will never be more than one name, and everything * in the megatree is a successor to that node, down at * the second level or below. */ if (name != NULL) { NODENAME(chain->end, name); } if (new_origin) { if (origin != NULL) { result = chain_name(chain, origin, false); } if (result == ISC_R_SUCCESS) { result = DNS_R_NEWORIGIN; } } else { result = ISC_R_SUCCESS; } } else { result = ISC_R_NOMORE; } return (result); } isc_result_t dns_rbtnodechain_nextflat(dns_rbtnodechain_t *chain, dns_name_t *name) { dns_rbtnode_t *current, *previous, *successor; isc_result_t result = ISC_R_SUCCESS; REQUIRE(VALID_CHAIN(chain) && chain->end != NULL); successor = NULL; current = chain->end; if (RIGHT(current) == NULL) { while (!IS_ROOT(current)) { previous = current; current = PARENT(current); if (LEFT(current) == previous) { successor = current; break; } } } else { current = RIGHT(current); while (LEFT(current) != NULL) { current = LEFT(current); } successor = current; } if (successor != NULL) { chain->end = successor; if (name != NULL) { NODENAME(chain->end, name); } result = ISC_R_SUCCESS; } else { result = ISC_R_NOMORE; } return (result); } isc_result_t dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name, dns_name_t *origin) { dns_rbtnode_t *current, *previous, *successor; isc_result_t result = ISC_R_SUCCESS; bool new_origin = false; REQUIRE(VALID_CHAIN(chain) && chain->end != NULL); successor = NULL; current = chain->end; /* * If there is a level below this node, the next node is the leftmost * node of the next level. */ if (DOWN(current) != NULL) { /* * Don't declare an origin change when the new origin is "." * at the second level tree, because "." is already declared * as the origin for the top level tree. */ if (chain->level_count > 0 || OFFSETLEN(current) > 1) { new_origin = true; } ADD_LEVEL(chain, current); current = DOWN(current); while (LEFT(current) != NULL) { current = LEFT(current); } successor = current; } else if (RIGHT(current) == NULL) { /* * The successor is up, either in this level or a previous one. * Head back toward the root of the tree, looking for any path * that was via a left link; the successor is the node that has * that left link. In the event the root of the level is * reached without having traversed any left links, ascend one * level and look for either a right link off the point of * ascent, or search for a left link upward again, repeating * ascends until either case is true. */ do { while (!IS_ROOT(current)) { previous = current; current = PARENT(current); if (LEFT(current) == previous) { successor = current; break; } } if (successor == NULL) { /* * Reached the root without having traversed * any left pointers, so this level is done. */ if (chain->level_count == 0) { /* * If the tree we are iterating over * was modified since this chain was * initialized in a way that caused * node splits to occur, "current" may * now be pointing to a root node which * appears to be at level 0, but still * has a parent. If that happens, * abort. Otherwise, we are done * looking for a successor as we really * reached the root node on level 0. */ INSIST(PARENT(current) == NULL); break; } current = chain->levels[--chain->level_count]; new_origin = true; if (RIGHT(current) != NULL) { break; } } } while (successor == NULL); } if (successor == NULL && RIGHT(current) != NULL) { current = RIGHT(current); while (LEFT(current) != NULL) { current = LEFT(current); } successor = current; } if (successor != NULL) { /* * If we determine that the current node is the successor to * itself, we will run into an infinite loop, so abort instead. */ INSIST(chain->end != successor); chain->end = successor; /* * It is not necessary to use dns_rbtnodechain_current like * the other functions because this function will never * find a node in the topmost level. This is because the * root level will never be more than one name, and everything * in the megatree is a successor to that node, down at * the second level or below. */ if (name != NULL) { NODENAME(chain->end, name); } if (new_origin) { if (origin != NULL) { result = chain_name(chain, origin, false); } if (result == ISC_R_SUCCESS) { result = DNS_R_NEWORIGIN; } } else { result = ISC_R_SUCCESS; } } else { result = ISC_R_NOMORE; } return (result); } isc_result_t dns_rbtnodechain_first(dns_rbtnodechain_t *chain, dns_rbt_t *rbt, dns_name_t *name, dns_name_t *origin) { isc_result_t result; REQUIRE(VALID_RBT(rbt)); REQUIRE(VALID_CHAIN(chain)); dns_rbtnodechain_reset(chain); chain->end = rbt->root; result = dns_rbtnodechain_current(chain, name, origin, NULL); if (result == ISC_R_SUCCESS) { result = DNS_R_NEWORIGIN; } return (result); } isc_result_t dns_rbtnodechain_last(dns_rbtnodechain_t *chain, dns_rbt_t *rbt, dns_name_t *name, dns_name_t *origin) { isc_result_t result; REQUIRE(VALID_RBT(rbt)); REQUIRE(VALID_CHAIN(chain)); dns_rbtnodechain_reset(chain); result = move_chain_to_last(chain, rbt->root); if (result != ISC_R_SUCCESS) { return (result); } result = dns_rbtnodechain_current(chain, name, origin, NULL); if (result == ISC_R_SUCCESS) { result = DNS_R_NEWORIGIN; } return (result); } void dns_rbtnodechain_reset(dns_rbtnodechain_t *chain) { REQUIRE(VALID_CHAIN(chain)); /* * Free any dynamic storage associated with 'chain', and then * reinitialize 'chain'. */ chain->end = NULL; chain->level_count = 0; chain->level_matches = 0; } void dns_rbtnodechain_invalidate(dns_rbtnodechain_t *chain) { /* * Free any dynamic storage associated with 'chain', and then * invalidate 'chain'. */ dns_rbtnodechain_reset(chain); chain->magic = 0; } /* XXXMUKS: * * - worth removing inline as static functions are inlined automatically * where suitable by modern compilers. * - bump the size of dns_rbt.nodecount to size_t. * - the dumpfile header also contains a nodecount that is unsigned * int. If large files (> 2^32 nodes) are to be supported, the * allocation for this field should be increased. */