/* $NetBSD: adb.c,v 1.9.2.2 2024/02/25 15:46:47 martin Exp $ */ /* * Copyright (C) Internet Systems Consortium, Inc. ("ISC") * * SPDX-License-Identifier: MPL-2.0 * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, you can obtain one at https://mozilla.org/MPL/2.0/. * * See the COPYRIGHT file distributed with this work for additional * information regarding copyright ownership. */ /*! \file * * \note * In finds, if task == NULL, no events will be generated, and no events * have been sent. If task != NULL but taskaction == NULL, an event has been * posted but not yet freed. If neither are NULL, no event was posted. * */ #include #include #include #include #include #include #include #include #include #include /* Required for HP/UX (and others?) */ #include #include #include #include #include #include #include #include #include #include #include #include #define DNS_ADB_MAGIC ISC_MAGIC('D', 'a', 'd', 'b') #define DNS_ADB_VALID(x) ISC_MAGIC_VALID(x, DNS_ADB_MAGIC) #define DNS_ADBNAME_MAGIC ISC_MAGIC('a', 'd', 'b', 'N') #define DNS_ADBNAME_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBNAME_MAGIC) #define DNS_ADBNAMEHOOK_MAGIC ISC_MAGIC('a', 'd', 'N', 'H') #define DNS_ADBNAMEHOOK_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBNAMEHOOK_MAGIC) #define DNS_ADBLAMEINFO_MAGIC ISC_MAGIC('a', 'd', 'b', 'Z') #define DNS_ADBLAMEINFO_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBLAMEINFO_MAGIC) #define DNS_ADBENTRY_MAGIC ISC_MAGIC('a', 'd', 'b', 'E') #define DNS_ADBENTRY_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBENTRY_MAGIC) #define DNS_ADBFETCH_MAGIC ISC_MAGIC('a', 'd', 'F', '4') #define DNS_ADBFETCH_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBFETCH_MAGIC) #define DNS_ADBFETCH6_MAGIC ISC_MAGIC('a', 'd', 'F', '6') #define DNS_ADBFETCH6_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBFETCH6_MAGIC) /*! * For type 3 negative cache entries, we will remember that the address is * broken for this long. XXXMLG This is also used for actual addresses, too. 
* The intent is to keep us from constantly asking about A/AAAA records
 * if the zone has extremely low TTLs.
 */
#define ADB_CACHE_MINIMUM 10	/*%< seconds */
#define ADB_CACHE_MAXIMUM 86400 /*%< seconds (86400 = 24 hours) */
#define ADB_ENTRY_WINDOW  1800	/*%< seconds */

/*%
 * The period in seconds after which an ADB name entry is regarded as stale
 * and forced to be cleaned up.
 * TODO: This should probably be configurable at run-time.
 */
#ifndef ADB_STALE_MARGIN
#define ADB_STALE_MARGIN 1800
#endif /* ifndef ADB_STALE_MARGIN */

#define FREE_ITEMS 64 /*%< free count for memory pools */
#define FILL_COUNT 16 /*%< fill count for memory pools */

#define DNS_ADB_INVALIDBUCKET (-1) /*%< invalid bucket address */

#define DNS_ADB_MINADBSIZE (1024U * 1024U) /*%< 1 Megabyte */

/*
 * List and forward type declarations used by the structures below.
 */
typedef ISC_LIST(dns_adbname_t) dns_adbnamelist_t;
typedef struct dns_adbnamehook dns_adbnamehook_t;
typedef ISC_LIST(dns_adbnamehook_t) dns_adbnamehooklist_t;
typedef struct dns_adblameinfo dns_adblameinfo_t;
typedef ISC_LIST(dns_adbentry_t) dns_adbentrylist_t;
typedef struct dns_adbfetch dns_adbfetch_t;
typedef struct dns_adbfetch6 dns_adbfetch6_t;

/*% dns adb structure */
struct dns_adb {
	unsigned int magic;

	isc_mutex_t lock;
	isc_mutex_t reflock;	 /*%< Covers irefcnt, erefcnt */
	isc_mutex_t overmemlock; /*%< Covers overmem */
	/*
	 * 'mctx' is the context queried with isc_mem_isovermem() and used
	 * to free name targets; 'hmctx' is the context the bucket arrays
	 * (names, entries, locks, *_sd, *_refcnt) are allocated from when
	 * the tables grow.
	 */
	isc_mem_t *mctx;
	isc_mem_t *hmctx;
	dns_view_t *view;

	isc_taskmgr_t *taskmgr;
	isc_task_t *task;
	isc_task_t *excl;

	isc_interval_t tick_interval;
	int next_cleanbucket;

	unsigned int irefcnt;
	unsigned int erefcnt;

	isc_refcount_t ahrefcnt;
	isc_refcount_t nhrefcnt;

	/*!
	 * Bucketized locks and lists for names.
	 *
	 * XXXRTH  Have a per-bucket structure that contains all of these?
	 *
	 * All arrays below have 'nnames' elements and are indexed by
	 * bucket number.  name_sd[b] is set once bucket b is shutting
	 * down; name_refcnt[b] counts the names currently linked into
	 * names[b] or deadnames[b].
	 */
	unsigned int nnames;
	isc_mutex_t namescntlock;
	unsigned int namescnt;
	dns_adbnamelist_t *names;
	dns_adbnamelist_t *deadnames;
	isc_mutex_t *namelocks;
	bool *name_sd;
	unsigned int *name_refcnt;

	/*!
	 * Bucketized locks and lists for entries.
	 *
	 * XXXRTH  Have a per-bucket structure that contains all of these?
	 */
	unsigned int nentries;
	isc_mutex_t entriescntlock;
	unsigned int entriescnt;
	dns_adbentrylist_t *entries;
	dns_adbentrylist_t *deadentries;
	isc_mutex_t *entrylocks;
	bool *entry_sd; /*%< shutting down */
	unsigned int *entry_refcnt;

	isc_event_t cevent;
	bool cevent_out;
	atomic_bool shutting_down;
	isc_eventlist_t whenshutdown;
	/*
	 * Events used to request a table resize.  The *_sent flags are
	 * cleared by grow_entries()/grow_names() only after a successful
	 * resize.
	 */
	isc_event_t growentries;
	bool growentries_sent;
	isc_event_t grownames;
	bool grownames_sent;

	uint32_t quota;
	uint32_t atr_freq;
	double atr_low;
	double atr_high;
	double atr_discount;
};

/*
 * XXXMLG  Document these structures.
 */

/*% dns_adbname structure */
struct dns_adbname {
	unsigned int magic;
	dns_name_t name;
	dns_adb_t *adb; /* owning ADB */
	unsigned int partial_result;
	unsigned int flags; /* NAME_IS_DEAD etc. plus DNS_ADBFIND_* bits */
	int lock_bucket;    /* bucket index, or DNS_ADB_INVALIDBUCKET */
	dns_name_t target;
	isc_stdtime_t expire_target;
	isc_stdtime_t expire_v4;
	isc_stdtime_t expire_v6;
	unsigned int chains;
	dns_adbnamehooklist_t v4; /* namehooks for IPv4 addresses */
	dns_adbnamehooklist_t v6; /* namehooks for IPv6 addresses */
	dns_adbfetch_t *fetch_a;    /* non-NULL while an A fetch exists */
	dns_adbfetch_t *fetch_aaaa; /* non-NULL while an AAAA fetch exists */
	unsigned int fetch_err;	    /* FIND_ERR_* value for v4 */
	unsigned int fetch6_err;    /* FIND_ERR_* value for v6 */
	dns_adbfindlist_t finds;
	/* for LRU-based management */
	isc_stdtime_t last_used;

	ISC_LINK(dns_adbname_t) plink;
};

/*% The adbfetch structure */
struct dns_adbfetch {
	unsigned int magic;
	dns_fetch_t *fetch;
	dns_rdataset_t rdataset;
	unsigned int depth;
};

/*%
 * This is a small widget that dangles off a dns_adbname_t.  It contains a
 * pointer to the address information about this host, and a link to the next
 * namehook that will contain the next address this host has.
 */
struct dns_adbnamehook {
	unsigned int magic;
	dns_adbentry_t *entry;
	ISC_LINK(dns_adbnamehook_t) plink;
};

/*%
 * This is a small widget that holds qname-specific information about an
 * address.  Currently limited to lameness, but could just as easily be
 * extended to other types of information about zones.
 */
struct dns_adblameinfo {
	unsigned int magic;

	dns_name_t qname;
	dns_rdatatype_t qtype;
	isc_stdtime_t lame_timer;

	ISC_LINK(dns_adblameinfo_t) plink;
};

/*%
 * An address entry.  
It holds quite a bit of information about addresses,
 * including edns state (in "flags"), rtt, and of course the address of
 * the host.
 */
struct dns_adbentry {
	unsigned int magic;

	int lock_bucket;     /* bucket index, or DNS_ADB_INVALIDBUCKET */
	unsigned int refcnt; /* references held on this entry */
	unsigned int nh;     /* number of namehooks pointing here */

	unsigned int flags;
	unsigned int srtt;
	uint16_t udpsize;
	unsigned int completed;
	unsigned int timeouts;
	unsigned char plain;
	unsigned char plainto;
	unsigned char edns;
	unsigned char ednsto;

	uint8_t mode;
	atomic_uint_fast32_t quota;
	atomic_uint_fast32_t active;
	double atr;

	isc_sockaddr_t sockaddr;
	unsigned char *cookie;
	uint16_t cookielen;

	isc_stdtime_t expires;
	isc_stdtime_t lastage;
	/*%<
	 * A nonzero 'expires' field indicates that the entry should
	 * persist until that time.  This allows entries found
	 * using dns_adb_findaddrinfo() to persist for a limited time
	 * even though they are not necessarily associated with a
	 * name.
	 */

	ISC_LIST(dns_adblameinfo_t) lameinfo;
	ISC_LINK(dns_adbentry_t) plink;
};

/*
 * Internal functions (and prototypes).
 */

/* Allocation and release of the ADB's internal objects. */
static dns_adbname_t *
new_adbname(dns_adb_t *, const dns_name_t *);
static void
free_adbname(dns_adb_t *, dns_adbname_t **);
static dns_adbnamehook_t *
new_adbnamehook(dns_adb_t *, dns_adbentry_t *);
static void
free_adbnamehook(dns_adb_t *, dns_adbnamehook_t **);
static dns_adblameinfo_t *
new_adblameinfo(dns_adb_t *, const dns_name_t *, dns_rdatatype_t);
static void
free_adblameinfo(dns_adb_t *, dns_adblameinfo_t **);
static dns_adbentry_t *
new_adbentry(dns_adb_t *);
static void
free_adbentry(dns_adb_t *, dns_adbentry_t **);
static dns_adbfind_t *
new_adbfind(dns_adb_t *);
static bool
free_adbfind(dns_adb_t *, dns_adbfind_t **);
static dns_adbaddrinfo_t *
new_adbaddrinfo(dns_adb_t *, dns_adbentry_t *, in_port_t);
static dns_adbfetch_t *
new_adbfetch(dns_adb_t *);
static void
free_adbfetch(dns_adb_t *, dns_adbfetch_t **);

/* Bucket lookup helpers. */
static dns_adbname_t *
find_name_and_lock(dns_adb_t *, const dns_name_t *, unsigned int, int *);
static dns_adbentry_t *
find_entry_and_lock(dns_adb_t *, const isc_sockaddr_t *, int *, isc_stdtime_t);

/* Dump / debug output. */
static void
dump_adb(dns_adb_t *, FILE *, bool debug, isc_stdtime_t);
static void
print_dns_name(FILE *, const dns_name_t *);
static void
print_namehook_list(FILE *, const char *legend, dns_adb_t *adb,
		    dns_adbnamehooklist_t *list, bool debug, isc_stdtime_t now);
static void
print_find_list(FILE *, dns_adbname_t *);
static void
print_fetch_list(FILE *, dns_adbname_t *);

/* Reference counting. */
static bool
dec_adb_irefcnt(dns_adb_t *);
static void
inc_adb_irefcnt(dns_adb_t *);
static void
inc_adb_erefcnt(dns_adb_t *);
static void
inc_entry_refcnt(dns_adb_t *, dns_adbentry_t *, bool);
static bool
dec_entry_refcnt(dns_adb_t *, bool, dns_adbentry_t *, bool, isc_stdtime_t);

/* Locking, cleaning, expiry, fetching and shutdown. */
static void
violate_locking_hierarchy(isc_mutex_t *, isc_mutex_t *);
static bool
clean_namehooks(dns_adb_t *, dns_adbnamehooklist_t *);
static void
clean_target(dns_adb_t *, dns_name_t *);
static void
clean_finds_at_name(dns_adbname_t *, isc_eventtype_t, unsigned int);
static bool
check_expire_namehooks(dns_adbname_t *, isc_stdtime_t);
static bool
check_expire_entry(dns_adb_t *, dns_adbentry_t **, isc_stdtime_t);
static void
cancel_fetches_at_name(dns_adbname_t *);
static isc_result_t
dbfind_name(dns_adbname_t *, isc_stdtime_t, dns_rdatatype_t);
static isc_result_t
fetch_name(dns_adbname_t *, bool, unsigned int, isc_counter_t *qc,
	   dns_rdatatype_t);
static void
check_exit(dns_adb_t *);
static void
destroy(dns_adb_t *);
static bool
shutdown_names(dns_adb_t *);
static bool
shutdown_entries(dns_adb_t *);
static void
link_name(dns_adb_t *, int, dns_adbname_t *);
static bool
unlink_name(dns_adb_t *, dns_adbname_t *);
static void
link_entry(dns_adb_t *, int, dns_adbentry_t *);
static bool
unlink_entry(dns_adb_t *, dns_adbentry_t *);
static bool
kill_name(dns_adbname_t **, isc_eventtype_t);
static void
water(void *, int);
static void
dump_entry(FILE *, dns_adb_t *, dns_adbentry_t *, bool, isc_stdtime_t);
static void
adjustsrtt(dns_adbaddrinfo_t *addr, unsigned int rtt, unsigned int factor,
	   isc_stdtime_t now);
static void
shutdown_task(isc_task_t *task, isc_event_t *ev);
static void
log_quota(dns_adbentry_t *entry, const char *fmt, ...) ISC_FORMAT_PRINTF(2, 3);

/*
 * Private flag(s) for adbfind objects.  These are used internally and
 * are not meant to be seen or used by the caller; however, we use the
 * same flags field as for DNS_ADBFIND_xxx flags, so we must be careful
 * that there is no overlap between these values and those.  To make it
 * easier, we will number these starting from the most significant bit
 * instead of the least significant.
 *
 * NOTE(review): '1 << 31' shifts a signed int into its sign bit when
 * int is 32 bits wide -- confirm this is acceptable for all supported
 * compilers.
 */
enum {
	FIND_EVENT_SENT = 1 << 31,
	FIND_EVENT_FREED = 1 << 30,
};
#define FIND_EVENTSENT(h)  (((h)->flags & FIND_EVENT_SENT) != 0)
#define FIND_EVENTFREED(h) (((h)->flags & FIND_EVENT_FREED) != 0)

/*
 * Private flag(s) for adbname objects.
 */
enum {
	NAME_IS_DEAD = 1 << 31,
	NAME_NEEDS_POKE = 1 << 30,
};
#define NAME_DEAD(n)	  (((n)->flags & NAME_IS_DEAD) != 0)
#define NAME_NEEDSPOKE(n) (((n)->flags & NAME_NEEDS_POKE) != 0)
#define NAME_GLUEOK(n)	  (((n)->flags & DNS_ADBFIND_GLUEOK) != 0)
#define NAME_HINTOK(n)	  (((n)->flags & DNS_ADBFIND_HINTOK) != 0)

/*
 * Private flag(s) for adbentry objects.  Note that these will also
 * be used for addrinfo flags, and in resolver.c we'll use the same
 * field for FCTX_ADDRINFO_xxx flags to store information about remote
 * servers, so we must be careful that there is no overlap between
 * these values and those.  To make it easier, we will number these
 * starting from the most significant bit instead of the least
 * significant.
 */
enum {
	ENTRY_IS_DEAD = 1 << 31,
};

/*
 * To the name, address classes are all that really exist.  If it has a
 * V6 address it doesn't care if it came from a AAAA query.
 */
#define NAME_HAS_V4(n)	  (!ISC_LIST_EMPTY((n)->v4))
#define NAME_HAS_V6(n)	  (!ISC_LIST_EMPTY((n)->v6))
#define NAME_HAS_ADDRS(n) (NAME_HAS_V4(n) || NAME_HAS_V6(n))

/*
 * Fetches are broken out into A and AAAA types.  
In some cases,
 * however, it makes more sense to test for a particular class of fetches,
 * like V4 or V6 above.
 */
#define NAME_FETCH_A(n)	   ((n)->fetch_a != NULL)
#define NAME_FETCH_AAAA(n) ((n)->fetch_aaaa != NULL)
#define NAME_FETCH(n)	   (NAME_FETCH_A(n) || NAME_FETCH_AAAA(n))

/*
 * Find options and tests to see if there are addresses on the list.
 *
 * Each macro takes a pointer to an object with an 'options' field
 * (FIND_HAS_ADDRS instead inspects its 'list' field).
 */
#define FIND_WANTEVENT(fn)	(((fn)->options & DNS_ADBFIND_WANTEVENT) != 0)
#define FIND_WANTEMPTYEVENT(fn) (((fn)->options & DNS_ADBFIND_EMPTYEVENT) != 0)
#define FIND_AVOIDFETCHES(fn)	(((fn)->options & DNS_ADBFIND_AVOIDFETCHES) != 0)
#define FIND_STARTATZONE(fn)	(((fn)->options & DNS_ADBFIND_STARTATZONE) != 0)
#define FIND_HINTOK(fn)		(((fn)->options & DNS_ADBFIND_HINTOK) != 0)
#define FIND_GLUEOK(fn)		(((fn)->options & DNS_ADBFIND_GLUEOK) != 0)
#define FIND_HAS_ADDRS(fn)	(!ISC_LIST_EMPTY((fn)->list))
#define FIND_RETURNLAME(fn)	(((fn)->options & DNS_ADBFIND_RETURNLAME) != 0)
#define FIND_NOFETCH(fn)	(((fn)->options & DNS_ADBFIND_NOFETCH) != 0)

/*
 * These are currently used on simple unsigned ints, so they are
 * not really associated with any particular type.
 */
#define WANT_INET(x)  (((x) & DNS_ADBFIND_INET) != 0)
#define WANT_INET6(x) (((x) & DNS_ADBFIND_INET6) != 0)

/*
 * True when 'exp' equals INT_MAX or is earlier than 'now'.
 * NOTE(review): the arguments are not parenthesized in the expansion, so
 * callers should pass simple expressions only.
 */
#define EXPIRE_OK(exp, now) ((exp == INT_MAX) || (exp < now))

/*
 * Find out if the flags on a name (nf) indicate if it is a hint or
 * glue, and compare this to the appropriate bits set in o, to see if
 * this is ok.
 
*/
#define GLUE_OK(nf, o) (!NAME_GLUEOK(nf) || (((o) & DNS_ADBFIND_GLUEOK) != 0))
#define HINT_OK(nf, o) (!NAME_HINTOK(nf) || (((o) & DNS_ADBFIND_HINTOK) != 0))
#define GLUEHINT_OK(nf, o) (GLUE_OK(nf, o) || HINT_OK(nf, o))
/* True when the name's STARTATZONE flag bit equals the one in 'o'. */
#define STARTATZONE_MATCHES(nf, o)                     \
	(((nf)->flags & DNS_ADBFIND_STARTATZONE) == \
	 ((o) & DNS_ADBFIND_STARTATZONE))

/* Log levels passed to DP() by the code in this file. */
#define ENTER_LEVEL  ISC_LOG_DEBUG(50)
#define EXIT_LEVEL   ENTER_LEVEL
#define CLEAN_LEVEL  ISC_LOG_DEBUG(100)
#define DEF_LEVEL    ISC_LOG_DEBUG(5)
#define NCACHE_LEVEL ISC_LOG_DEBUG(20)

/* Classifiers for result codes. */
#define NCACHE_RESULT(r) \
	((r) == DNS_R_NCACHENXDOMAIN || (r) == DNS_R_NCACHENXRRSET)
#define AUTH_NX(r) ((r) == DNS_R_NXDOMAIN || (r) == DNS_R_NXRRSET)
#define NXDOMAIN_RESULT(r) \
	((r) == DNS_R_NXDOMAIN || (r) == DNS_R_NCACHENXDOMAIN)
#define NXRRSET_RESULT(r)                                      \
	((r) == DNS_R_NCACHENXRRSET || (r) == DNS_R_NXRRSET || \
	 (r) == DNS_R_HINTNXRRSET)

/*
 * Error state rankings.
 *
 * The values double as indexes into errnames[] and find_err_map[]
 * below, so the three must be kept in the same order.
 */

#define FIND_ERR_SUCCESS    0 /* highest rank */
#define FIND_ERR_CANCELED   1
#define FIND_ERR_FAILURE    2
#define FIND_ERR_NXDOMAIN   3
#define FIND_ERR_NXRRSET    4
#define FIND_ERR_UNEXPECTED 5
#define FIND_ERR_NOTFOUND   6
#define FIND_ERR_MAX	    7

/* Printable name for each FIND_ERR_* value. */
static const char *errnames[] = { "success",  "canceled", "failure",
				  "nxdomain", "nxrrset",  "unexpected",
				  "not_found" };

/* Picks the higher-ranked (numerically smaller) of two FIND_ERR_* values. */
#define NEWERR(old, new) (ISC_MIN((old), (new)))

/* Result code corresponding to each FIND_ERR_* value. */
static isc_result_t find_err_map[FIND_ERR_MAX] = {
	ISC_R_SUCCESS, ISC_R_CANCELED,	 ISC_R_FAILURE, DNS_R_NXDOMAIN,
	DNS_R_NXRRSET, ISC_R_UNEXPECTED, ISC_R_NOTFOUND /* not YET found */
};

static void
DP(int level, const char *format, ...) ISC_FORMAT_PRINTF(2, 3);

/*
 * printf-style logging helper: forwards 'format' and its arguments to
 * isc_log_vwrite() on dns_lctx with the DATABASE category and the ADB
 * module at the given level.
 */
static void
DP(int level, const char *format, ...) {
	va_list args;

	va_start(args, format);
	isc_log_vwrite(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_ADB,
		       level, format, args);
	va_end(args);
}

/*%
 * Increment resolver-related statistics counters.
 
*/ static void inc_stats(dns_adb_t *adb, isc_statscounter_t counter) { if (adb->view->resstats != NULL) { isc_stats_increment(adb->view->resstats, counter); } } /*% * Set adb-related statistics counters. */ static void set_adbstat(dns_adb_t *adb, uint64_t val, isc_statscounter_t counter) { if (adb->view->adbstats != NULL) { isc_stats_set(adb->view->adbstats, val, counter); } } static void dec_adbstats(dns_adb_t *adb, isc_statscounter_t counter) { if (adb->view->adbstats != NULL) { isc_stats_decrement(adb->view->adbstats, counter); } } static void inc_adbstats(dns_adb_t *adb, isc_statscounter_t counter) { if (adb->view->adbstats != NULL) { isc_stats_increment(adb->view->adbstats, counter); } } static dns_ttl_t ttlclamp(dns_ttl_t ttl) { if (ttl < ADB_CACHE_MINIMUM) { ttl = ADB_CACHE_MINIMUM; } if (ttl > ADB_CACHE_MAXIMUM) { ttl = ADB_CACHE_MAXIMUM; } return (ttl); } /* * Hashing is most efficient if the number of buckets is prime. * The sequence below is the closest previous primes to 2^n and * 1.5 * 2^n, for values of n from 10 to 28. (The tables will * no longer grow beyond 2^28 entries.) 
*/ static const unsigned nbuckets[] = { 1021, 1531, 2039, 3067, 4093, 6143, 8191, 12281, 16381, 24571, 32749, 49193, 65521, 98299, 131071, 199603, 262139, 393209, 524287, 768431, 1048573, 1572853, 2097143, 3145721, 4194301, 6291449, 8388593, 12582893, 16777213, 25165813, 33554393, 50331599, 67108859, 100663291, 134217689, 201326557, 268535431, 0 }; static void grow_entries(isc_task_t *task, isc_event_t *ev) { dns_adb_t *adb; dns_adbentry_t *e; dns_adbentrylist_t *newdeadentries = NULL; dns_adbentrylist_t *newentries = NULL; bool *newentry_sd = NULL; isc_mutex_t *newentrylocks = NULL; isc_result_t result; unsigned int *newentry_refcnt = NULL; unsigned int i, n, bucket; adb = ev->ev_arg; INSIST(DNS_ADB_VALID(adb)); isc_event_free(&ev); result = isc_task_beginexclusive(task); if (result != ISC_R_SUCCESS) { goto check_exit; } i = 0; while (nbuckets[i] != 0 && adb->nentries >= nbuckets[i]) { i++; } if (nbuckets[i] != 0) { n = nbuckets[i]; } else { goto done; } DP(ISC_LOG_INFO, "adb: grow_entries to %u starting", n); /* * Are we shutting down? */ for (i = 0; i < adb->nentries; i++) { if (adb->entry_sd[i]) { goto cleanup; /* * Grab all the resources we need. */ } } /* * Grab all the resources we need. */ newentries = isc_mem_get(adb->hmctx, sizeof(*newentries) * n); newdeadentries = isc_mem_get(adb->hmctx, sizeof(*newdeadentries) * n); newentrylocks = isc_mem_get(adb->hmctx, sizeof(*newentrylocks) * n); newentry_sd = isc_mem_get(adb->hmctx, sizeof(*newentry_sd) * n); newentry_refcnt = isc_mem_get(adb->hmctx, sizeof(*newentry_refcnt) * n); /* * Initialise the new resources. */ isc_mutexblock_init(newentrylocks, n); for (i = 0; i < n; i++) { ISC_LIST_INIT(newentries[i]); ISC_LIST_INIT(newdeadentries[i]); newentry_sd[i] = false; newentry_refcnt[i] = 0; adb->irefcnt++; } /* * Move entries to new arrays. 
*/ for (i = 0; i < adb->nentries; i++) { e = ISC_LIST_HEAD(adb->entries[i]); while (e != NULL) { ISC_LIST_UNLINK(adb->entries[i], e, plink); bucket = isc_sockaddr_hash(&e->sockaddr, true) % n; e->lock_bucket = bucket; ISC_LIST_APPEND(newentries[bucket], e, plink); INSIST(adb->entry_refcnt[i] > 0); adb->entry_refcnt[i]--; newentry_refcnt[bucket]++; e = ISC_LIST_HEAD(adb->entries[i]); } e = ISC_LIST_HEAD(adb->deadentries[i]); while (e != NULL) { ISC_LIST_UNLINK(adb->deadentries[i], e, plink); bucket = isc_sockaddr_hash(&e->sockaddr, true) % n; e->lock_bucket = bucket; ISC_LIST_APPEND(newdeadentries[bucket], e, plink); INSIST(adb->entry_refcnt[i] > 0); adb->entry_refcnt[i]--; newentry_refcnt[bucket]++; e = ISC_LIST_HEAD(adb->deadentries[i]); } INSIST(adb->entry_refcnt[i] == 0); adb->irefcnt--; } /* * Cleanup old resources. */ isc_mutexblock_destroy(adb->entrylocks, adb->nentries); isc_mem_put(adb->hmctx, adb->entries, sizeof(*adb->entries) * adb->nentries); isc_mem_put(adb->hmctx, adb->deadentries, sizeof(*adb->deadentries) * adb->nentries); isc_mem_put(adb->hmctx, adb->entrylocks, sizeof(*adb->entrylocks) * adb->nentries); isc_mem_put(adb->hmctx, adb->entry_sd, sizeof(*adb->entry_sd) * adb->nentries); isc_mem_put(adb->hmctx, adb->entry_refcnt, sizeof(*adb->entry_refcnt) * adb->nentries); /* * Install new resources. */ adb->entries = newentries; adb->deadentries = newdeadentries; adb->entrylocks = newentrylocks; adb->entry_sd = newentry_sd; adb->entry_refcnt = newentry_refcnt; adb->nentries = n; set_adbstat(adb, adb->nentries, dns_adbstats_nentries); /* * Only on success do we set adb->growentries_sent to false. * This will prevent us being continuously being called on error. 
*/ adb->growentries_sent = false; goto done; cleanup: if (newentries != NULL) { isc_mem_put(adb->mctx, newentries, sizeof(*newentries) * n); } if (newdeadentries != NULL) { isc_mem_put(adb->mctx, newdeadentries, sizeof(*newdeadentries) * n); } if (newentrylocks != NULL) { isc_mem_put(adb->mctx, newentrylocks, sizeof(*newentrylocks) * n); } if (newentry_sd != NULL) { isc_mem_put(adb->mctx, newentry_sd, sizeof(*newentry_sd) * n); } if (newentry_refcnt != NULL) { isc_mem_put(adb->mctx, newentry_refcnt, sizeof(*newentry_refcnt) * n); } done: isc_task_endexclusive(task); check_exit: LOCK(&adb->lock); if (dec_adb_irefcnt(adb)) { check_exit(adb); } UNLOCK(&adb->lock); DP(ISC_LOG_INFO, "adb: grow_entries finished"); } static void grow_names(isc_task_t *task, isc_event_t *ev) { dns_adb_t *adb; dns_adbname_t *name; dns_adbnamelist_t *newdeadnames = NULL; dns_adbnamelist_t *newnames = NULL; bool *newname_sd = NULL; isc_mutex_t *newnamelocks = NULL; isc_result_t result; unsigned int *newname_refcnt = NULL; unsigned int i, n; unsigned int bucket; adb = ev->ev_arg; INSIST(DNS_ADB_VALID(adb)); isc_event_free(&ev); result = isc_task_beginexclusive(task); if (result != ISC_R_SUCCESS) { goto check_exit; } i = 0; while (nbuckets[i] != 0 && adb->nnames >= nbuckets[i]) { i++; } if (nbuckets[i] != 0) { n = nbuckets[i]; } else { goto done; } DP(ISC_LOG_INFO, "adb: grow_names to %u starting", n); /* * Are we shutting down? */ for (i = 0; i < adb->nnames; i++) { if (adb->name_sd[i]) { goto cleanup; /* * Grab all the resources we need. */ } } /* * Grab all the resources we need. */ newnames = isc_mem_get(adb->hmctx, sizeof(*newnames) * n); newdeadnames = isc_mem_get(adb->hmctx, sizeof(*newdeadnames) * n); newnamelocks = isc_mem_get(adb->hmctx, sizeof(*newnamelocks) * n); newname_sd = isc_mem_get(adb->hmctx, sizeof(*newname_sd) * n); newname_refcnt = isc_mem_get(adb->hmctx, sizeof(*newname_refcnt) * n); /* * Initialise the new resources. 
*/ isc_mutexblock_init(newnamelocks, n); for (i = 0; i < n; i++) { ISC_LIST_INIT(newnames[i]); ISC_LIST_INIT(newdeadnames[i]); newname_sd[i] = false; newname_refcnt[i] = 0; adb->irefcnt++; } /* * Move names to new arrays. */ for (i = 0; i < adb->nnames; i++) { name = ISC_LIST_HEAD(adb->names[i]); while (name != NULL) { ISC_LIST_UNLINK(adb->names[i], name, plink); bucket = dns_name_fullhash(&name->name, true) % n; name->lock_bucket = bucket; ISC_LIST_APPEND(newnames[bucket], name, plink); INSIST(adb->name_refcnt[i] > 0); adb->name_refcnt[i]--; newname_refcnt[bucket]++; name = ISC_LIST_HEAD(adb->names[i]); } name = ISC_LIST_HEAD(adb->deadnames[i]); while (name != NULL) { ISC_LIST_UNLINK(adb->deadnames[i], name, plink); bucket = dns_name_fullhash(&name->name, true) % n; name->lock_bucket = bucket; ISC_LIST_APPEND(newdeadnames[bucket], name, plink); INSIST(adb->name_refcnt[i] > 0); adb->name_refcnt[i]--; newname_refcnt[bucket]++; name = ISC_LIST_HEAD(adb->deadnames[i]); } INSIST(adb->name_refcnt[i] == 0); adb->irefcnt--; } /* * Cleanup old resources. */ isc_mutexblock_destroy(adb->namelocks, adb->nnames); isc_mem_put(adb->hmctx, adb->names, sizeof(*adb->names) * adb->nnames); isc_mem_put(adb->hmctx, adb->deadnames, sizeof(*adb->deadnames) * adb->nnames); isc_mem_put(adb->hmctx, adb->namelocks, sizeof(*adb->namelocks) * adb->nnames); isc_mem_put(adb->hmctx, adb->name_sd, sizeof(*adb->name_sd) * adb->nnames); isc_mem_put(adb->hmctx, adb->name_refcnt, sizeof(*adb->name_refcnt) * adb->nnames); /* * Install new resources. */ adb->names = newnames; adb->deadnames = newdeadnames; adb->namelocks = newnamelocks; adb->name_sd = newname_sd; adb->name_refcnt = newname_refcnt; adb->nnames = n; set_adbstat(adb, adb->nnames, dns_adbstats_nnames); /* * Only on success do we set adb->grownames_sent to false. * This will prevent us being continuously being called on error. 
*/ adb->grownames_sent = false; goto done; cleanup: if (newnames != NULL) { isc_mem_put(adb->hmctx, newnames, sizeof(*newnames) * n); } if (newdeadnames != NULL) { isc_mem_put(adb->hmctx, newdeadnames, sizeof(*newdeadnames) * n); } if (newnamelocks != NULL) { isc_mem_put(adb->hmctx, newnamelocks, sizeof(*newnamelocks) * n); } if (newname_sd != NULL) { isc_mem_put(adb->hmctx, newname_sd, sizeof(*newname_sd) * n); } if (newname_refcnt != NULL) { isc_mem_put(adb->hmctx, newname_refcnt, sizeof(*newname_refcnt) * n); } done: isc_task_endexclusive(task); check_exit: LOCK(&adb->lock); if (dec_adb_irefcnt(adb)) { check_exit(adb); } UNLOCK(&adb->lock); DP(ISC_LOG_INFO, "adb: grow_names finished"); } /* * Requires the adbname bucket be locked and that no entry buckets be locked. * * This code handles A and AAAA rdatasets only. */ static isc_result_t import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, isc_stdtime_t now) { isc_result_t result; dns_adb_t *adb = NULL; dns_adbnamehook_t *nh = NULL; dns_adbnamehook_t *anh = NULL; dns_rdata_t rdata = DNS_RDATA_INIT; struct in_addr ina; struct in6_addr in6a; isc_sockaddr_t sockaddr; dns_adbentry_t *foundentry = NULL; /* NO CLEAN UP! 
*/ int addr_bucket; bool new_addresses_added; dns_rdatatype_t rdtype; dns_adbnamehooklist_t *hookhead = NULL; INSIST(DNS_ADBNAME_VALID(adbname)); adb = adbname->adb; INSIST(DNS_ADB_VALID(adb)); rdtype = rdataset->type; INSIST((rdtype == dns_rdatatype_a) || (rdtype == dns_rdatatype_aaaa)); addr_bucket = DNS_ADB_INVALIDBUCKET; new_addresses_added = false; result = dns_rdataset_first(rdataset); while (result == ISC_R_SUCCESS) { dns_rdata_reset(&rdata); dns_rdataset_current(rdataset, &rdata); if (rdtype == dns_rdatatype_a) { INSIST(rdata.length == 4); memmove(&ina.s_addr, rdata.data, 4); isc_sockaddr_fromin(&sockaddr, &ina, 0); hookhead = &adbname->v4; } else { INSIST(rdata.length == 16); memmove(in6a.s6_addr, rdata.data, 16); isc_sockaddr_fromin6(&sockaddr, &in6a, 0); hookhead = &adbname->v6; } INSIST(nh == NULL); nh = new_adbnamehook(adb, NULL); foundentry = find_entry_and_lock(adb, &sockaddr, &addr_bucket, now); if (foundentry == NULL) { dns_adbentry_t *entry; entry = new_adbentry(adb); entry->sockaddr = sockaddr; entry->refcnt = 1; entry->nh = 1; nh->entry = entry; link_entry(adb, addr_bucket, entry); } else { for (anh = ISC_LIST_HEAD(*hookhead); anh != NULL; anh = ISC_LIST_NEXT(anh, plink)) { if (anh->entry == foundentry) { break; } } if (anh == NULL) { foundentry->refcnt++; foundentry->nh++; nh->entry = foundentry; } else { free_adbnamehook(adb, &nh); } } new_addresses_added = true; if (nh != NULL) { ISC_LIST_APPEND(*hookhead, nh, plink); } nh = NULL; result = dns_rdataset_next(rdataset); } if (addr_bucket != DNS_ADB_INVALIDBUCKET) { UNLOCK(&adb->entrylocks[addr_bucket]); } if (rdataset->trust == dns_trust_glue || rdataset->trust == dns_trust_additional) { rdataset->ttl = ADB_CACHE_MINIMUM; } else if (rdataset->trust == dns_trust_ultimate) { rdataset->ttl = 0; } else { rdataset->ttl = ttlclamp(rdataset->ttl); } if (rdtype == dns_rdatatype_a) { DP(NCACHE_LEVEL, "expire_v4 set to MIN(%u,%u) import_rdataset", adbname->expire_v4, now + rdataset->ttl); 
adbname->expire_v4 = ISC_MIN( adbname->expire_v4, ISC_MIN(now + ADB_ENTRY_WINDOW, now + rdataset->ttl)); } else { DP(NCACHE_LEVEL, "expire_v6 set to MIN(%u,%u) import_rdataset", adbname->expire_v6, now + rdataset->ttl); adbname->expire_v6 = ISC_MIN( adbname->expire_v6, ISC_MIN(now + ADB_ENTRY_WINDOW, now + rdataset->ttl)); } if (new_addresses_added) { /* * Lie a little here. This is more or less so code that cares * can find out if any new information was added or not. */ return (ISC_R_SUCCESS); } return (result); } /* * Requires the name's bucket be locked. */ static bool kill_name(dns_adbname_t **n, isc_eventtype_t ev) { dns_adbname_t *name; bool result = false; bool result4, result6; int bucket; dns_adb_t *adb; INSIST(n != NULL); name = *n; *n = NULL; INSIST(DNS_ADBNAME_VALID(name)); adb = name->adb; INSIST(DNS_ADB_VALID(adb)); DP(DEF_LEVEL, "killing name %p", name); /* * If we're dead already, just check to see if we should go * away now or not. */ if (NAME_DEAD(name) && !NAME_FETCH(name)) { result = unlink_name(adb, name); free_adbname(adb, &name); if (result) { result = dec_adb_irefcnt(adb); } return (result); } /* * Clean up the name's various lists. These two are destructive * in that they will always empty the list. */ clean_finds_at_name(name, ev, DNS_ADBFIND_ADDRESSMASK); result4 = clean_namehooks(adb, &name->v4); result6 = clean_namehooks(adb, &name->v6); clean_target(adb, &name->target); result = (result4 || result6); /* * If fetches are running, cancel them. If none are running, we can * just kill the name here. 
*/ if (!NAME_FETCH(name)) { INSIST(!result); result = unlink_name(adb, name); free_adbname(adb, &name); if (result) { result = dec_adb_irefcnt(adb); } } else { cancel_fetches_at_name(name); if (!NAME_DEAD(name)) { bucket = name->lock_bucket; ISC_LIST_UNLINK(adb->names[bucket], name, plink); ISC_LIST_APPEND(adb->deadnames[bucket], name, plink); name->flags |= NAME_IS_DEAD; } } return (result); } /* * Requires the name's bucket be locked and no entry buckets be locked. */ static bool check_expire_namehooks(dns_adbname_t *name, isc_stdtime_t now) { dns_adb_t *adb; bool result4 = false; bool result6 = false; INSIST(DNS_ADBNAME_VALID(name)); adb = name->adb; INSIST(DNS_ADB_VALID(adb)); /* * Check to see if we need to remove the v4 addresses */ if (!NAME_FETCH_A(name) && EXPIRE_OK(name->expire_v4, now)) { if (NAME_HAS_V4(name)) { DP(DEF_LEVEL, "expiring v4 for name %p", name); result4 = clean_namehooks(adb, &name->v4); name->partial_result &= ~DNS_ADBFIND_INET; } name->expire_v4 = INT_MAX; name->fetch_err = FIND_ERR_UNEXPECTED; } /* * Check to see if we need to remove the v6 addresses */ if (!NAME_FETCH_AAAA(name) && EXPIRE_OK(name->expire_v6, now)) { if (NAME_HAS_V6(name)) { DP(DEF_LEVEL, "expiring v6 for name %p", name); result6 = clean_namehooks(adb, &name->v6); name->partial_result &= ~DNS_ADBFIND_INET6; } name->expire_v6 = INT_MAX; name->fetch6_err = FIND_ERR_UNEXPECTED; } /* * Check to see if we need to remove the alias target. */ if (EXPIRE_OK(name->expire_target, now)) { clean_target(adb, &name->target); name->expire_target = INT_MAX; } return (result4 || result6); } /* * Requires the name's bucket be locked. */ static void link_name(dns_adb_t *adb, int bucket, dns_adbname_t *name) { INSIST(name->lock_bucket == DNS_ADB_INVALIDBUCKET); ISC_LIST_PREPEND(adb->names[bucket], name, plink); name->lock_bucket = bucket; adb->name_refcnt[bucket]++; } /* * Requires the name's bucket be locked. 
*/ static bool unlink_name(dns_adb_t *adb, dns_adbname_t *name) { int bucket; bool result = false; bucket = name->lock_bucket; INSIST(bucket != DNS_ADB_INVALIDBUCKET); if (NAME_DEAD(name)) { ISC_LIST_UNLINK(adb->deadnames[bucket], name, plink); } else { ISC_LIST_UNLINK(adb->names[bucket], name, plink); } name->lock_bucket = DNS_ADB_INVALIDBUCKET; INSIST(adb->name_refcnt[bucket] > 0); adb->name_refcnt[bucket]--; if (adb->name_sd[bucket] && adb->name_refcnt[bucket] == 0) { result = true; } return (result); } /* * Requires the entry's bucket be locked. */ static void link_entry(dns_adb_t *adb, int bucket, dns_adbentry_t *entry) { int i; dns_adbentry_t *e; if (isc_mem_isovermem(adb->mctx)) { for (i = 0; i < 2; i++) { e = ISC_LIST_TAIL(adb->entries[bucket]); if (e == NULL) { break; } if (e->refcnt == 0) { unlink_entry(adb, e); free_adbentry(adb, &e); continue; } INSIST((e->flags & ENTRY_IS_DEAD) == 0); e->flags |= ENTRY_IS_DEAD; ISC_LIST_UNLINK(adb->entries[bucket], e, plink); ISC_LIST_PREPEND(adb->deadentries[bucket], e, plink); } } ISC_LIST_PREPEND(adb->entries[bucket], entry, plink); entry->lock_bucket = bucket; adb->entry_refcnt[bucket]++; } /* * Requires the entry's bucket be locked. */ static bool unlink_entry(dns_adb_t *adb, dns_adbentry_t *entry) { int bucket; bool result = false; bucket = entry->lock_bucket; INSIST(bucket != DNS_ADB_INVALIDBUCKET); if ((entry->flags & ENTRY_IS_DEAD) != 0) { ISC_LIST_UNLINK(adb->deadentries[bucket], entry, plink); } else { ISC_LIST_UNLINK(adb->entries[bucket], entry, plink); } entry->lock_bucket = DNS_ADB_INVALIDBUCKET; INSIST(adb->entry_refcnt[bucket] > 0); adb->entry_refcnt[bucket]--; if (adb->entry_sd[bucket] && adb->entry_refcnt[bucket] == 0) { result = true; } return (result); } static void violate_locking_hierarchy(isc_mutex_t *have, isc_mutex_t *want) { if (isc_mutex_trylock(want) != ISC_R_SUCCESS) { UNLOCK(have); LOCK(want); LOCK(have); } } /* * The ADB _MUST_ be locked before calling. 
Also, exit conditions must be
 * checked after calling this function.
 *
 * Flags every name bucket in name_sd[] and, for each name on the bucket's
 * live list, calls kill_name() with DNS_EVENT_ADBSHUTDOWN.  For a bucket
 * whose live list is empty, dec_adb_irefcnt() is called directly.
 * Returns the last value obtained from dec_adb_irefcnt()/kill_name().
 */
static bool
shutdown_names(dns_adb_t *adb) {
	unsigned int bucket;
	bool result = false;
	dns_adbname_t *name;
	dns_adbname_t *next_name;

	for (bucket = 0; bucket < adb->nnames; bucket++) {
		LOCK(&adb->namelocks[bucket]);
		adb->name_sd[bucket] = true;

		name = ISC_LIST_HEAD(adb->names[bucket]);
		if (name == NULL) {
			/*
			 * This bucket has no names.  We must decrement the
			 * irefcnt ourselves, since it will not be
			 * automatically triggered by a name being unlinked.
			 */
			INSIST(!result);
			result = dec_adb_irefcnt(adb);
		} else {
			/*
			 * Run through the list.  For each name, clean up finds
			 * found there, and cancel any fetches running.  When
			 * all the fetches are canceled, the name will destroy
			 * itself.
			 */
			while (name != NULL) {
				/* Saved first: kill_name() takes &name. */
				next_name = ISC_LIST_NEXT(name, plink);
				INSIST(!result);
				result = kill_name(&name,
						   DNS_EVENT_ADBSHUTDOWN);
				name = next_name;
			}
		}

		UNLOCK(&adb->namelocks[bucket]);
	}
	return (result);
}

/*
 * The ADB _MUST_ be locked before calling.  Also, exit conditions must be
 * checked after calling this function.
 *
 * Flags every entry bucket in entry_sd[].  A bucket whose entry_refcnt is
 * zero has the irefcnt decremented directly; otherwise every live entry
 * with refcnt == 0 and expires != 0 is unlinked and freed.
 */
static bool
shutdown_entries(dns_adb_t *adb) {
	unsigned int bucket;
	bool result = false;
	dns_adbentry_t *entry;
	dns_adbentry_t *next_entry;

	for (bucket = 0; bucket < adb->nentries; bucket++) {
		LOCK(&adb->entrylocks[bucket]);
		adb->entry_sd[bucket] = true;

		entry = ISC_LIST_HEAD(adb->entries[bucket]);
		if (adb->entry_refcnt[bucket] == 0) {
			/*
			 * This bucket has no entries.  We must decrement the
			 * irefcnt ourselves, since it will not be
			 * automatically triggered by an entry being unlinked.
			 */
			result = dec_adb_irefcnt(adb);
		} else {
			/*
			 * Run through the list.  Cleanup any entries not
			 * associated with names, and which are not in use.
			 */
			while (entry != NULL) {
				/* Saved first: the entry may be freed below. */
				next_entry = ISC_LIST_NEXT(entry, plink);
				if (entry->refcnt == 0 && entry->expires != 0) {
					result = unlink_entry(adb, entry);
					free_adbentry(adb, &entry);
					if (result) {
						result = dec_adb_irefcnt(adb);
					}
				}
				entry = next_entry;
			}
		}

		UNLOCK(&adb->entrylocks[bucket]);
	}
	return (result);
}

/*
 * Ask the resolver to cancel the A and/or AAAA fetch that is outstanding
 * for 'name', if any.
 *
 * Name bucket must be locked
 */
static void
cancel_fetches_at_name(dns_adbname_t *name) {
	if (NAME_FETCH_A(name)) {
		dns_resolver_cancelfetch(name->fetch_a->fetch);
	}

	if (NAME_FETCH_AAAA(name)) {
		dns_resolver_cancelfetch(name->fetch_aaaa->fetch);
	}
}

/*
 * Empty the list 'namehooks': for every namehook, drop the reference it
 * holds on its entry (taking the entry's bucket lock as needed) and free
 * the namehook.  Returns the value of the last dec_entry_refcnt() call,
 * or false if no namehook had an entry.
 *
 * Assumes the name bucket is locked.
 */
static bool
clean_namehooks(dns_adb_t *adb, dns_adbnamehooklist_t *namehooks) {
	dns_adbentry_t *entry;
	dns_adbnamehook_t *namehook;
	int addr_bucket;
	bool result = false;
	bool overmem = isc_mem_isovermem(adb->mctx);

	addr_bucket = DNS_ADB_INVALIDBUCKET;
	namehook = ISC_LIST_HEAD(*namehooks);
	while (namehook != NULL) {
		INSIST(DNS_ADBNAMEHOOK_VALID(namehook));

		/*
		 * Clean up the entry if needed.
		 */
		entry = namehook->entry;
		if (entry != NULL) {
			INSIST(DNS_ADBENTRY_VALID(entry));

			/*
			 * Only switch entry-bucket locks when this entry
			 * lives in a different bucket than the previous one.
			 */
			if (addr_bucket != entry->lock_bucket) {
				if (addr_bucket != DNS_ADB_INVALIDBUCKET) {
					UNLOCK(&adb->entrylocks[addr_bucket]);
				}
				addr_bucket = entry->lock_bucket;
				INSIST(addr_bucket != DNS_ADB_INVALIDBUCKET);
				LOCK(&adb->entrylocks[addr_bucket]);
			}

			entry->nh--;
			/* Bucket lock is already held, hence lock == false. */
			result = dec_entry_refcnt(adb, overmem, entry, false,
						  INT_MAX);
		}

		/*
		 * Free the namehook
		 */
		namehook->entry = NULL;
		ISC_LIST_UNLINK(*namehooks, namehook, plink);
		free_adbnamehook(adb, &namehook);

		namehook = ISC_LIST_HEAD(*namehooks);
	}

	if (addr_bucket != DNS_ADB_INVALIDBUCKET) {
		UNLOCK(&adb->entrylocks[addr_bucket]);
	}
	return (result);
}

/*
 * Free 'target' and re-initialize it, if it currently has any labels.
 */
static void
clean_target(dns_adb_t *adb, dns_name_t *target) {
	if (dns_name_countlabels(target) > 0) {
		dns_name_free(target, adb->mctx);
		dns_name_init(target, NULL);
	}
}

/*
 * Fill in 'target' (which must be empty) from the first record of
 * 'rdataset', which must be of type CNAME or DNAME.
 *
 * For a CNAME the record's target is duplicated into 'target'.  For a
 * DNAME, 'name' must be a subdomain of 'fname'; the new target is the
 * prefix of 'name' (split at the number of labels it shares with 'fname')
 * concatenated with the DNAME's target.
 *
 * Returns ISC_R_SUCCESS, or the failing result of dns_rdataset_first(),
 * dns_rdata_tostruct() or dns_name_concatenate().
 */
static isc_result_t
set_target(dns_adb_t *adb, const dns_name_t *name, const dns_name_t *fname,
	   dns_rdataset_t *rdataset, dns_name_t *target) {
	isc_result_t result;
	dns_namereln_t namereln;
	unsigned int nlabels;
	int order;
	dns_rdata_t rdata = DNS_RDATA_INIT;
	dns_fixedname_t fixed1, fixed2;
	dns_name_t *prefix, *new_target;

	REQUIRE(dns_name_countlabels(target) == 0);

	if (rdataset->type == dns_rdatatype_cname) {
		dns_rdata_cname_t cname;

		/*
		 * Copy the CNAME's target into the target name.
		 */
		result = dns_rdataset_first(rdataset);
		if (result != ISC_R_SUCCESS) {
			return (result);
		}
		dns_rdataset_current(rdataset, &rdata);
		result = dns_rdata_tostruct(&rdata, &cname, NULL);
		if (result != ISC_R_SUCCESS) {
			return (result);
		}
		dns_name_dup(&cname.cname, adb->mctx, target);
		dns_rdata_freestruct(&cname);
	} else {
		dns_rdata_dname_t dname;

		INSIST(rdataset->type == dns_rdatatype_dname);
		namereln = dns_name_fullcompare(name, fname, &order, &nlabels);
		INSIST(namereln == dns_namereln_subdomain);
		/*
		 * Get the target name of the DNAME.
		 */
		result = dns_rdataset_first(rdataset);
		if (result != ISC_R_SUCCESS) {
			return (result);
		}
		dns_rdataset_current(rdataset, &rdata);
		result = dns_rdata_tostruct(&rdata, &dname, NULL);
		if (result != ISC_R_SUCCESS) {
			return (result);
		}
		/*
		 * Construct the new target name.
		 */
		prefix = dns_fixedname_initname(&fixed1);
		new_target = dns_fixedname_initname(&fixed2);
		dns_name_split(name, nlabels, prefix, NULL);
		result = dns_name_concatenate(prefix, &dname.dname, new_target,
					      NULL);
		/* The struct is released before the result is examined. */
		dns_rdata_freestruct(&dname);
		if (result != ISC_R_SUCCESS) {
			return (result);
		}
		dns_name_dup(new_target, adb->mctx, target);
	}

	return (ISC_R_SUCCESS);
}

/*
 * Event destructor installed by clean_finds_at_name(): under the find's
 * lock, set FIND_EVENT_FREED on the find stored in ev_destroy_arg and
 * clear ev_destroy_arg.
 *
 * Assumes nothing is locked, since this is called by the client.
 */
static void
event_free(isc_event_t *event) {
	dns_adbfind_t *find;

	INSIST(event != NULL);
	find = event->ev_destroy_arg;
	INSIST(DNS_ADBFIND_VALID(find));

	LOCK(&find->lock);
	find->flags |= FIND_EVENT_FREED;
	event->ev_destroy_arg = NULL;
	UNLOCK(&find->lock);
}

/*
 * Assumes the name bucket is locked.
*/
/*
 * Walk the finds attached to 'name' and decide, per find, whether to post
 * its event with type 'evtype':
 *
 *   DNS_EVENT_ADBMOREADDRESSES    post if the find wants any family in
 *                                 'addrs';
 *   DNS_EVENT_ADBNOMOREADDRESSES  clear 'addrs' from the find's flags and
 *                                 post only if no wanted family remains;
 *   anything else                 clear 'addrs' and always post.
 *
 * A posted find is unlinked from the name, gets result_v4/result_v6 from
 * find_err_map[], and is flagged FIND_EVENT_SENT.
 */
static void
clean_finds_at_name(dns_adbname_t *name, isc_eventtype_t evtype,
		    unsigned int addrs) {
	isc_event_t *ev;
	isc_task_t *task;
	dns_adbfind_t *find;
	dns_adbfind_t *next_find;
	bool process;
	unsigned int wanted, notify;

	DP(ENTER_LEVEL,
	   "ENTER clean_finds_at_name, name %p, evtype %08x, addrs %08x", name,
	   evtype, addrs);

	find = ISC_LIST_HEAD(name->finds);
	while (find != NULL) {
		LOCK(&find->lock);
		/* Saved first: the find may be unlinked below. */
		next_find = ISC_LIST_NEXT(find, plink);

		process = false;
		wanted = find->flags & DNS_ADBFIND_ADDRESSMASK;
		notify = wanted & addrs;

		switch (evtype) {
		case DNS_EVENT_ADBMOREADDRESSES:
			DP(ISC_LOG_DEBUG(3), "DNS_EVENT_ADBMOREADDRESSES");
			if ((notify) != 0) {
				find->flags &= ~addrs;
				process = true;
			}
			break;
		case DNS_EVENT_ADBNOMOREADDRESSES:
			DP(ISC_LOG_DEBUG(3), "DNS_EVENT_ADBNOMOREADDRESSES");
			find->flags &= ~addrs;
			wanted = find->flags & DNS_ADBFIND_ADDRESSMASK;
			if (wanted == 0) {
				process = true;
			}
			break;
		default:
			find->flags &= ~addrs;
			process = true;
		}

		if (process) {
			DP(DEF_LEVEL, "cfan: processing find %p", find);
			/*
			 * Unlink the find from the name, letting the caller
			 * call dns_adb_destroyfind() on it to clean it up
			 * later.
			 */
			ISC_LIST_UNLINK(name->finds, find, plink);
			find->adbname = NULL;
			find->name_bucket = DNS_ADB_INVALIDBUCKET;

			INSIST(!FIND_EVENTSENT(find));

			/*
			 * ev_sender currently holds the destination task;
			 * swap it for the find before sending.
			 */
			ev = &find->event;
			task = ev->ev_sender;
			ev->ev_sender = find;
			find->result_v4 = find_err_map[name->fetch_err];
			find->result_v6 = find_err_map[name->fetch6_err];
			ev->ev_type = evtype;
			ev->ev_destroy = event_free;
			ev->ev_destroy_arg = find;

			DP(DEF_LEVEL, "sending event %p to task %p for find %p",
			   ev, task, find);

			isc_task_sendanddetach(&task, (isc_event_t **)&ev);
			find->flags |= FIND_EVENT_SENT;
		} else {
			DP(DEF_LEVEL, "cfan: skipping find %p", find);
		}

		UNLOCK(&find->lock);
		find = next_find;
	}
	DP(ENTER_LEVEL, "EXIT clean_finds_at_name, name %p", name);
}

/*
 * If the ADB is shutting down, initialize the control event with
 * shutdown_task() as its action, send it to the ADB's task and record
 * that it is outstanding in cevent_out.
 */
static void
check_exit(dns_adb_t *adb) {
	isc_event_t *event;
	/*
	 * The caller must be holding the adb lock.
	 */
	if (atomic_load(&adb->shutting_down)) {
		/*
		 * If there aren't any external references either, we're
		 * done.  Send the control event to initiate shutdown.
		 */
		INSIST(!adb->cevent_out); /* Sanity check. */
		ISC_EVENT_INIT(&adb->cevent, sizeof(adb->cevent), 0, NULL,
			       DNS_EVENT_ADBCONTROL, shutdown_task, adb, adb,
			       NULL, NULL);
		event = &adb->cevent;
		isc_task_send(adb->task, &event);
		adb->cevent_out = true;
	}
}

/*
 * Drop one internal reference under reflock.  When the internal count
 * reaches zero, every event queued on 'whenshutdown' is sent to the task
 * stored in its ev_sender, with ev_sender replaced by the adb.
 *
 * Returns true when both the internal and external counts are zero.
 */
static bool
dec_adb_irefcnt(dns_adb_t *adb) {
	isc_event_t *event;
	isc_task_t *etask;
	bool result = false;

	LOCK(&adb->reflock);

	INSIST(adb->irefcnt > 0);
	adb->irefcnt--;

	if (adb->irefcnt == 0) {
		event = ISC_LIST_HEAD(adb->whenshutdown);
		while (event != NULL) {
			ISC_LIST_UNLINK(adb->whenshutdown, event, ev_link);
			etask = event->ev_sender;
			event->ev_sender = adb;
			isc_task_sendanddetach(&etask, &event);
			event = ISC_LIST_HEAD(adb->whenshutdown);
		}
	}

	if (adb->irefcnt == 0 && adb->erefcnt == 0) {
		result = true;
	}

	UNLOCK(&adb->reflock);
	return (result);
}

/*
 * Add one internal reference, under reflock.
 */
static void
inc_adb_irefcnt(dns_adb_t *adb) {
	LOCK(&adb->reflock);
	adb->irefcnt++;
	UNLOCK(&adb->reflock);
}

/*
 * Add one external reference, under reflock.
 */
static void
inc_adb_erefcnt(dns_adb_t *adb) {
	LOCK(&adb->reflock);
	adb->erefcnt++;
	UNLOCK(&adb->reflock);
}

/*
 * Add one reference to 'entry'.  If 'lock' is true the entry's bucket
 * lock is taken around the increment; otherwise no locking is done here.
 */
static void
inc_entry_refcnt(dns_adb_t *adb, dns_adbentry_t *entry, bool lock) {
	int bucket;

	bucket = entry->lock_bucket;

	if (lock) {
		LOCK(&adb->entrylocks[bucket]);
	}

	entry->refcnt++;

	if (lock) {
		UNLOCK(&adb->entrylocks[bucket]);
	}
}

/*
 * Drop one reference from 'entry'.  If 'lock' is true the entry's bucket
 * lock is taken around the decrement; otherwise no locking is done here.
 *
 * When the count reaches zero the entry is unlinked and freed if any of
 * these holds: its bucket is flagged in entry_sd[], its 'expires' is 0,
 * 'overmem' is set and expires + ADB_CACHE_MINIMUM < now, or it is flagged
 * ENTRY_IS_DEAD.
 *
 * Returns false if the entry was kept or unlink_entry() returned false;
 * otherwise returns the value of dec_adb_irefcnt().
 */
static bool
dec_entry_refcnt(dns_adb_t *adb, bool overmem, dns_adbentry_t *entry, bool lock,
		 isc_stdtime_t now) {
	int bucket;
	bool destroy_entry = false;
	bool result = false;

	bucket = entry->lock_bucket;

	if (lock) {
		LOCK(&adb->entrylocks[bucket]);
	}

	INSIST(entry->refcnt > 0);
	entry->refcnt--;

	if (entry->refcnt == 0 &&
	    (adb->entry_sd[bucket] || entry->expires == 0 ||
	     (overmem && entry->expires + ADB_CACHE_MINIMUM < now) ||
	     (entry->flags & ENTRY_IS_DEAD) != 0))
	{
		destroy_entry = true;
		result = unlink_entry(adb, entry);
	}

	if (lock) {
		UNLOCK(&adb->entrylocks[bucket]);
	}

	if (!destroy_entry) {
		return (result);
	}

	entry->lock_bucket = DNS_ADB_INVALIDBUCKET;

	free_adbentry(adb, &entry);
	if (result) {
		result = dec_adb_irefcnt(adb);
	}

	return (result);
}

/*
 * Allocate and initialize an adbname holding a copy of 'dnsname'.  The new
 * name is not linked into any bucket.
 *
 * Under namescntlock the name count and statistic are incremented; the
 * first time the count exceeds 8 * nnames while an exclusive task is
 * available, the 'grownames' event is sent to that task (taking an
 * internal reference).
 */
static dns_adbname_t *
new_adbname(dns_adb_t *adb, const dns_name_t *dnsname) {
	dns_adbname_t *name;

	name = isc_mem_get(adb->mctx, sizeof(*name));

	dns_name_init(&name->name, NULL);
	dns_name_dup(dnsname, adb->mctx, &name->name);
	dns_name_init(&name->target, NULL);
	name->magic = DNS_ADBNAME_MAGIC;
	name->adb = adb;
	name->partial_result = 0;
	name->flags = 0;
	name->expire_v4 = INT_MAX;
	name->expire_v6 = INT_MAX;
	name->expire_target = INT_MAX;
	name->chains = 0;
	name->lock_bucket = DNS_ADB_INVALIDBUCKET;
	ISC_LIST_INIT(name->v4);
	ISC_LIST_INIT(name->v6);
	name->fetch_a = NULL;
	name->fetch_aaaa = NULL;
	name->fetch_err = FIND_ERR_UNEXPECTED;
	name->fetch6_err = FIND_ERR_UNEXPECTED;
	ISC_LIST_INIT(name->finds);
	ISC_LINK_INIT(name, plink);

	LOCK(&adb->namescntlock);
	adb->namescnt++;
	inc_adbstats(adb, dns_adbstats_namescnt);
	if (!adb->grownames_sent && adb->excl != NULL &&
	    adb->namescnt > (adb->nnames * 8))
	{
		isc_event_t *event = &adb->grownames;
		inc_adb_irefcnt(adb);
		isc_task_send(adb->excl, &event);
		adb->grownames_sent = true;
	}
	UNLOCK(&adb->namescntlock);

	return (name);
}

/*
 * Free '*name' and set the caller's pointer to NULL.  The name must have
 * no v4/v6 namehooks, no fetches, no finds, and must not be linked into a
 * bucket.  The name count and statistic are decremented under
 * namescntlock.
 */
static void
free_adbname(dns_adb_t *adb, dns_adbname_t **name) {
	dns_adbname_t *n;

	INSIST(name != NULL && DNS_ADBNAME_VALID(*name));
	n = *name;
	*name = NULL;

	INSIST(!NAME_HAS_V4(n));
	INSIST(!NAME_HAS_V6(n));
	INSIST(!NAME_FETCH(n));
	INSIST(ISC_LIST_EMPTY(n->finds));
	INSIST(!ISC_LINK_LINKED(n, plink));
	INSIST(n->lock_bucket == DNS_ADB_INVALIDBUCKET);
	INSIST(n->adb == adb);

	n->magic = 0;
	dns_name_free(&n->name, adb->mctx);

	isc_mem_put(adb->mctx, n, sizeof(*n));
	LOCK(&adb->namescntlock);
	adb->namescnt--;
	dec_adbstats(adb, dns_adbstats_namescnt);
	UNLOCK(&adb->namescntlock);
}

/*
 * Allocate a namehook pointing at 'entry' and count it in nhrefcnt.  The
 * entry's own reference count is not touched here.
 */
static dns_adbnamehook_t *
new_adbnamehook(dns_adb_t *adb, dns_adbentry_t *entry) {
	dns_adbnamehook_t *nh;

	nh = isc_mem_get(adb->mctx, sizeof(*nh));
	isc_refcount_increment0(&adb->nhrefcnt);

	nh->magic = DNS_ADBNAMEHOOK_MAGIC;
	nh->entry = entry;
	ISC_LINK_INIT(nh, plink);

	return (nh);
}

/*
 * Free '*namehook' and set the caller's pointer to NULL.  The namehook
 * must already be unlinked and have its entry pointer cleared.
 */
static void
free_adbnamehook(dns_adb_t *adb, dns_adbnamehook_t **namehook) {
	dns_adbnamehook_t *nh;

	INSIST(namehook != NULL && DNS_ADBNAMEHOOK_VALID(*namehook));
	nh = *namehook;
	*namehook = NULL;

	INSIST(nh->entry == NULL);
	INSIST(!ISC_LINK_LINKED(nh, plink));

	nh->magic = 0;

	isc_refcount_decrement(&adb->nhrefcnt);
	isc_mem_put(adb->mctx, nh, sizeof(*nh));
}

/*
 * Allocate a lameinfo record holding a copy of 'qname' and 'qtype', with
 * lame_timer set to 0.
 */
static dns_adblameinfo_t *
new_adblameinfo(dns_adb_t *adb, const dns_name_t *qname,
		dns_rdatatype_t qtype) {
	dns_adblameinfo_t *li;

	li = isc_mem_get(adb->mctx, sizeof(*li));

	dns_name_init(&li->qname, NULL);
	dns_name_dup(qname, adb->mctx, &li->qname);
	li->magic = DNS_ADBLAMEINFO_MAGIC;
	li->lame_timer = 0;
	li->qtype = qtype;
	ISC_LINK_INIT(li, plink);

	return (li);
}

/*
 * Free '*lameinfo' (including its qname copy) and set the caller's pointer
 * to NULL.  The record must already be unlinked.
 */
static void
free_adblameinfo(dns_adb_t *adb, dns_adblameinfo_t **lameinfo) {
	dns_adblameinfo_t *li;

	INSIST(lameinfo != NULL && DNS_ADBLAMEINFO_VALID(*lameinfo));
	li = *lameinfo;
	*lameinfo = NULL;

	INSIST(!ISC_LINK_LINKED(li, plink));

	dns_name_free(&li->qname, adb->mctx);

	li->magic = 0;

	isc_mem_put(adb->mctx, li, sizeof(*li));
}

/*
 * Allocate and initialize an entry that is not linked into any bucket.
 * srtt starts at isc_random_uniform(0x1f) + 1 and the quota is copied
 * from the adb.
 *
 * Under entriescntlock the entry count and statistic are incremented; the
 * first time the count exceeds 8 * nentries while an exclusive task is
 * available, the 'growentries' event is sent to that task (taking an
 * internal reference).
 */
static dns_adbentry_t *
new_adbentry(dns_adb_t *adb) {
	dns_adbentry_t *e;

	e = isc_mem_get(adb->mctx, sizeof(*e));

	e->magic = DNS_ADBENTRY_MAGIC;
	e->lock_bucket = DNS_ADB_INVALIDBUCKET;
	e->refcnt = 0;
	e->nh = 0;
	e->flags = 0;
	e->udpsize = 0;
	e->edns = 0;
	e->ednsto = 0;
	e->completed = 0;
	e->timeouts = 0;
	e->plain = 0;
	e->plainto = 0;
	e->cookie = NULL;
	e->cookielen = 0;
	e->srtt = (isc_random_uniform(0x1f)) + 1;
	e->lastage = 0;
	e->expires = 0;
	atomic_init(&e->active, 0);
	e->mode = 0;
	atomic_init(&e->quota, adb->quota);
	e->atr = 0.0;
	ISC_LIST_INIT(e->lameinfo);
	ISC_LINK_INIT(e, plink);
	LOCK(&adb->entriescntlock);
	adb->entriescnt++;
	inc_adbstats(adb, dns_adbstats_entriescnt);
	if (!adb->growentries_sent && adb->excl != NULL &&
	    adb->entriescnt > (adb->nentries * 8))
	{
		isc_event_t *event = &adb->growentries;
		inc_adb_irefcnt(adb);
		isc_task_send(adb->excl, &event);
		adb->growentries_sent = true;
	}
	UNLOCK(&adb->entriescntlock);

	return (e);
}

/*
 * Free '*entry', its cookie (if any) and all of its lameinfo records, and
 * set the caller's pointer to NULL.  The entry must be inactive,
 * unreferenced and not linked into a bucket.  The entry count and
 * statistic are decremented under entriescntlock.
 */
static void
free_adbentry(dns_adb_t *adb, dns_adbentry_t **entry) {
	dns_adbentry_t *e;
	dns_adblameinfo_t *li;
	uint_fast32_t active;

	INSIST(entry != NULL && DNS_ADBENTRY_VALID(*entry));
	e = *entry;
	*entry = NULL;

	active = atomic_load_acquire(&e->active);
	INSIST(active == 0);

	INSIST(e->lock_bucket == DNS_ADB_INVALIDBUCKET);
	INSIST(e->refcnt == 0);
	INSIST(!ISC_LINK_LINKED(e, plink));

	e->magic = 0;

	if (e->cookie != NULL) {
		isc_mem_put(adb->mctx, e->cookie, e->cookielen);
	}

	li = ISC_LIST_HEAD(e->lameinfo);
	while (li != NULL) {
		ISC_LIST_UNLINK(e->lameinfo, li, plink);
		free_adblameinfo(adb, &li);
		li = ISC_LIST_HEAD(e->lameinfo);
	}

	isc_mem_put(adb->mctx, e, sizeof(*e));
	LOCK(&adb->entriescntlock);
	adb->entriescnt--;
	dec_adbstats(adb, dns_adbstats_entriescnt);
	UNLOCK(&adb->entriescntlock);
}

/*
 * Allocate and initialize a find.  The find is counted in ahrefcnt and
 * holds one internal reference on the adb.  The magic number is only set
 * once initialization is complete.
 */
static dns_adbfind_t *
new_adbfind(dns_adb_t *adb) {
	dns_adbfind_t *h;

	h = isc_mem_get(adb->mctx, sizeof(*h));
	isc_refcount_increment0(&adb->ahrefcnt);

	/*
	 * Public members.
	 */
	h->magic = 0;
	h->adb = adb;
	h->partial_result = 0;
	h->options = 0;
	h->flags = 0;
	h->result_v4 = ISC_R_UNEXPECTED;
	h->result_v6 = ISC_R_UNEXPECTED;
	ISC_LINK_INIT(h, publink);
	ISC_LINK_INIT(h, plink);
	ISC_LIST_INIT(h->list);
	h->adbname = NULL;
	h->name_bucket = DNS_ADB_INVALIDBUCKET;

	/*
	 * private members
	 */
	isc_mutex_init(&h->lock);

	ISC_EVENT_INIT(&h->event, sizeof(isc_event_t), 0, 0, 0, NULL, NULL,
		       NULL, NULL, h);

	inc_adb_irefcnt(adb);
	h->magic = DNS_ADBFIND_MAGIC;
	return (h);
}

/*
 * Allocate a fetch record with no fetch attached and an initialized
 * (disassociated) rdataset.
 */
static dns_adbfetch_t *
new_adbfetch(dns_adb_t *adb) {
	dns_adbfetch_t *f;

	f = isc_mem_get(adb->mctx, sizeof(*f));

	f->magic = 0;
	f->fetch = NULL;

	dns_rdataset_init(&f->rdataset);

	f->magic = DNS_ADBFETCH_MAGIC;

	return (f);
}

/*
 * Free '*fetch', disassociating its rdataset if needed, and set the
 * caller's pointer to NULL.
 */
static void
free_adbfetch(dns_adb_t *adb, dns_adbfetch_t **fetch) {
	dns_adbfetch_t *f;

	INSIST(fetch != NULL && DNS_ADBFETCH_VALID(*fetch));
	f = *fetch;
	*fetch = NULL;

	f->magic = 0;

	if (dns_rdataset_isassociated(&f->rdataset)) {
		dns_rdataset_disassociate(&f->rdataset);
	}

	isc_mem_put(adb->mctx, f, sizeof(*f));
}

/*
 * Free '*findp' and set the caller's pointer to NULL.  The find must have
 * no addresses, be unlinked from both of its lists and be detached from
 * any name.  Releases the internal adb reference taken by new_adbfind()
 * and returns the value of dec_adb_irefcnt().
 */
static bool
free_adbfind(dns_adb_t *adb, dns_adbfind_t **findp) {
	dns_adbfind_t *find;

	INSIST(findp != NULL && DNS_ADBFIND_VALID(*findp));
	find = *findp;
	*findp = NULL;

	INSIST(!FIND_HAS_ADDRS(find));
	INSIST(!ISC_LINK_LINKED(find, publink));
	INSIST(!ISC_LINK_LINKED(find, plink));
	INSIST(find->name_bucket == DNS_ADB_INVALIDBUCKET);
	INSIST(find->adbname == NULL);

	find->magic = 0;

	isc_mutex_destroy(&find->lock);

	isc_refcount_decrement(&adb->ahrefcnt);
	isc_mem_put(adb->mctx, find, sizeof(*find));
	return (dec_adb_irefcnt(adb));
}

/*
 * Copy bits from the entry into the newly allocated addrinfo.  The entry
 * must be locked, and the reference count must be bumped up by one
 * if this function returns a valid pointer.
*/ static dns_adbaddrinfo_t * new_adbaddrinfo(dns_adb_t *adb, dns_adbentry_t *entry, in_port_t port) { dns_adbaddrinfo_t *ai; ai = isc_mem_get(adb->mctx, sizeof(*ai)); ai->magic = DNS_ADBADDRINFO_MAGIC; ai->sockaddr = entry->sockaddr; isc_sockaddr_setport(&ai->sockaddr, port); ai->srtt = entry->srtt; ai->flags = entry->flags; ai->entry = entry; ISC_LINK_INIT(ai, publink); return (ai); } static void free_adbaddrinfo(dns_adb_t *adb, dns_adbaddrinfo_t **ainfo) { dns_adbaddrinfo_t *ai; INSIST(ainfo != NULL && DNS_ADBADDRINFO_VALID(*ainfo)); ai = *ainfo; *ainfo = NULL; INSIST(ai->entry == NULL); INSIST(!ISC_LINK_LINKED(ai, publink)); ai->magic = 0; isc_mem_put(adb->mctx, ai, sizeof(*ai)); } /* * Search for the name. NOTE: The bucket is kept locked on both * success and failure, so it must always be unlocked by the caller! * * On the first call to this function, *bucketp must be set to * DNS_ADB_INVALIDBUCKET. */ static dns_adbname_t * find_name_and_lock(dns_adb_t *adb, const dns_name_t *name, unsigned int options, int *bucketp) { dns_adbname_t *adbname; int bucket; bucket = dns_name_fullhash(name, false) % adb->nnames; if (*bucketp == DNS_ADB_INVALIDBUCKET) { LOCK(&adb->namelocks[bucket]); *bucketp = bucket; } else if (*bucketp != bucket) { UNLOCK(&adb->namelocks[*bucketp]); LOCK(&adb->namelocks[bucket]); *bucketp = bucket; } adbname = ISC_LIST_HEAD(adb->names[bucket]); while (adbname != NULL) { if (!NAME_DEAD(adbname)) { if (dns_name_equal(name, &adbname->name) && GLUEHINT_OK(adbname, options) && STARTATZONE_MATCHES(adbname, options)) { return (adbname); } } adbname = ISC_LIST_NEXT(adbname, plink); } return (NULL); } /* * Search for the address. NOTE: The bucket is kept locked on both * success and failure, so it must always be unlocked by the caller. * * On the first call to this function, *bucketp must be set to * DNS_ADB_INVALIDBUCKET. This will cause a lock to occur. 
On * later calls (within the same "lock path") it can be left alone, so * if this function is called multiple times locking is only done if * the bucket changes. */ static dns_adbentry_t * find_entry_and_lock(dns_adb_t *adb, const isc_sockaddr_t *addr, int *bucketp, isc_stdtime_t now) { dns_adbentry_t *entry, *entry_next; int bucket; bucket = isc_sockaddr_hash(addr, true) % adb->nentries; if (*bucketp == DNS_ADB_INVALIDBUCKET) { LOCK(&adb->entrylocks[bucket]); *bucketp = bucket; } else if (*bucketp != bucket) { UNLOCK(&adb->entrylocks[*bucketp]); LOCK(&adb->entrylocks[bucket]); *bucketp = bucket; } /* Search the list, while cleaning up expired entries. */ for (entry = ISC_LIST_HEAD(adb->entries[bucket]); entry != NULL; entry = entry_next) { entry_next = ISC_LIST_NEXT(entry, plink); (void)check_expire_entry(adb, &entry, now); if (entry != NULL && (entry->expires == 0 || entry->expires > now) && isc_sockaddr_equal(addr, &entry->sockaddr)) { ISC_LIST_UNLINK(adb->entries[bucket], entry, plink); ISC_LIST_PREPEND(adb->entries[bucket], entry, plink); return (entry); } } return (NULL); } /* * Entry bucket MUST be locked! */ static bool entry_is_lame(dns_adb_t *adb, dns_adbentry_t *entry, const dns_name_t *qname, dns_rdatatype_t qtype, isc_stdtime_t now) { dns_adblameinfo_t *li, *next_li; bool is_bad; is_bad = false; li = ISC_LIST_HEAD(entry->lameinfo); if (li == NULL) { return (false); } while (li != NULL) { next_li = ISC_LIST_NEXT(li, plink); /* * Has the entry expired? */ if (li->lame_timer < now) { ISC_LIST_UNLINK(entry->lameinfo, li, plink); free_adblameinfo(adb, &li); } /* * Order tests from least to most expensive. * * We do not break out of the main loop here as * we use the loop for house keeping. */ if (li != NULL && !is_bad && li->qtype == qtype && dns_name_equal(qname, &li->qname)) { is_bad = true; } li = next_li; } return (is_bad); } static void log_quota(dns_adbentry_t *entry, const char *fmt, ...) 
{ va_list ap; char msgbuf[2048]; char addrbuf[ISC_NETADDR_FORMATSIZE]; isc_netaddr_t netaddr; va_start(ap, fmt); vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); va_end(ap); isc_netaddr_fromsockaddr(&netaddr, &entry->sockaddr); isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf)); isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_ADB, ISC_LOG_INFO, "adb: quota %s (%" PRIuFAST32 "/%" PRIuFAST32 "): %s", addrbuf, atomic_load_relaxed(&entry->active), atomic_load_relaxed(&entry->quota), msgbuf); } static void copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, const dns_name_t *qname, dns_rdatatype_t qtype, dns_adbname_t *name, isc_stdtime_t now) { dns_adbnamehook_t *namehook; dns_adbaddrinfo_t *addrinfo; dns_adbentry_t *entry; int bucket; bucket = DNS_ADB_INVALIDBUCKET; if ((find->options & DNS_ADBFIND_INET) != 0) { namehook = ISC_LIST_HEAD(name->v4); while (namehook != NULL) { entry = namehook->entry; bucket = entry->lock_bucket; INSIST(bucket != DNS_ADB_INVALIDBUCKET); LOCK(&adb->entrylocks[bucket]); if (dns_adbentry_overquota(entry)) { find->options |= (DNS_ADBFIND_LAMEPRUNED | DNS_ADBFIND_OVERQUOTA); goto nextv4; } if (!FIND_RETURNLAME(find) && entry_is_lame(adb, entry, qname, qtype, now)) { find->options |= DNS_ADBFIND_LAMEPRUNED; goto nextv4; } addrinfo = new_adbaddrinfo(adb, entry, find->port); /* * Found a valid entry. Add it to the find's list. 
*/ inc_entry_refcnt(adb, entry, false); ISC_LIST_APPEND(find->list, addrinfo, publink); addrinfo = NULL; nextv4: UNLOCK(&adb->entrylocks[bucket]); bucket = DNS_ADB_INVALIDBUCKET; namehook = ISC_LIST_NEXT(namehook, plink); } } if ((find->options & DNS_ADBFIND_INET6) != 0) { namehook = ISC_LIST_HEAD(name->v6); while (namehook != NULL) { entry = namehook->entry; bucket = entry->lock_bucket; INSIST(bucket != DNS_ADB_INVALIDBUCKET); LOCK(&adb->entrylocks[bucket]); if (dns_adbentry_overquota(entry)) { find->options |= (DNS_ADBFIND_LAMEPRUNED | DNS_ADBFIND_OVERQUOTA); goto nextv6; } if (!FIND_RETURNLAME(find) && entry_is_lame(adb, entry, qname, qtype, now)) { find->options |= DNS_ADBFIND_LAMEPRUNED; goto nextv6; } addrinfo = new_adbaddrinfo(adb, entry, find->port); /* * Found a valid entry. Add it to the find's list. */ inc_entry_refcnt(adb, entry, false); ISC_LIST_APPEND(find->list, addrinfo, publink); addrinfo = NULL; nextv6: UNLOCK(&adb->entrylocks[bucket]); bucket = DNS_ADB_INVALIDBUCKET; namehook = ISC_LIST_NEXT(namehook, plink); } } if (bucket != DNS_ADB_INVALIDBUCKET) { UNLOCK(&adb->entrylocks[bucket]); } } static void shutdown_task(isc_task_t *task, isc_event_t *ev) { dns_adb_t *adb; UNUSED(task); adb = ev->ev_arg; INSIST(DNS_ADB_VALID(adb)); isc_event_free(&ev); /* * Wait for lock around check_exit() call to be released. */ LOCK(&adb->lock); UNLOCK(&adb->lock); destroy(adb); } /* * Name bucket must be locked; adb may be locked; no other locks held. */ static bool check_expire_name(dns_adbname_t **namep, isc_stdtime_t now) { dns_adbname_t *name; bool result = false; INSIST(namep != NULL && DNS_ADBNAME_VALID(*namep)); name = *namep; if (NAME_HAS_V4(name) || NAME_HAS_V6(name)) { return (result); } if (NAME_FETCH(name)) { return (result); } if (!EXPIRE_OK(name->expire_v4, now)) { return (result); } if (!EXPIRE_OK(name->expire_v6, now)) { return (result); } if (!EXPIRE_OK(name->expire_target, now)) { return (result); } /* * The name is empty. Delete it. 
*/ *namep = NULL; result = kill_name(&name, DNS_EVENT_ADBEXPIRED); /* * Our caller, or one of its callers, will be calling check_exit() at * some point, so we don't need to do it here. */ return (result); } /*% * Examine the tail entry of the LRU list to see if it expires or is stale * (unused for some period); if so, the name entry will be freed. If the ADB * is in the overmem condition, the tail and the next to tail entries * will be unconditionally removed (unless they have an outstanding fetch). * We don't care about a race on 'overmem' at the risk of causing some * collateral damage or a small delay in starting cleanup, so we don't bother * to lock ADB (if it's not locked). * * Name bucket must be locked; adb may be locked; no other locks held. */ static void check_stale_name(dns_adb_t *adb, int bucket, isc_stdtime_t now) { int victims, max_victims; dns_adbname_t *victim, *next_victim; bool overmem = isc_mem_isovermem(adb->mctx); int scans = 0; INSIST(bucket != DNS_ADB_INVALIDBUCKET); max_victims = overmem ? 2 : 1; /* * We limit the number of scanned entries to 10 (arbitrary choice) * in order to avoid examining too many entries when there are many * tail entries that have fetches (this should be rare, but could * happen). */ victim = ISC_LIST_TAIL(adb->names[bucket]); for (victims = 0; victim != NULL && victims < max_victims && scans < 10; victim = next_victim) { INSIST(!NAME_DEAD(victim)); scans++; next_victim = ISC_LIST_PREV(victim, plink); (void)check_expire_name(&victim, now); if (victim == NULL) { victims++; goto next; } /* * Make sure that we are not purging ADB names that has been * just created. */ if (victim->last_used + ADB_CACHE_MINIMUM >= now) { break; } if (!NAME_FETCH(victim) && (overmem || victim->last_used + ADB_STALE_MARGIN <= now)) { RUNTIME_CHECK( !kill_name(&victim, DNS_EVENT_ADBCANCELED)); victims++; } next: if (!overmem) { break; } } } /* * Entry bucket must be locked; adb may be locked; no other locks held. 
*/
/*
 * Free '*entryp' if nobody references it and its (non-zero) expiry time is
 * not after 'now'.  When the entry is freed, *entryp is set to NULL.
 *
 * Returns true only when freeing the entry released the last reference on
 * the adb, i.e. unlink_entry() reported that a shutting-down bucket became
 * empty and dec_adb_irefcnt() then returned true.  This matches how
 * shutdown_entries() and dec_entry_refcnt() report the same condition;
 * previously the value of dec_adb_irefcnt() was discarded here.
 */
static bool
check_expire_entry(dns_adb_t *adb, dns_adbentry_t **entryp, isc_stdtime_t now) {
	dns_adbentry_t *entry;
	bool result = false;

	INSIST(entryp != NULL && DNS_ADBENTRY_VALID(*entryp));
	entry = *entryp;

	if (entry->refcnt != 0) {
		return (result);
	}

	/* An 'expires' value of 0 is never treated as expired. */
	if (entry->expires == 0 || entry->expires > now) {
		return (result);
	}

	/*
	 * The entry is not in use.  Delete it.
	 */
	*entryp = NULL;
	DP(DEF_LEVEL, "killing entry %p", entry);
	INSIST(ISC_LINK_LINKED(entry, plink));
	result = unlink_entry(adb, entry);
	free_adbentry(adb, &entry);
	if (result) {
		result = dec_adb_irefcnt(adb);
	}
	return (result);
}

/*
 * Run check_expire_namehooks() and, if that returned false,
 * check_expire_name() on every name on the live list of 'bucket'.  Does
 * nothing if the bucket is flagged in name_sd[].  Returns the last result
 * obtained.
 *
 * ADB must be locked, and no other locks held.
 */
static bool
cleanup_names(dns_adb_t *adb, int bucket, isc_stdtime_t now) {
	dns_adbname_t *name;
	dns_adbname_t *next_name;
	bool result = false;

	DP(CLEAN_LEVEL, "cleaning name bucket %d", bucket);

	LOCK(&adb->namelocks[bucket]);
	if (adb->name_sd[bucket]) {
		UNLOCK(&adb->namelocks[bucket]);
		return (result);
	}

	name = ISC_LIST_HEAD(adb->names[bucket]);
	while (name != NULL) {
		/* Saved first: the name may be killed below. */
		next_name = ISC_LIST_NEXT(name, plink);
		INSIST(!result);
		result = check_expire_namehooks(name, now);
		if (!result) {
			result = check_expire_name(&name, now);
		}
		name = next_name;
	}
	UNLOCK(&adb->namelocks[bucket]);
	return (result);
}

/*
 * ADB must be locked, and no other locks held.
*/
/*
 * Run check_expire_entry() over every entry on the live list of 'bucket',
 * under that bucket's lock, and return the last result obtained.
 */
static bool
cleanup_entries(dns_adb_t *adb, int bucket, isc_stdtime_t now) {
	dns_adbentry_t *cur, *following;
	bool result = false;

	DP(CLEAN_LEVEL, "cleaning entry bucket %d", bucket);

	LOCK(&adb->entrylocks[bucket]);
	for (cur = ISC_LIST_HEAD(adb->entries[bucket]); cur != NULL;
	     cur = following)
	{
		/* Fetch the successor first: 'cur' may be freed below. */
		following = ISC_LIST_NEXT(cur, plink);
		INSIST(!result);
		result = check_expire_entry(adb, &cur, now);
	}
	UNLOCK(&adb->entrylocks[bucket]);

	return (result);
}

/*
 * Tear down 'adb': detach its tasks, destroy the bucket locks, return the
 * per-bucket tables to the hashmap memory context, destroy that context
 * and the adb's mutexes, and finally free the adb structure itself.
 */
static void
destroy(dns_adb_t *adb) {
/* Return one per-bucket table of 'count' elements to the hashmap context. */
#define PUT_TABLE(el, count) \
	isc_mem_put(adb->hmctx, adb->el, sizeof(*adb->el) * (count))

	adb->magic = 0;

	isc_task_detach(&adb->task);
	if (adb->excl != NULL) {
		isc_task_detach(&adb->excl);
	}

	/* Entry side. */
	isc_mutexblock_destroy(adb->entrylocks, adb->nentries);
	PUT_TABLE(entries, adb->nentries);
	PUT_TABLE(deadentries, adb->nentries);
	PUT_TABLE(entrylocks, adb->nentries);
	PUT_TABLE(entry_sd, adb->nentries);
	PUT_TABLE(entry_refcnt, adb->nentries);

	/* Name side. */
	isc_mutexblock_destroy(adb->namelocks, adb->nnames);
	PUT_TABLE(names, adb->nnames);
	PUT_TABLE(deadnames, adb->nnames);
	PUT_TABLE(namelocks, adb->nnames);
	PUT_TABLE(name_sd, adb->nnames);
	PUT_TABLE(name_refcnt, adb->nnames);

#undef PUT_TABLE

	isc_mem_destroy(&adb->hmctx);

	isc_mutex_destroy(&adb->reflock);
	isc_mutex_destroy(&adb->lock);
	isc_mutex_destroy(&adb->overmemlock);
	isc_mutex_destroy(&adb->entriescntlock);
	isc_mutex_destroy(&adb->namescntlock);

	isc_mem_putanddetach(&adb->mctx, adb, sizeof(dns_adb_t));
}

/*
 * Public functions.
*/

/*
 * Create a new ADB for 'view', allocated from 'mem', and return it in
 * '*newadb' with one external reference.
 *
 * The name and entry tables start with nbuckets[0] buckets each, or
 * nbuckets[11] if no exclusive task can be obtained from 'taskmgr'.  One
 * internal reference is taken per name bucket and per entry bucket.
 * 'timermgr' must be non-NULL but is otherwise unused.
 *
 * Returns ISC_R_SUCCESS, or the failing result of isc_task_create() or
 * isc_stats_create(), in which case everything allocated here is released.
 */
isc_result_t
dns_adb_create(isc_mem_t *mem, dns_view_t *view, isc_timermgr_t *timermgr,
	       isc_taskmgr_t *taskmgr, dns_adb_t **newadb) {
	dns_adb_t *adb;
	isc_result_t result;
	unsigned int i;

	REQUIRE(mem != NULL);
	REQUIRE(view != NULL);
	REQUIRE(timermgr != NULL); /* this is actually unused */
	REQUIRE(taskmgr != NULL);
	REQUIRE(newadb != NULL && *newadb == NULL);

	UNUSED(timermgr);

	adb = isc_mem_get(mem, sizeof(dns_adb_t));

	/*
	 * Initialize things here that cannot fail, and especially things
	 * that must be NULL for the error return to work properly.
	 */
	adb->magic = 0;
	adb->erefcnt = 1;
	adb->irefcnt = 0;
	adb->task = NULL;
	adb->excl = NULL;
	adb->mctx = NULL;
	adb->hmctx = NULL;
	adb->view = view;
	adb->taskmgr = taskmgr;
	adb->next_cleanbucket = 0;
	ISC_EVENT_INIT(&adb->cevent, sizeof(adb->cevent), 0, NULL, 0, NULL,
		       NULL, NULL, NULL, NULL);
	adb->cevent_out = false;
	atomic_init(&adb->shutting_down, false);
	ISC_LIST_INIT(adb->whenshutdown);

	adb->nentries = nbuckets[0];
	adb->entriescnt = 0;
	adb->entries = NULL;
	adb->deadentries = NULL;
	adb->entry_sd = NULL;
	adb->entry_refcnt = NULL;
	adb->entrylocks = NULL;
	ISC_EVENT_INIT(&adb->growentries, sizeof(adb->growentries), 0, NULL,
		       DNS_EVENT_ADBGROWENTRIES, grow_entries, adb, adb, NULL,
		       NULL);
	adb->growentries_sent = false;

	adb->quota = 0;
	adb->atr_freq = 0;
	adb->atr_low = 0.0;
	adb->atr_high = 0.0;
	adb->atr_discount = 0.0;

	adb->nnames = nbuckets[0];
	adb->namescnt = 0;
	adb->names = NULL;
	adb->deadnames = NULL;
	adb->name_sd = NULL;
	adb->name_refcnt = NULL;
	adb->namelocks = NULL;
	ISC_EVENT_INIT(&adb->grownames, sizeof(adb->grownames), 0, NULL,
		       DNS_EVENT_ADBGROWNAMES, grow_names, adb, adb, NULL,
		       NULL);
	adb->grownames_sent = false;

	/*
	 * Without an exclusive task the grow events are never sent (see the
	 * adb->excl != NULL tests in new_adbname()/new_adbentry()), so start
	 * with the nbuckets[11] table size instead.
	 */
	result = isc_taskmgr_excltask(adb->taskmgr, &adb->excl);
	if (result != ISC_R_SUCCESS) {
		DP(DEF_LEVEL,
		   "adb: task-exclusive mode unavailable, "
		   "initializing table sizes to %u\n",
		   nbuckets[11]);
		adb->nentries = nbuckets[11];
		adb->nnames = nbuckets[11];
	}

	isc_mem_attach(mem, &adb->mctx);

	isc_mutex_init(&adb->lock);
	isc_mutex_init(&adb->reflock);
	isc_mutex_init(&adb->overmemlock);
	isc_mutex_init(&adb->entriescntlock);
	isc_mutex_init(&adb->namescntlock);

	/* The bucket tables live in their own memory context. */
	isc_mem_create(&adb->hmctx);
	isc_mem_setname(adb->hmctx, "ADB_hashmaps");

#define ALLOCENTRY(adb, el)                                                    \
	do {                                                                   \
		(adb)->el = isc_mem_get((adb)->hmctx,                          \
					sizeof(*(adb)->el) * (adb)->nentries); \
	} while (0)
	ALLOCENTRY(adb, entries);
	ALLOCENTRY(adb, deadentries);
	ALLOCENTRY(adb, entrylocks);
	ALLOCENTRY(adb, entry_sd);
	ALLOCENTRY(adb, entry_refcnt);
#undef ALLOCENTRY

#define ALLOCNAME(adb, el)                                                   \
	do {                                                                 \
		(adb)->el = isc_mem_get((adb)->hmctx,                        \
					sizeof(*(adb)->el) * (adb)->nnames); \
	} while (0)
	ALLOCNAME(adb, names);
	ALLOCNAME(adb, deadnames);
	ALLOCNAME(adb, namelocks);
	ALLOCNAME(adb, name_sd);
	ALLOCNAME(adb, name_refcnt);
#undef ALLOCNAME

	/*
	 * Initialize the bucket locks for names and elements.
	 * May as well initialize the list heads, too.
	 */
	isc_mutexblock_init(adb->namelocks, adb->nnames);

	/* Each bucket holds one internal reference on the adb. */
	for (i = 0; i < adb->nnames; i++) {
		ISC_LIST_INIT(adb->names[i]);
		ISC_LIST_INIT(adb->deadnames[i]);
		adb->name_sd[i] = false;
		adb->name_refcnt[i] = 0;
		adb->irefcnt++;
	}
	for (i = 0; i < adb->nentries; i++) {
		ISC_LIST_INIT(adb->entries[i]);
		ISC_LIST_INIT(adb->deadentries[i]);
		adb->entry_sd[i] = false;
		adb->entry_refcnt[i] = 0;
		adb->irefcnt++;
	}
	isc_mutexblock_init(adb->entrylocks, adb->nentries);

	isc_refcount_init(&adb->ahrefcnt, 0);
	isc_refcount_init(&adb->nhrefcnt, 0);

	/*
	 * Allocate an internal task.
	 */
	result = isc_task_create(adb->taskmgr, 0, &adb->task);
	if (result != ISC_R_SUCCESS) {
		goto fail2;
	}

	isc_task_setname(adb->task, "ADB", adb);

	result = isc_stats_create(adb->mctx, &view->adbstats, dns_adbstats_max);
	if (result != ISC_R_SUCCESS) {
		goto fail2;
	}

	set_adbstat(adb, adb->nentries, dns_adbstats_nentries);
	set_adbstat(adb, adb->nnames, dns_adbstats_nnames);

	/*
	 * Normal return.
	 */
	adb->magic = DNS_ADB_MAGIC;
	*newadb = adb;
	return (ISC_R_SUCCESS);

fail2:
	if (adb->task != NULL) {
		isc_task_detach(&adb->task);
	}

	/* clean up entrylocks */
	isc_mutexblock_destroy(adb->entrylocks, adb->nentries);
	isc_mutexblock_destroy(adb->namelocks, adb->nnames);

	if (adb->entries != NULL) {
		isc_mem_put(adb->hmctx, adb->entries,
			    sizeof(*adb->entries) * adb->nentries);
	}
	if (adb->deadentries != NULL) {
		isc_mem_put(adb->hmctx, adb->deadentries,
			    sizeof(*adb->deadentries) * adb->nentries);
	}
	if (adb->entrylocks != NULL) {
		isc_mem_put(adb->hmctx, adb->entrylocks,
			    sizeof(*adb->entrylocks) * adb->nentries);
	}
	if (adb->entry_sd != NULL) {
		isc_mem_put(adb->hmctx, adb->entry_sd,
			    sizeof(*adb->entry_sd) * adb->nentries);
	}
	if (adb->entry_refcnt != NULL) {
		isc_mem_put(adb->hmctx, adb->entry_refcnt,
			    sizeof(*adb->entry_refcnt) * adb->nentries);
	}
	if (adb->names != NULL) {
		isc_mem_put(adb->hmctx, adb->names,
			    sizeof(*adb->names) * adb->nnames);
	}
	if (adb->deadnames != NULL) {
		isc_mem_put(adb->hmctx, adb->deadnames,
			    sizeof(*adb->deadnames) * adb->nnames);
	}
	if (adb->namelocks != NULL) {
		isc_mem_put(adb->hmctx, adb->namelocks,
			    sizeof(*adb->namelocks) * adb->nnames);
	}
	if (adb->name_sd != NULL) {
		isc_mem_put(adb->hmctx, adb->name_sd,
			    sizeof(*adb->name_sd) * adb->nnames);
	}
	if (adb->name_refcnt != NULL) {
		isc_mem_put(adb->hmctx, adb->name_refcnt,
			    sizeof(*adb->name_refcnt) * adb->nnames);
	}
	isc_mem_destroy(&adb->hmctx);

	isc_mutex_destroy(&adb->namescntlock);
	isc_mutex_destroy(&adb->entriescntlock);
	isc_mutex_destroy(&adb->overmemlock);
	isc_mutex_destroy(&adb->reflock);
	isc_mutex_destroy(&adb->lock);
	if (adb->excl != NULL) {
		isc_task_detach(&adb->excl);
	}
	isc_mem_putanddetach(&adb->mctx, adb, sizeof(dns_adb_t));

	return (result);
}

/*
 * Take an additional external reference on 'adb' and store it in '*adbx',
 * which must be NULL on entry.
 */
void
dns_adb_attach(dns_adb_t *adb, dns_adb_t **adbx) {
	REQUIRE(DNS_ADB_VALID(adb));
	REQUIRE(adbx != NULL && *adbx == NULL);

	inc_adb_erefcnt(adb);
	*adbx = adb;
}

/*
 * Release the external reference held in '*adbx' and set '*adbx' to NULL.
 * If that leaves both the external and internal counts at zero, the adb
 * must already be shutting down and check_exit() is called under the adb
 * lock.
 */
void
dns_adb_detach(dns_adb_t **adbx) {
	dns_adb_t *adb;
	bool need_exit_check;

	REQUIRE(adbx != NULL && DNS_ADB_VALID(*adbx));

	adb = *adbx;
	*adbx = NULL;

	LOCK(&adb->reflock);
	INSIST(adb->erefcnt > 0);
	adb->erefcnt--;
	need_exit_check = (adb->erefcnt == 0 && adb->irefcnt == 0);
	UNLOCK(&adb->reflock);

	if (need_exit_check) {
		LOCK(&adb->lock);
		INSIST(atomic_load(&adb->shutting_down));
		check_exit(adb);
		UNLOCK(&adb->lock);
	}
}

/*
 * Arrange for '*eventp' to be delivered to 'task' once 'adb' has shut
 * down; '*eventp' is set to NULL.  If the adb is already shutting down
 * with no internal references and no finds outstanding, the event is sent
 * immediately; otherwise it is queued on the whenshutdown list with an
 * attached reference to 'task' stored in ev_sender.
 */
void
dns_adb_whenshutdown(dns_adb_t *adb, isc_task_t *task, isc_event_t **eventp) {
	isc_task_t *tclone;
	isc_event_t *event;
	bool zeroirefcnt;

	/*
	 * Send '*eventp' to 'task' when 'adb' has shutdown.
	 */

	REQUIRE(DNS_ADB_VALID(adb));
	REQUIRE(eventp != NULL);

	event = *eventp;
	*eventp = NULL;

	LOCK(&adb->lock);
	LOCK(&adb->reflock);

	zeroirefcnt = (adb->irefcnt == 0);

	if (atomic_load(&adb->shutting_down) && zeroirefcnt &&
	    isc_refcount_current(&adb->ahrefcnt) == 0)
	{
		/*
		 * We're already shutdown.  Send the event.
		 */
		event->ev_sender = adb;
		isc_task_send(task, &event);
	} else {
		tclone = NULL;
		isc_task_attach(task, &tclone);
		event->ev_sender = tclone;
		ISC_LIST_APPEND(adb->whenshutdown, event, ev_link);
	}

	UNLOCK(&adb->reflock);
	UNLOCK(&adb->lock);
}

/*
 * Event action run while the adb is shutting down: under the adb lock,
 * clear cevent_out, shut down all name and entry buckets, then drop one
 * internal reference and call check_exit() if dec_adb_irefcnt() returned
 * true.
 */
static void
shutdown_stage2(isc_task_t *task, isc_event_t *event) {
	dns_adb_t *adb;

	UNUSED(task);

	adb = event->ev_arg;
	INSIST(DNS_ADB_VALID(adb));

	LOCK(&adb->lock);
	INSIST(atomic_load(&adb->shutting_down));
	adb->cevent_out = false;
	(void)shutdown_names(adb);
	(void)shutdown_entries(adb);
	if (dec_adb_irefcnt(adb)) {
		check_exit(adb);
	}
	UNLOCK(&adb->lock);
}

void
dns_adb_shutdown(dns_adb_t *adb) {
	isc_event_t *event;

	/*
	 * Shutdown 'adb'.
	 */

	LOCK(&adb->lock);

	if (atomic_compare_exchange_strong(&adb->shutting_down,
					   &(bool){ false }, true))
	{
		isc_mem_clearwater(adb->mctx);
		/*
		 * Isolate shutdown_names and shutdown_entries calls.
*/ inc_adb_irefcnt(adb); ISC_EVENT_INIT(&adb->cevent, sizeof(adb->cevent), 0, NULL, DNS_EVENT_ADBCONTROL, shutdown_stage2, adb, adb, NULL, NULL); adb->cevent_out = true; event = &adb->cevent; isc_task_send(adb->task, &event); } UNLOCK(&adb->lock); } isc_result_t dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action, void *arg, const dns_name_t *name, const dns_name_t *qname, dns_rdatatype_t qtype, unsigned int options, isc_stdtime_t now, dns_name_t *target, in_port_t port, unsigned int depth, isc_counter_t *qc, dns_adbfind_t **findp) { dns_adbfind_t *find = NULL; dns_adbname_t *adbname = NULL; int bucket; bool want_event = true; bool start_at_zone = false; bool alias = false; bool have_address = false; isc_result_t result; unsigned int wanted_addresses = (options & DNS_ADBFIND_ADDRESSMASK); unsigned int wanted_fetches = 0; unsigned int query_pending = 0; char namebuf[DNS_NAME_FORMATSIZE]; REQUIRE(DNS_ADB_VALID(adb)); if (task != NULL) { REQUIRE(action != NULL); } REQUIRE(name != NULL); REQUIRE(qname != NULL); REQUIRE(findp != NULL && *findp == NULL); REQUIRE(target == NULL || dns_name_hasbuffer(target)); REQUIRE((options & DNS_ADBFIND_ADDRESSMASK) != 0); result = ISC_R_UNEXPECTED; POST(result); if (atomic_load(&adb->shutting_down)) { DP(DEF_LEVEL, "dns_adb_createfind: returning " "ISC_R_SHUTTINGDOWN"); return (ISC_R_SHUTTINGDOWN); } if (now == 0) { isc_stdtime_get(&now); } /* * XXXMLG Move this comment somewhere else! * * Look up the name in our internal database. * * Possibilities: Note that these are not always exclusive. * * No name found. In this case, allocate a new name header and * an initial namehook or two. * * Name found, valid addresses present. Allocate one addrinfo * structure for each found and append it to the linked list * of addresses for this header. * * Name found, queries pending. 
In this case, if a task was * passed in, allocate a job id, attach it to the name's job * list and remember to tell the caller that there will be * more info coming later. */ find = new_adbfind(adb); find->port = port; /* * Remember what types of addresses we are interested in. */ find->options = options; find->flags |= wanted_addresses; if (FIND_WANTEVENT(find)) { REQUIRE(task != NULL); } if (isc_log_wouldlog(dns_lctx, DEF_LEVEL)) { dns_name_format(name, namebuf, sizeof(namebuf)); } else { namebuf[0] = 0; } /* * Try to see if we know anything about this name at all. */ bucket = DNS_ADB_INVALIDBUCKET; adbname = find_name_and_lock(adb, name, find->options, &bucket); INSIST(bucket != DNS_ADB_INVALIDBUCKET); if (adb->name_sd[bucket]) { DP(DEF_LEVEL, "dns_adb_createfind: returning " "ISC_R_SHUTTINGDOWN"); RUNTIME_CHECK(!free_adbfind(adb, &find)); result = ISC_R_SHUTTINGDOWN; goto out; } /* * Nothing found. Allocate a new adbname structure for this name. */ if (adbname == NULL) { /* * See if there is any stale name at the end of list, and purge * it if so. */ check_stale_name(adb, bucket, now); adbname = new_adbname(adb, name); link_name(adb, bucket, adbname); if (FIND_HINTOK(find)) { adbname->flags |= DNS_ADBFIND_HINTOK; } if (FIND_GLUEOK(find)) { adbname->flags |= DNS_ADBFIND_GLUEOK; } if (FIND_STARTATZONE(find)) { adbname->flags |= DNS_ADBFIND_STARTATZONE; } } else { /* Move this name forward in the LRU list */ ISC_LIST_UNLINK(adb->names[bucket], adbname, plink); ISC_LIST_PREPEND(adb->names[bucket], adbname, plink); } adbname->last_used = now; /* * Expire old entries, etc. */ RUNTIME_CHECK(!check_expire_namehooks(adbname, now)); /* * Do we know that the name is an alias? */ if (!EXPIRE_OK(adbname->expire_target, now)) { /* * Yes, it is. */ DP(DEF_LEVEL, "dns_adb_createfind: name %s (%p) is an alias (cached)", namebuf, adbname); alias = true; goto post_copy; } /* * Try to populate the name from the database and/or * start fetches. 
First try looking for an A record * in the database. */ if (!NAME_HAS_V4(adbname) && EXPIRE_OK(adbname->expire_v4, now) && WANT_INET(wanted_addresses)) { result = dbfind_name(adbname, now, dns_rdatatype_a); if (result == ISC_R_SUCCESS) { DP(DEF_LEVEL, "dns_adb_createfind: found A for name %s (%p) in db", namebuf, adbname); goto v6; } /* * Did we get a CNAME or DNAME? */ if (result == DNS_R_ALIAS) { DP(DEF_LEVEL, "dns_adb_createfind: name %s (%p) is an alias", namebuf, adbname); alias = true; goto post_copy; } /* * If the name doesn't exist at all, don't bother with * v6 queries; they won't work. * * If the name does exist but we didn't get our data, go * ahead and try AAAA. * * If the result is neither of these, try a fetch for A. */ if (NXDOMAIN_RESULT(result)) { goto fetch; } else if (NXRRSET_RESULT(result)) { goto v6; } if (!NAME_FETCH_A(adbname)) { wanted_fetches |= DNS_ADBFIND_INET; } } v6: if (!NAME_HAS_V6(adbname) && EXPIRE_OK(adbname->expire_v6, now) && WANT_INET6(wanted_addresses)) { result = dbfind_name(adbname, now, dns_rdatatype_aaaa); if (result == ISC_R_SUCCESS) { DP(DEF_LEVEL, "dns_adb_createfind: found AAAA for name %s (%p)", namebuf, adbname); goto fetch; } /* * Did we get a CNAME or DNAME? */ if (result == DNS_R_ALIAS) { DP(DEF_LEVEL, "dns_adb_createfind: name %s (%p) is an alias", namebuf, adbname); alias = true; goto post_copy; } /* * Listen to negative cache hints, and don't start * another query. */ if (NCACHE_RESULT(result) || AUTH_NX(result)) { goto fetch; } if (!NAME_FETCH_AAAA(adbname)) { wanted_fetches |= DNS_ADBFIND_INET6; } } fetch: if ((WANT_INET(wanted_addresses) && NAME_HAS_V4(adbname)) || (WANT_INET6(wanted_addresses) && NAME_HAS_V6(adbname))) { have_address = true; } else { have_address = false; } if (wanted_fetches != 0 && !(FIND_AVOIDFETCHES(find) && have_address) && !FIND_NOFETCH(find)) { /* * We're missing at least one address family. 
Either the * caller hasn't instructed us to avoid fetches, or we don't * know anything about any of the address families that would * be acceptable so we have to launch fetches. */ if (FIND_STARTATZONE(find)) { start_at_zone = true; } /* * Start V4. */ if (WANT_INET(wanted_fetches) && fetch_name(adbname, start_at_zone, depth, qc, dns_rdatatype_a) == ISC_R_SUCCESS) { DP(DEF_LEVEL, "dns_adb_createfind: " "started A fetch for name %s (%p)", namebuf, adbname); } /* * Start V6. */ if (WANT_INET6(wanted_fetches) && fetch_name(adbname, start_at_zone, depth, qc, dns_rdatatype_aaaa) == ISC_R_SUCCESS) { DP(DEF_LEVEL, "dns_adb_createfind: " "started AAAA fetch for name %s (%p)", namebuf, adbname); } } /* * Run through the name and copy out the bits we are * interested in. */ copy_namehook_lists(adb, find, qname, qtype, adbname, now); post_copy: if (NAME_FETCH_A(adbname)) { query_pending |= DNS_ADBFIND_INET; } if (NAME_FETCH_AAAA(adbname)) { query_pending |= DNS_ADBFIND_INET6; } /* * Attach to the name's query list if there are queries * already running, and we have been asked to. */ if (!FIND_WANTEVENT(find)) { want_event = false; } if (FIND_WANTEMPTYEVENT(find) && FIND_HAS_ADDRS(find)) { want_event = false; } if ((wanted_addresses & query_pending) == 0) { want_event = false; } if (alias) { want_event = false; } if (want_event) { bool empty; find->adbname = adbname; find->name_bucket = bucket; empty = ISC_LIST_EMPTY(adbname->finds); ISC_LIST_APPEND(adbname->finds, find, plink); find->query_pending = (query_pending & wanted_addresses); find->flags &= ~DNS_ADBFIND_ADDRESSMASK; find->flags |= (find->query_pending & DNS_ADBFIND_ADDRESSMASK); DP(DEF_LEVEL, "createfind: attaching find %p to adbname %p %d", find, adbname, empty); } else { /* * Remove the flag so the caller knows there will never * be an event, and set internal flags to fake that * the event was sent and freed, so dns_adb_destroyfind() will * do the right thing. 
*/ find->query_pending = (query_pending & wanted_addresses); find->options &= ~DNS_ADBFIND_WANTEVENT; find->flags |= (FIND_EVENT_SENT | FIND_EVENT_FREED); find->flags &= ~DNS_ADBFIND_ADDRESSMASK; } find->partial_result |= (adbname->partial_result & wanted_addresses); if (alias) { if (target != NULL) { dns_name_copy(&adbname->target, target); } result = DNS_R_ALIAS; } else { result = ISC_R_SUCCESS; } /* * Copy out error flags from the name structure into the find. */ find->result_v4 = find_err_map[adbname->fetch_err]; find->result_v6 = find_err_map[adbname->fetch6_err]; out: if (find != NULL) { if (want_event) { isc_task_t *taskp = NULL; INSIST((find->flags & DNS_ADBFIND_ADDRESSMASK) != 0); isc_task_attach(task, &taskp); find->event.ev_sender = taskp; find->event.ev_action = action; find->event.ev_arg = arg; } *findp = find; } UNLOCK(&adb->namelocks[bucket]); return (result); } void dns_adb_destroyfind(dns_adbfind_t **findp) { dns_adbfind_t *find; dns_adbentry_t *entry; dns_adbaddrinfo_t *ai; int bucket; dns_adb_t *adb; bool overmem; isc_stdtime_t now; REQUIRE(findp != NULL && DNS_ADBFIND_VALID(*findp)); find = *findp; *findp = NULL; LOCK(&find->lock); DP(DEF_LEVEL, "dns_adb_destroyfind on find %p", find); adb = find->adb; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(FIND_EVENTFREED(find)); bucket = find->name_bucket; INSIST(bucket == DNS_ADB_INVALIDBUCKET); UNLOCK(&find->lock); /* * The find doesn't exist on any list, and nothing is locked. * Return the find to the memory pool, and decrement the adb's * reference count. */ isc_stdtime_get(&now); overmem = isc_mem_isovermem(adb->mctx); ai = ISC_LIST_HEAD(find->list); while (ai != NULL) { ISC_LIST_UNLINK(find->list, ai, publink); entry = ai->entry; ai->entry = NULL; INSIST(DNS_ADBENTRY_VALID(entry)); RUNTIME_CHECK( !dec_entry_refcnt(adb, overmem, entry, true, now)); free_adbaddrinfo(adb, &ai); ai = ISC_LIST_HEAD(find->list); } /* * WARNING: The find is freed with the adb locked. 
This is done * to avoid a race condition where we free the find, some other * thread tests to see if it should be destroyed, detects it should * be, destroys it, and then we try to lock it for our check, but the * lock is destroyed. */ LOCK(&adb->lock); if (free_adbfind(adb, &find)) { check_exit(adb); } UNLOCK(&adb->lock); } void dns_adb_cancelfind(dns_adbfind_t *find) { isc_event_t *ev; isc_task_t *task; dns_adb_t *adb; int bucket; int unlock_bucket; LOCK(&find->lock); DP(DEF_LEVEL, "dns_adb_cancelfind on find %p", find); adb = find->adb; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(!FIND_EVENTFREED(find)); REQUIRE(FIND_WANTEVENT(find)); bucket = find->name_bucket; if (bucket == DNS_ADB_INVALIDBUCKET) { goto cleanup; } /* * We need to get the adbname's lock to unlink the find. */ unlock_bucket = bucket; violate_locking_hierarchy(&find->lock, &adb->namelocks[unlock_bucket]); bucket = find->name_bucket; if (bucket != DNS_ADB_INVALIDBUCKET) { ISC_LIST_UNLINK(find->adbname->finds, find, plink); find->adbname = NULL; find->name_bucket = DNS_ADB_INVALIDBUCKET; } UNLOCK(&adb->namelocks[unlock_bucket]); bucket = DNS_ADB_INVALIDBUCKET; POST(bucket); cleanup: if (!FIND_EVENTSENT(find)) { ev = &find->event; task = ev->ev_sender; ev->ev_sender = find; ev->ev_type = DNS_EVENT_ADBCANCELED; ev->ev_destroy = event_free; ev->ev_destroy_arg = find; find->result_v4 = ISC_R_CANCELED; find->result_v6 = ISC_R_CANCELED; DP(DEF_LEVEL, "sending event %p to task %p for find %p", ev, task, find); isc_task_sendanddetach(&task, (isc_event_t **)&ev); } UNLOCK(&find->lock); } void dns_adb_dump(dns_adb_t *adb, FILE *f) { unsigned int i; isc_stdtime_t now; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(f != NULL); /* * Lock the adb itself, lock all the name buckets, then lock all * the entry buckets. This should put the adb into a state where * nothing can change, so we can iterate through everything and * print at our leisure. 
*/ LOCK(&adb->lock); isc_stdtime_get(&now); for (i = 0; i < adb->nnames; i++) { RUNTIME_CHECK(!cleanup_names(adb, i, now)); } for (i = 0; i < adb->nentries; i++) { RUNTIME_CHECK(!cleanup_entries(adb, i, now)); } dump_adb(adb, f, false, now); UNLOCK(&adb->lock); } static void dump_ttl(FILE *f, const char *legend, isc_stdtime_t value, isc_stdtime_t now) { if (value == INT_MAX) { return; } fprintf(f, " [%s TTL %d]", legend, (int)(value - now)); } static void dump_adb(dns_adb_t *adb, FILE *f, bool debug, isc_stdtime_t now) { dns_adbname_t *name; dns_adbentry_t *entry; fprintf(f, ";\n; Address database dump\n;\n"); fprintf(f, "; [edns success/timeout]\n"); fprintf(f, "; [plain success/timeout]\n;\n"); if (debug) { LOCK(&adb->reflock); fprintf(f, "; addr %p, erefcnt %u, irefcnt %u, finds out " "%" PRIuFAST32 "\n", adb, adb->erefcnt, adb->irefcnt, isc_refcount_current(&adb->nhrefcnt)); UNLOCK(&adb->reflock); } /* * In TSAN mode we need to lock the locks individually, as TSAN * can't handle more than 64 locks locked by one thread. * In regular mode we want a consistent dump so we need to * lock everything. 
*/ #ifndef __SANITIZE_THREAD__ for (size_t i = 0; i < adb->nnames; i++) { LOCK(&adb->namelocks[i]); } for (size_t i = 0; i < adb->nentries; i++) { LOCK(&adb->entrylocks[i]); } #endif /* ifndef __SANITIZE_THREAD__ */ /* * Dump the names */ for (size_t i = 0; i < adb->nnames; i++) { #ifdef __SANITIZE_THREAD__ LOCK(&adb->namelocks[i]); #endif /* ifdef __SANITIZE_THREAD__ */ name = ISC_LIST_HEAD(adb->names[i]); if (name == NULL) { #ifdef __SANITIZE_THREAD__ UNLOCK(&adb->namelocks[i]); #endif /* ifdef __SANITIZE_THREAD__ */ continue; } if (debug) { fprintf(f, "; bucket %zu\n", i); } for (; name != NULL; name = ISC_LIST_NEXT(name, plink)) { if (debug) { fprintf(f, "; name %p (flags %08x)\n", name, name->flags); } fprintf(f, "; "); print_dns_name(f, &name->name); if (dns_name_countlabels(&name->target) > 0) { fprintf(f, " alias "); print_dns_name(f, &name->target); } dump_ttl(f, "v4", name->expire_v4, now); dump_ttl(f, "v6", name->expire_v6, now); dump_ttl(f, "target", name->expire_target, now); fprintf(f, " [v4 %s] [v6 %s]", errnames[name->fetch_err], errnames[name->fetch6_err]); fprintf(f, "\n"); print_namehook_list(f, "v4", adb, &name->v4, debug, now); print_namehook_list(f, "v6", adb, &name->v6, debug, now); if (debug) { print_fetch_list(f, name); print_find_list(f, name); } } #ifdef __SANITIZE_THREAD__ UNLOCK(&adb->namelocks[i]); #endif /* ifdef __SANITIZE_THREAD__ */ } fprintf(f, ";\n; Unassociated entries\n;\n"); for (size_t i = 0; i < adb->nentries; i++) { #ifdef __SANITIZE_THREAD__ LOCK(&adb->entrylocks[i]); #endif /* ifdef __SANITIZE_THREAD__ */ entry = ISC_LIST_HEAD(adb->entries[i]); while (entry != NULL) { if (entry->nh == 0) { dump_entry(f, adb, entry, debug, now); } entry = ISC_LIST_NEXT(entry, plink); } #ifdef __SANITIZE_THREAD__ UNLOCK(&adb->entrylocks[i]); #endif /* ifdef __SANITIZE_THREAD__ */ } #ifndef __SANITIZE_THREAD__ /* * Unlock everything */ for (ssize_t i = adb->nentries - 1; i >= 0; i--) { UNLOCK(&adb->entrylocks[i]); } for (ssize_t i = 
adb->nnames - 1; i >= 0; i--) { UNLOCK(&adb->namelocks[i]); } #endif /* ifndef __SANITIZE_THREAD__ */ } static void dump_entry(FILE *f, dns_adb_t *adb, dns_adbentry_t *entry, bool debug, isc_stdtime_t now) { char addrbuf[ISC_NETADDR_FORMATSIZE]; char typebuf[DNS_RDATATYPE_FORMATSIZE]; isc_netaddr_t netaddr; dns_adblameinfo_t *li; isc_netaddr_fromsockaddr(&netaddr, &entry->sockaddr); isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf)); if (debug) { fprintf(f, ";\t%p: refcnt %u\n", entry, entry->refcnt); } fprintf(f, ";\t%s [srtt %u] [flags %08x] [edns %u/%u] " "[plain %u/%u]", addrbuf, entry->srtt, entry->flags, entry->edns, entry->ednsto, entry->plain, entry->plainto); if (entry->udpsize != 0U) { fprintf(f, " [udpsize %u]", entry->udpsize); } if (entry->cookie != NULL) { unsigned int i; fprintf(f, " [cookie="); for (i = 0; i < entry->cookielen; i++) { fprintf(f, "%02x", entry->cookie[i]); } fprintf(f, "]"); } if (entry->expires != 0) { fprintf(f, " [ttl %d]", (int)(entry->expires - now)); } if (adb != NULL && adb->quota != 0 && adb->atr_freq != 0) { uint_fast32_t quota = atomic_load_relaxed(&entry->quota); fprintf(f, " [atr %0.2f] [quota %" PRIuFAST32 "]", entry->atr, quota); } fprintf(f, "\n"); for (li = ISC_LIST_HEAD(entry->lameinfo); li != NULL; li = ISC_LIST_NEXT(li, plink)) { fprintf(f, ";\t\t"); print_dns_name(f, &li->qname); dns_rdatatype_format(li->qtype, typebuf, sizeof(typebuf)); fprintf(f, " %s [lame TTL %d]\n", typebuf, (int)(li->lame_timer - now)); } } void dns_adb_dumpfind(dns_adbfind_t *find, FILE *f) { char tmp[512]; const char *tmpp; dns_adbaddrinfo_t *ai; isc_sockaddr_t *sa; /* * Not used currently, in the API Just In Case we * want to dump out the name and/or entries too. 
*/ LOCK(&find->lock); fprintf(f, ";Find %p\n", find); fprintf(f, ";\tqpending %08x partial %08x options %08x flags %08x\n", find->query_pending, find->partial_result, find->options, find->flags); fprintf(f, ";\tname_bucket %d, name %p, event sender %p\n", find->name_bucket, find->adbname, find->event.ev_sender); ai = ISC_LIST_HEAD(find->list); if (ai != NULL) { fprintf(f, "\tAddresses:\n"); } while (ai != NULL) { sa = &ai->sockaddr; switch (sa->type.sa.sa_family) { case AF_INET: tmpp = inet_ntop(AF_INET, &sa->type.sin.sin_addr, tmp, sizeof(tmp)); break; case AF_INET6: tmpp = inet_ntop(AF_INET6, &sa->type.sin6.sin6_addr, tmp, sizeof(tmp)); break; default: tmpp = "UnkFamily"; } if (tmpp == NULL) { tmpp = "BadAddress"; } fprintf(f, "\t\tentry %p, flags %08x" " srtt %u addr %s\n", ai->entry, ai->flags, ai->srtt, tmpp); ai = ISC_LIST_NEXT(ai, publink); } UNLOCK(&find->lock); } static void print_dns_name(FILE *f, const dns_name_t *name) { char buf[DNS_NAME_FORMATSIZE]; INSIST(f != NULL); dns_name_format(name, buf, sizeof(buf)); fprintf(f, "%s", buf); } static void print_namehook_list(FILE *f, const char *legend, dns_adb_t *adb, dns_adbnamehooklist_t *list, bool debug, isc_stdtime_t now) { dns_adbnamehook_t *nh; for (nh = ISC_LIST_HEAD(*list); nh != NULL; nh = ISC_LIST_NEXT(nh, plink)) { if (debug) { fprintf(f, ";\tHook(%s) %p\n", legend, nh); } #ifdef __SANITIZE_THREAD__ LOCK(&adb->entrylocks[nh->entry->lock_bucket]); #endif dump_entry(f, adb, nh->entry, debug, now); #ifdef __SANITIZE_THREAD__ UNLOCK(&adb->entrylocks[nh->entry->lock_bucket]); #endif } } static void print_fetch(FILE *f, dns_adbfetch_t *ft, const char *type) { fprintf(f, "\t\tFetch(%s): %p -> { fetch %p }\n", type, ft, ft->fetch); } static void print_fetch_list(FILE *f, dns_adbname_t *n) { if (NAME_FETCH_A(n)) { print_fetch(f, n->fetch_a, "A"); } if (NAME_FETCH_AAAA(n)) { print_fetch(f, n->fetch_aaaa, "AAAA"); } } static void print_find_list(FILE *f, dns_adbname_t *name) { dns_adbfind_t *find; find = 
ISC_LIST_HEAD(name->finds); while (find != NULL) { dns_adb_dumpfind(find, f); find = ISC_LIST_NEXT(find, plink); } } static isc_result_t dbfind_name(dns_adbname_t *adbname, isc_stdtime_t now, dns_rdatatype_t rdtype) { isc_result_t result; dns_rdataset_t rdataset; dns_adb_t *adb; dns_fixedname_t foundname; dns_name_t *fname; INSIST(DNS_ADBNAME_VALID(adbname)); adb = adbname->adb; INSIST(DNS_ADB_VALID(adb)); INSIST(rdtype == dns_rdatatype_a || rdtype == dns_rdatatype_aaaa); fname = dns_fixedname_initname(&foundname); dns_rdataset_init(&rdataset); if (rdtype == dns_rdatatype_a) { adbname->fetch_err = FIND_ERR_UNEXPECTED; } else { adbname->fetch6_err = FIND_ERR_UNEXPECTED; } /* * We need to specify whether to search static-stub zones (if * configured) depending on whether this is a "start at zone" lookup, * i.e., whether it's a "bailiwick" glue. If it's bailiwick (in which * case DNS_ADBFIND_STARTATZONE is set) we need to stop the search at * any matching static-stub zone without looking into the cache to honor * the configuration on which server we should send queries to. */ result = dns_view_find(adb->view, &adbname->name, rdtype, now, NAME_GLUEOK(adbname) ? DNS_DBFIND_GLUEOK : 0, NAME_HINTOK(adbname), ((adbname->flags & DNS_ADBFIND_STARTATZONE) != 0), NULL, NULL, fname, &rdataset, NULL); /* XXXVIX this switch statement is too sparse to gen a jump table. */ switch (result) { case DNS_R_GLUE: case DNS_R_HINT: case ISC_R_SUCCESS: /* * Found in the database. Even if we can't copy out * any information, return success, or else a fetch * will be made, which will only make things worse. */ if (rdtype == dns_rdatatype_a) { adbname->fetch_err = FIND_ERR_SUCCESS; } else { adbname->fetch6_err = FIND_ERR_SUCCESS; } result = import_rdataset(adbname, &rdataset, now); break; case DNS_R_NXDOMAIN: case DNS_R_NXRRSET: /* * We're authoritative and the data doesn't exist. * Make up a negative cache entry so we don't ask again * for a while. * * XXXRTH What time should we use? 
I'm putting in 30 seconds * for now. */ if (rdtype == dns_rdatatype_a) { adbname->expire_v4 = now + 30; DP(NCACHE_LEVEL, "adb name %p: Caching auth negative entry for A", adbname); if (result == DNS_R_NXDOMAIN) { adbname->fetch_err = FIND_ERR_NXDOMAIN; } else { adbname->fetch_err = FIND_ERR_NXRRSET; } } else { DP(NCACHE_LEVEL, "adb name %p: Caching auth negative entry for AAAA", adbname); adbname->expire_v6 = now + 30; if (result == DNS_R_NXDOMAIN) { adbname->fetch6_err = FIND_ERR_NXDOMAIN; } else { adbname->fetch6_err = FIND_ERR_NXRRSET; } } break; case DNS_R_NCACHENXDOMAIN: case DNS_R_NCACHENXRRSET: /* * We found a negative cache entry. Pull the TTL from it * so we won't ask again for a while. */ rdataset.ttl = ttlclamp(rdataset.ttl); if (rdtype == dns_rdatatype_a) { adbname->expire_v4 = rdataset.ttl + now; if (result == DNS_R_NCACHENXDOMAIN) { adbname->fetch_err = FIND_ERR_NXDOMAIN; } else { adbname->fetch_err = FIND_ERR_NXRRSET; } DP(NCACHE_LEVEL, "adb name %p: Caching negative entry for A (ttl %u)", adbname, rdataset.ttl); } else { DP(NCACHE_LEVEL, "adb name %p: Caching negative entry for AAAA (ttl " "%u)", adbname, rdataset.ttl); adbname->expire_v6 = rdataset.ttl + now; if (result == DNS_R_NCACHENXDOMAIN) { adbname->fetch6_err = FIND_ERR_NXDOMAIN; } else { adbname->fetch6_err = FIND_ERR_NXRRSET; } } break; case DNS_R_CNAME: case DNS_R_DNAME: /* * Clear the hint and glue flags, so this will match * more often. 
*/ adbname->flags &= ~(DNS_ADBFIND_GLUEOK | DNS_ADBFIND_HINTOK); rdataset.ttl = ttlclamp(rdataset.ttl); clean_target(adb, &adbname->target); adbname->expire_target = INT_MAX; result = set_target(adb, &adbname->name, fname, &rdataset, &adbname->target); if (result == ISC_R_SUCCESS) { result = DNS_R_ALIAS; DP(NCACHE_LEVEL, "adb name %p: caching alias target", adbname); adbname->expire_target = rdataset.ttl + now; } if (rdtype == dns_rdatatype_a) { adbname->fetch_err = FIND_ERR_SUCCESS; } else { adbname->fetch6_err = FIND_ERR_SUCCESS; } break; default: break; } if (dns_rdataset_isassociated(&rdataset)) { dns_rdataset_disassociate(&rdataset); } return (result); } static void fetch_callback(isc_task_t *task, isc_event_t *ev) { dns_fetchevent_t *dev; dns_adbname_t *name; dns_adb_t *adb; dns_adbfetch_t *fetch; int bucket; isc_eventtype_t ev_status; isc_stdtime_t now; isc_result_t result; unsigned int address_type; bool want_check_exit = false; UNUSED(task); INSIST(ev->ev_type == DNS_EVENT_FETCHDONE); dev = (dns_fetchevent_t *)ev; name = ev->ev_arg; INSIST(DNS_ADBNAME_VALID(name)); adb = name->adb; INSIST(DNS_ADB_VALID(adb)); bucket = name->lock_bucket; LOCK(&adb->namelocks[bucket]); INSIST(NAME_FETCH_A(name) || NAME_FETCH_AAAA(name)); address_type = 0; if (NAME_FETCH_A(name) && (name->fetch_a->fetch == dev->fetch)) { address_type = DNS_ADBFIND_INET; fetch = name->fetch_a; name->fetch_a = NULL; } else if (NAME_FETCH_AAAA(name) && (name->fetch_aaaa->fetch == dev->fetch)) { address_type = DNS_ADBFIND_INET6; fetch = name->fetch_aaaa; name->fetch_aaaa = NULL; } else { fetch = NULL; } INSIST(address_type != 0 && fetch != NULL); dns_resolver_destroyfetch(&fetch->fetch); dev->fetch = NULL; ev_status = DNS_EVENT_ADBNOMOREADDRESSES; /* * Cleanup things we don't care about. */ if (dev->node != NULL) { dns_db_detachnode(dev->db, &dev->node); } if (dev->db != NULL) { dns_db_detach(&dev->db); } /* * If this name is marked as dead, clean up, throwing away * potentially good data. 
*/ if (NAME_DEAD(name)) { free_adbfetch(adb, &fetch); isc_event_free(&ev); want_check_exit = kill_name(&name, DNS_EVENT_ADBCANCELED); UNLOCK(&adb->namelocks[bucket]); if (want_check_exit) { LOCK(&adb->lock); check_exit(adb); UNLOCK(&adb->lock); } return; } isc_stdtime_get(&now); /* * If we got a negative cache response, remember it. */ if (NCACHE_RESULT(dev->result)) { dev->rdataset->ttl = ttlclamp(dev->rdataset->ttl); if (address_type == DNS_ADBFIND_INET) { DP(NCACHE_LEVEL, "adb fetch name %p: " "caching negative entry for A (ttl %u)", name, dev->rdataset->ttl); name->expire_v4 = ISC_MIN(name->expire_v4, dev->rdataset->ttl + now); if (dev->result == DNS_R_NCACHENXDOMAIN) { name->fetch_err = FIND_ERR_NXDOMAIN; } else { name->fetch_err = FIND_ERR_NXRRSET; } inc_stats(adb, dns_resstatscounter_gluefetchv4fail); } else { DP(NCACHE_LEVEL, "adb fetch name %p: " "caching negative entry for AAAA (ttl %u)", name, dev->rdataset->ttl); name->expire_v6 = ISC_MIN(name->expire_v6, dev->rdataset->ttl + now); if (dev->result == DNS_R_NCACHENXDOMAIN) { name->fetch6_err = FIND_ERR_NXDOMAIN; } else { name->fetch6_err = FIND_ERR_NXRRSET; } inc_stats(adb, dns_resstatscounter_gluefetchv6fail); } goto out; } /* * Handle CNAME/DNAME. */ if (dev->result == DNS_R_CNAME || dev->result == DNS_R_DNAME) { dev->rdataset->ttl = ttlclamp(dev->rdataset->ttl); clean_target(adb, &name->target); name->expire_target = INT_MAX; result = set_target(adb, &name->name, dev->foundname, dev->rdataset, &name->target); if (result == ISC_R_SUCCESS) { DP(NCACHE_LEVEL, "adb fetch name %p: caching alias target", name); name->expire_target = dev->rdataset->ttl + now; } goto check_result; } /* * Did we get back junk? If so, and there are no more fetches * sitting out there, tell all the finds about it. */ if (dev->result != ISC_R_SUCCESS) { char buf[DNS_NAME_FORMATSIZE]; dns_name_format(&name->name, buf, sizeof(buf)); DP(DEF_LEVEL, "adb: fetch of '%s' %s failed: %s", buf, address_type == DNS_ADBFIND_INET ? 
"A" : "AAAA", isc_result_totext(dev->result)); /* * Don't record a failure unless this is the initial * fetch of a chain. */ if (fetch->depth > 1) { goto out; } /* XXXMLG Don't pound on bad servers. */ if (address_type == DNS_ADBFIND_INET) { name->expire_v4 = ISC_MIN(name->expire_v4, now + 10); name->fetch_err = FIND_ERR_FAILURE; inc_stats(adb, dns_resstatscounter_gluefetchv4fail); } else { name->expire_v6 = ISC_MIN(name->expire_v6, now + 10); name->fetch6_err = FIND_ERR_FAILURE; inc_stats(adb, dns_resstatscounter_gluefetchv6fail); } goto out; } /* * We got something potentially useful. */ result = import_rdataset(name, &fetch->rdataset, now); check_result: if (result == ISC_R_SUCCESS) { ev_status = DNS_EVENT_ADBMOREADDRESSES; if (address_type == DNS_ADBFIND_INET) { name->fetch_err = FIND_ERR_SUCCESS; } else { name->fetch6_err = FIND_ERR_SUCCESS; } } out: free_adbfetch(adb, &fetch); isc_event_free(&ev); clean_finds_at_name(name, ev_status, address_type); UNLOCK(&adb->namelocks[bucket]); } static isc_result_t fetch_name(dns_adbname_t *adbname, bool start_at_zone, unsigned int depth, isc_counter_t *qc, dns_rdatatype_t type) { isc_result_t result; dns_adbfetch_t *fetch = NULL; dns_adb_t *adb; dns_fixedname_t fixed; dns_name_t *name; dns_rdataset_t rdataset; dns_rdataset_t *nameservers; unsigned int options; INSIST(DNS_ADBNAME_VALID(adbname)); adb = adbname->adb; INSIST(DNS_ADB_VALID(adb)); INSIST((type == dns_rdatatype_a && !NAME_FETCH_A(adbname)) || (type == dns_rdatatype_aaaa && !NAME_FETCH_AAAA(adbname))); adbname->fetch_err = FIND_ERR_NOTFOUND; name = NULL; nameservers = NULL; dns_rdataset_init(&rdataset); options = DNS_FETCHOPT_NOVALIDATE; if (start_at_zone) { DP(ENTER_LEVEL, "fetch_name: starting at zone for name %p", adbname); name = dns_fixedname_initname(&fixed); result = dns_view_findzonecut(adb->view, &adbname->name, name, NULL, 0, 0, true, false, &rdataset, NULL); if (result != ISC_R_SUCCESS && result != DNS_R_HINT) { goto cleanup; } nameservers = 
&rdataset; options |= DNS_FETCHOPT_UNSHARED; } fetch = new_adbfetch(adb); fetch->depth = depth; /* * We're not minimizing this query, as nothing user-related should * be leaked here. * However, if we'd ever want to change it we'd have to modify * createfetch to find deepest cached name when we're providing * domain and nameservers. */ result = dns_resolver_createfetch( adb->view->resolver, &adbname->name, type, name, nameservers, NULL, NULL, 0, options, depth, qc, adb->task, fetch_callback, adbname, &fetch->rdataset, NULL, &fetch->fetch); if (result != ISC_R_SUCCESS) { DP(ENTER_LEVEL, "fetch_name: createfetch failed with %s", isc_result_totext(result)); goto cleanup; } if (type == dns_rdatatype_a) { adbname->fetch_a = fetch; inc_stats(adb, dns_resstatscounter_gluefetchv4); } else { adbname->fetch_aaaa = fetch; inc_stats(adb, dns_resstatscounter_gluefetchv6); } fetch = NULL; /* Keep us from cleaning this up below. */ cleanup: if (fetch != NULL) { free_adbfetch(adb, &fetch); } if (dns_rdataset_isassociated(&rdataset)) { dns_rdataset_disassociate(&rdataset); } return (result); } /* * XXXMLG Needs to take a find argument and an address info, no zone or adb, * since these can be extracted from the find itself. 
*/ isc_result_t dns_adb_marklame(dns_adb_t *adb, dns_adbaddrinfo_t *addr, const dns_name_t *qname, dns_rdatatype_t qtype, isc_stdtime_t expire_time) { dns_adblameinfo_t *li; int bucket; isc_result_t result = ISC_R_SUCCESS; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); REQUIRE(qname != NULL); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); li = ISC_LIST_HEAD(addr->entry->lameinfo); while (li != NULL && (li->qtype != qtype || !dns_name_equal(qname, &li->qname))) { li = ISC_LIST_NEXT(li, plink); } if (li != NULL) { if (expire_time > li->lame_timer) { li->lame_timer = expire_time; } goto unlock; } li = new_adblameinfo(adb, qname, qtype); li->lame_timer = expire_time; ISC_LIST_PREPEND(addr->entry->lameinfo, li, plink); unlock: UNLOCK(&adb->entrylocks[bucket]); return (result); } void dns_adb_adjustsrtt(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned int rtt, unsigned int factor) { int bucket; isc_stdtime_t now = 0; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); REQUIRE(factor <= 10); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); if (addr->entry->expires == 0 || factor == DNS_ADB_RTTADJAGE) { isc_stdtime_get(&now); } adjustsrtt(addr, rtt, factor, now); UNLOCK(&adb->entrylocks[bucket]); } void dns_adb_agesrtt(dns_adb_t *adb, dns_adbaddrinfo_t *addr, isc_stdtime_t now) { int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); adjustsrtt(addr, 0, DNS_ADB_RTTADJAGE, now); UNLOCK(&adb->entrylocks[bucket]); } static void adjustsrtt(dns_adbaddrinfo_t *addr, unsigned int rtt, unsigned int factor, isc_stdtime_t now) { uint64_t new_srtt; if (factor == DNS_ADB_RTTADJAGE) { if (addr->entry->lastage != now) { new_srtt = addr->entry->srtt; new_srtt <<= 9; new_srtt -= addr->entry->srtt; new_srtt >>= 9; addr->entry->lastage = now; } else { new_srtt = addr->entry->srtt; } } else { new_srtt = 
((uint64_t)addr->entry->srtt / 10 * factor) + ((uint64_t)rtt / 10 * (10 - factor)); } addr->entry->srtt = (unsigned int)new_srtt; addr->srtt = (unsigned int)new_srtt; if (addr->entry->expires == 0) { addr->entry->expires = now + ADB_ENTRY_WINDOW; } } void dns_adb_changeflags(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned int bits, unsigned int mask) { int bucket; isc_stdtime_t now; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); REQUIRE((bits & ENTRY_IS_DEAD) == 0); REQUIRE((mask & ENTRY_IS_DEAD) == 0); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); addr->entry->flags = (addr->entry->flags & ~mask) | (bits & mask); if (addr->entry->expires == 0) { isc_stdtime_get(&now); addr->entry->expires = now + ADB_ENTRY_WINDOW; } /* * Note that we do not update the other bits in addr->flags with * the most recent values from addr->entry->flags. */ addr->flags = (addr->flags & ~mask) | (bits & mask); UNLOCK(&adb->entrylocks[bucket]); } /* * The polynomial backoff curve (10000 / ((10 + n) / 10)^(3/2)) <0..99> drops * fairly aggressively at first, then slows down and tails off at around 2-3%. * * These will be used to make quota adjustments. 
*/ static int quota_adj[] = { 10000, 8668, 7607, 6747, 6037, 5443, 4941, 4512, 4141, 3818, 3536, 3286, 3065, 2867, 2690, 2530, 2385, 2254, 2134, 2025, 1925, 1832, 1747, 1668, 1595, 1527, 1464, 1405, 1350, 1298, 1250, 1205, 1162, 1121, 1083, 1048, 1014, 981, 922, 894, 868, 843, 820, 797, 775, 755, 735, 716, 698, 680, 664, 648, 632, 618, 603, 590, 577, 564, 552, 540, 529, 518, 507, 497, 487, 477, 468, 459, 450, 442, 434, 426, 418, 411, 404, 397, 390, 383, 377, 370, 364, 358, 353, 347, 342, 336, 331, 326, 321, 316, 312, 307, 303, 298, 294, 290, 286, 282, 278 }; #define QUOTA_ADJ_SIZE (sizeof(quota_adj) / sizeof(quota_adj[0])) /* * Caller must hold adbentry lock */ static void maybe_adjust_quota(dns_adb_t *adb, dns_adbaddrinfo_t *addr, bool timeout) { double tr; UNUSED(adb); if (adb->quota == 0 || adb->atr_freq == 0) { return; } if (timeout) { addr->entry->timeouts++; } if (addr->entry->completed++ <= adb->atr_freq) { return; } /* * Calculate an exponential rolling average of the timeout ratio * * XXX: Integer arithmetic might be better than floating point */ tr = (double)addr->entry->timeouts / addr->entry->completed; addr->entry->timeouts = addr->entry->completed = 0; INSIST(addr->entry->atr >= 0.0); INSIST(addr->entry->atr <= 1.0); INSIST(adb->atr_discount >= 0.0); INSIST(adb->atr_discount <= 1.0); addr->entry->atr *= 1.0 - adb->atr_discount; addr->entry->atr += tr * adb->atr_discount; addr->entry->atr = ISC_CLAMP(addr->entry->atr, 0.0, 1.0); if (addr->entry->atr < adb->atr_low && addr->entry->mode > 0) { uint_fast32_t new_quota = adb->quota * quota_adj[--addr->entry->mode] / 10000; atomic_store_release(&addr->entry->quota, ISC_MAX(1, new_quota)); log_quota(addr->entry, "atr %0.2f, quota increased to %" PRIuFAST32, addr->entry->atr, new_quota); } else if (addr->entry->atr > adb->atr_high && addr->entry->mode < (QUOTA_ADJ_SIZE - 1)) { uint_fast32_t new_quota = adb->quota * quota_adj[++addr->entry->mode] / 10000; atomic_store_release(&addr->entry->quota, ISC_MAX(1, 
new_quota)); log_quota(addr->entry, "atr %0.2f, quota decreased to %" PRIuFAST32, addr->entry->atr, new_quota); } } #define EDNSTOS 3U void dns_adb_plainresponse(dns_adb_t *adb, dns_adbaddrinfo_t *addr) { int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); maybe_adjust_quota(adb, addr, false); addr->entry->plain++; if (addr->entry->plain == 0xff) { addr->entry->edns >>= 1; addr->entry->ednsto >>= 1; addr->entry->plain >>= 1; addr->entry->plainto >>= 1; } UNLOCK(&adb->entrylocks[bucket]); } void dns_adb_timeout(dns_adb_t *adb, dns_adbaddrinfo_t *addr) { int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); maybe_adjust_quota(adb, addr, true); addr->entry->plainto++; if (addr->entry->plainto == 0xff) { addr->entry->edns >>= 1; addr->entry->ednsto >>= 1; addr->entry->plain >>= 1; addr->entry->plainto >>= 1; } UNLOCK(&adb->entrylocks[bucket]); } void dns_adb_ednsto(dns_adb_t *adb, dns_adbaddrinfo_t *addr) { int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); maybe_adjust_quota(adb, addr, true); addr->entry->ednsto++; if (addr->entry->ednsto == 0xff) { addr->entry->edns >>= 1; addr->entry->ednsto >>= 1; addr->entry->plain >>= 1; addr->entry->plainto >>= 1; } UNLOCK(&adb->entrylocks[bucket]); } void dns_adb_setudpsize(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned int size) { int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); if (size < 512U) { size = 512U; } if (size > addr->entry->udpsize) { addr->entry->udpsize = size; } maybe_adjust_quota(adb, addr, false); addr->entry->edns++; if (addr->entry->edns == 0xff) { addr->entry->edns >>= 1; addr->entry->ednsto >>= 1; addr->entry->plain >>= 1; 
addr->entry->plainto >>= 1; } UNLOCK(&adb->entrylocks[bucket]); } unsigned int dns_adb_getudpsize(dns_adb_t *adb, dns_adbaddrinfo_t *addr) { int bucket; unsigned int size; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); size = addr->entry->udpsize; UNLOCK(&adb->entrylocks[bucket]); return (size); } void dns_adb_setcookie(dns_adb_t *adb, dns_adbaddrinfo_t *addr, const unsigned char *cookie, size_t len) { int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); if (addr->entry->cookie != NULL && (cookie == NULL || len != addr->entry->cookielen)) { isc_mem_put(adb->mctx, addr->entry->cookie, addr->entry->cookielen); addr->entry->cookie = NULL; addr->entry->cookielen = 0; } if (addr->entry->cookie == NULL && cookie != NULL && len != 0U) { addr->entry->cookie = isc_mem_get(adb->mctx, len); addr->entry->cookielen = (uint16_t)len; } if (addr->entry->cookie != NULL) { memmove(addr->entry->cookie, cookie, len); } UNLOCK(&adb->entrylocks[bucket]); } size_t dns_adb_getcookie(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned char *cookie, size_t len) { int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); if (cookie != NULL && addr->entry->cookie != NULL && len >= addr->entry->cookielen) { memmove(cookie, addr->entry->cookie, addr->entry->cookielen); len = addr->entry->cookielen; } else { len = 0; } UNLOCK(&adb->entrylocks[bucket]); return (len); } isc_result_t dns_adb_findaddrinfo(dns_adb_t *adb, const isc_sockaddr_t *sa, dns_adbaddrinfo_t **addrp, isc_stdtime_t now) { int bucket; dns_adbentry_t *entry; dns_adbaddrinfo_t *addr; isc_result_t result; in_port_t port; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(addrp != NULL && *addrp == NULL); UNUSED(now); result = ISC_R_SUCCESS; bucket = DNS_ADB_INVALIDBUCKET; entry = 
find_entry_and_lock(adb, sa, &bucket, now); INSIST(bucket != DNS_ADB_INVALIDBUCKET); if (adb->entry_sd[bucket]) { result = ISC_R_SHUTTINGDOWN; goto unlock; } if (entry == NULL) { /* * We don't know anything about this address. */ entry = new_adbentry(adb); entry->sockaddr = *sa; link_entry(adb, bucket, entry); DP(ENTER_LEVEL, "findaddrinfo: new entry %p", entry); } else { DP(ENTER_LEVEL, "findaddrinfo: found entry %p", entry); } port = isc_sockaddr_getport(sa); addr = new_adbaddrinfo(adb, entry, port); inc_entry_refcnt(adb, entry, false); *addrp = addr; unlock: UNLOCK(&adb->entrylocks[bucket]); return (result); } void dns_adb_freeaddrinfo(dns_adb_t *adb, dns_adbaddrinfo_t **addrp) { dns_adbaddrinfo_t *addr; dns_adbentry_t *entry; int bucket; isc_stdtime_t now; bool want_check_exit = false; bool overmem; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(addrp != NULL); addr = *addrp; *addrp = NULL; REQUIRE(DNS_ADBADDRINFO_VALID(addr)); entry = addr->entry; REQUIRE(DNS_ADBENTRY_VALID(entry)); overmem = isc_mem_isovermem(adb->mctx); bucket = addr->entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); isc_stdtime_get(&now); if (entry->expires == 0) { entry->expires = now + ADB_ENTRY_WINDOW; } want_check_exit = dec_entry_refcnt(adb, overmem, entry, false, now); UNLOCK(&adb->entrylocks[bucket]); addr->entry = NULL; free_adbaddrinfo(adb, &addr); if (want_check_exit) { LOCK(&adb->lock); check_exit(adb); UNLOCK(&adb->lock); } } void dns_adb_flush(dns_adb_t *adb) { unsigned int i; INSIST(DNS_ADB_VALID(adb)); LOCK(&adb->lock); /* * Call our cleanup routines. 
*/ for (i = 0; i < adb->nnames; i++) { RUNTIME_CHECK(!cleanup_names(adb, i, INT_MAX)); } for (i = 0; i < adb->nentries; i++) { RUNTIME_CHECK(!cleanup_entries(adb, i, INT_MAX)); } #ifdef DUMP_ADB_AFTER_CLEANING dump_adb(adb, stdout, true, INT_MAX); #endif /* ifdef DUMP_ADB_AFTER_CLEANING */ UNLOCK(&adb->lock); } void dns_adb_flushname(dns_adb_t *adb, const dns_name_t *name) { dns_adbname_t *adbname; dns_adbname_t *nextname; unsigned int bucket; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(name != NULL); LOCK(&adb->lock); bucket = dns_name_hash(name, false) % adb->nnames; LOCK(&adb->namelocks[bucket]); adbname = ISC_LIST_HEAD(adb->names[bucket]); while (adbname != NULL) { nextname = ISC_LIST_NEXT(adbname, plink); if (!NAME_DEAD(adbname) && dns_name_equal(name, &adbname->name)) { RUNTIME_CHECK( !kill_name(&adbname, DNS_EVENT_ADBCANCELED)); } adbname = nextname; } UNLOCK(&adb->namelocks[bucket]); UNLOCK(&adb->lock); } void dns_adb_flushnames(dns_adb_t *adb, const dns_name_t *name) { dns_adbname_t *adbname, *nextname; unsigned int i; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(name != NULL); LOCK(&adb->lock); for (i = 0; i < adb->nnames; i++) { LOCK(&adb->namelocks[i]); adbname = ISC_LIST_HEAD(adb->names[i]); while (adbname != NULL) { bool ret; nextname = ISC_LIST_NEXT(adbname, plink); if (!NAME_DEAD(adbname) && dns_name_issubdomain(&adbname->name, name)) { ret = kill_name(&adbname, DNS_EVENT_ADBCANCELED); RUNTIME_CHECK(!ret); } adbname = nextname; } UNLOCK(&adb->namelocks[i]); } UNLOCK(&adb->lock); } static void water(void *arg, int mark) { /* * We're going to change the way to handle overmem condition: use * isc_mem_isovermem() instead of storing the state via this callback, * since the latter way tends to cause race conditions. * To minimize the change, and in case we re-enable the callback * approach, however, keep this function at the moment. 
*/ dns_adb_t *adb = arg; bool overmem = (mark == ISC_MEM_HIWATER); REQUIRE(DNS_ADB_VALID(adb)); DP(ISC_LOG_DEBUG(1), "adb reached %s water mark", overmem ? "high" : "low"); } void dns_adb_setadbsize(dns_adb_t *adb, size_t size) { size_t hiwater, lowater; INSIST(DNS_ADB_VALID(adb)); if (size != 0U && size < DNS_ADB_MINADBSIZE) { size = DNS_ADB_MINADBSIZE; } hiwater = size - (size >> 3); /* Approximately 7/8ths. */ lowater = size - (size >> 2); /* Approximately 3/4ths. */ if (size == 0U || hiwater == 0U || lowater == 0U) { isc_mem_clearwater(adb->mctx); } else { isc_mem_setwater(adb->mctx, water, adb, hiwater, lowater); } } void dns_adb_setquota(dns_adb_t *adb, uint32_t quota, uint32_t freq, double low, double high, double discount) { REQUIRE(DNS_ADB_VALID(adb)); adb->quota = quota; adb->atr_freq = freq; adb->atr_low = low; adb->atr_high = high; adb->atr_discount = discount; } bool dns_adbentry_overquota(dns_adbentry_t *entry) { REQUIRE(DNS_ADBENTRY_VALID(entry)); uint_fast32_t quota = atomic_load_relaxed(&entry->quota); uint_fast32_t active = atomic_load_acquire(&entry->active); return (quota != 0 && active >= quota); } void dns_adb_beginudpfetch(dns_adb_t *adb, dns_adbaddrinfo_t *addr) { uint_fast32_t active; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); active = atomic_fetch_add_relaxed(&addr->entry->active, 1); INSIST(active != UINT32_MAX); } void dns_adb_endudpfetch(dns_adb_t *adb, dns_adbaddrinfo_t *addr) { uint_fast32_t active; REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); active = atomic_fetch_sub_release(&addr->entry->active, 1); INSIST(active != 0); }