/* $NetBSD: rrl.c,v 1.1.2.2 2024/02/24 13:07:01 martin Exp $ */ /* * Copyright (C) Internet Systems Consortium, Inc. ("ISC") * * SPDX-License-Identifier: MPL-2.0 * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, you can obtain one at https://mozilla.org/MPL/2.0/. * * See the COPYRIGHT file distributed with this work for additional * information regarding copyright ownership. */ /*! \file */ /* * Rate limit DNS responses. */ /* #define ISC_LIST_CHECKINIT */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, bool early, char *log_buf, unsigned int log_buf_len); /* * Get a modulus for a hash function that is tolerably likely to be * relatively prime to most inputs. Of course, we get a prime for initial * values not larger than the square of the last prime. We often get a prime * after that. * This works well in practice for hash tables up to at least 100 * times the square of the last prime and better than a multiplicative hash. 
*/ static int hash_divisor(unsigned int initial) { static uint16_t primes[] = { 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, #if 0 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009, #endif /* if 0 */ }; int divisions, tries; unsigned int result; uint16_t *pp, p; result = initial; if (primes[sizeof(primes) / sizeof(primes[0]) - 1] >= result) { pp = primes; while (*pp < result) { ++pp; } return (*pp); } if ((result & 1) == 0) { ++result; } divisions = 0; tries = 1; pp = primes; do { p = *pp++; ++divisions; if ((result % p) == 0) { ++tries; result += 2; pp = primes; } } while (pp < &primes[sizeof(primes) / sizeof(primes[0])]); if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) { isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3, "%d hash_divisor() divisions in %d tries" " to get %d from %d", divisions, tries, result, initial); } return (result); } /* * Convert a timestamp to a number of seconds in the past. */ static int delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) { int delta; delta = now - ts; if (delta >= 0) { return (delta); } /* * The timestamp is in the future. That future might result from * re-ordered requests, because we use timestamps on requests * instead of consulting a clock. 
Timestamps in the distant future are * assumed to result from clock changes. When the clock changes to * the past, make existing timestamps appear to be in the past. */ if (delta < -DNS_RRL_MAX_TIME_TRAVEL) { return (DNS_RRL_FOREVER); } return (0); } static int get_age(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, isc_stdtime_t now) { if (!e->ts_valid) { return (DNS_RRL_FOREVER); } return (delta_rrl_time(e->ts + rrl->ts_bases[e->ts_gen], now)); } static void set_age(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_stdtime_t now) { dns_rrl_entry_t *e_old; unsigned int ts_gen; int i, ts; ts_gen = rrl->ts_gen; ts = now - rrl->ts_bases[ts_gen]; if (ts < 0) { if (ts < -DNS_RRL_MAX_TIME_TRAVEL) { ts = DNS_RRL_FOREVER; } else { ts = 0; } } /* * Make a new timestamp base if the current base is too old. * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient, * useless history. Their timestamps can be treated as if they are * all the same. * We only do arithmetic on more recent timestamps, so bases for * older timestamps can be recycled provided the old timestamps are * marked as ancient history. * This loop is almost always very short because most entries are * recycled after one second and any entries that need to be marked * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds. 
*/ if (ts >= DNS_RRL_MAX_TS) { ts_gen = (ts_gen + 1) % DNS_RRL_TS_BASES; for (e_old = ISC_LIST_TAIL(rrl->lru), i = 0; e_old != NULL && (e_old->ts_gen == ts_gen || !ISC_LINK_LINKED(e_old, hlink)); e_old = ISC_LIST_PREV(e_old, lru), ++i) { e_old->ts_valid = false; } if (i != 0) { isc_log_write( dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1, "rrl new time base scanned %d entries" " at %d for %d %d %d %d", i, now, rrl->ts_bases[ts_gen], rrl->ts_bases[(ts_gen + 1) % DNS_RRL_TS_BASES], rrl->ts_bases[(ts_gen + 2) % DNS_RRL_TS_BASES], rrl->ts_bases[(ts_gen + 3) % DNS_RRL_TS_BASES]); } rrl->ts_gen = ts_gen; rrl->ts_bases[ts_gen] = now; ts = 0; } e->ts_gen = ts_gen; e->ts = ts; e->ts_valid = true; } static isc_result_t expand_entries(dns_rrl_t *rrl, int newsize) { unsigned int bsize; dns_rrl_block_t *b; dns_rrl_entry_t *e; double rate; int i; if (rrl->num_entries + newsize >= rrl->max_entries && rrl->max_entries != 0) { newsize = rrl->max_entries - rrl->num_entries; if (newsize <= 0) { return (ISC_R_SUCCESS); } } /* * Log expansions so that the user can tune max-table-size * and min-table-size. 
*/ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && rrl->hash != NULL) { rate = rrl->probes; if (rrl->searches != 0) { rate /= rrl->searches; } isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, "increase from %d to %d RRL entries with" " %d bins; average search length %.1f", rrl->num_entries, rrl->num_entries + newsize, rrl->hash->length, rate); } bsize = sizeof(dns_rrl_block_t) + (newsize - 1) * sizeof(dns_rrl_entry_t); b = isc_mem_get(rrl->mctx, bsize); memset(b, 0, bsize); b->size = bsize; e = b->entries; for (i = 0; i < newsize; ++i, ++e) { ISC_LINK_INIT(e, hlink); ISC_LIST_INITANDAPPEND(rrl->lru, e, lru); } rrl->num_entries += newsize; ISC_LIST_INITANDAPPEND(rrl->blocks, b, link); return (ISC_R_SUCCESS); } static dns_rrl_bin_t * get_bin(dns_rrl_hash_t *hash, unsigned int hval) { INSIST(hash != NULL); return (&hash->bins[hval % hash->length]); } static void free_old_hash(dns_rrl_t *rrl) { dns_rrl_hash_t *old_hash; dns_rrl_bin_t *old_bin; dns_rrl_entry_t *e, *e_next; old_hash = rrl->old_hash; for (old_bin = &old_hash->bins[0]; old_bin < &old_hash->bins[old_hash->length]; ++old_bin) { for (e = ISC_LIST_HEAD(*old_bin); e != NULL; e = e_next) { e_next = ISC_LIST_NEXT(e, hlink); ISC_LINK_INIT(e, hlink); } } isc_mem_put(rrl->mctx, old_hash, sizeof(*old_hash) + (old_hash->length - 1) * sizeof(old_hash->bins[0])); rrl->old_hash = NULL; } static isc_result_t expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) { dns_rrl_hash_t *hash; int old_bins, new_bins, hsize; double rate; if (rrl->old_hash != NULL) { free_old_hash(rrl); } /* * Most searches fail and so go to the end of the chain. * Use a small hash table load factor. */ old_bins = (rrl->hash == NULL) ? 
0 : rrl->hash->length; new_bins = old_bins / 8 + old_bins; if (new_bins < rrl->num_entries) { new_bins = rrl->num_entries; } new_bins = hash_divisor(new_bins); hsize = sizeof(dns_rrl_hash_t) + (new_bins - 1) * sizeof(hash->bins[0]); hash = isc_mem_get(rrl->mctx, hsize); memset(hash, 0, hsize); hash->length = new_bins; rrl->hash_gen ^= 1; hash->gen = rrl->hash_gen; if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && old_bins != 0) { rate = rrl->probes; if (rrl->searches != 0) { rate /= rrl->searches; } isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, "increase from %d to %d RRL bins for" " %d entries; average search length %.1f", old_bins, new_bins, rrl->num_entries, rate); } rrl->old_hash = rrl->hash; if (rrl->old_hash != NULL) { rrl->old_hash->check_time = now; } rrl->hash = hash; return (ISC_R_SUCCESS); } static void ref_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, int probes, isc_stdtime_t now) { /* * Make the entry most recently used. */ if (ISC_LIST_HEAD(rrl->lru) != e) { if (e == rrl->last_logged) { rrl->last_logged = ISC_LIST_PREV(e, lru); } ISC_LIST_UNLINK(rrl->lru, e, lru); ISC_LIST_PREPEND(rrl->lru, e, lru); } /* * Expand the hash table if it is time and necessary. * This will leave the newly referenced entry in a chain in the * old hash table. It will migrate to the new hash table the next * time it is used or be cut loose when the old hash table is destroyed. 
*/ rrl->probes += probes; ++rrl->searches; if (rrl->searches > 100 && delta_rrl_time(rrl->hash->check_time, now) > 1) { if (rrl->probes / rrl->searches > 2) { expand_rrl_hash(rrl, now); } rrl->hash->check_time = now; rrl->probes = 0; rrl->searches = 0; } } static bool key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) { if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0) { return (true); } return (false); } static uint32_t hash_key(const dns_rrl_key_t *key) { uint32_t hval; int i; hval = key->w[0]; for (i = sizeof(key->w) / sizeof(key->w[0]) - 1; i >= 0; --i) { hval = key->w[i] + (hval << 1); } return (hval); } /* * Construct the hash table key. * Use a hash of the DNS query name to save space in the database. * Collisions result in legitimate rate limiting responses for one * query name also limiting responses for other names to the * same client. This is rare and benign enough given the large * space costs compared to keeping the entire name in the database * entry or the time costs of dynamic allocation. */ static void make_key(const dns_rrl_t *rrl, dns_rrl_key_t *key, const isc_sockaddr_t *client_addr, dns_zone_t *zone, dns_rdatatype_t qtype, const dns_name_t *qname, dns_rdataclass_t qclass, dns_rrl_rtype_t rtype) { int i; memset(key, 0, sizeof(*key)); key->s.rtype = rtype; if (rtype == DNS_RRL_RTYPE_QUERY) { key->s.qtype = qtype; key->s.qclass = qclass & 0xff; } else if (rtype == DNS_RRL_RTYPE_REFERRAL || rtype == DNS_RRL_RTYPE_NODATA) { /* * Because there is no qtype in the empty answer sections of * referral and NODATA responses, count them as the same. */ key->s.qclass = qclass & 0xff; } if (qname != NULL && qname->labels != 0) { dns_name_t *origin = NULL; if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 && zone != NULL && (origin = dns_zone_getorigin(zone)) != NULL) { dns_fixedname_t fixed; dns_name_t *wild; isc_result_t result; /* * Put all wildcard names in one bucket using the zone's * origin name concatenated to the "*" name. 
*/ wild = dns_fixedname_initname(&fixed); result = dns_name_concatenate(dns_wildcardname, origin, wild, NULL); if (result != ISC_R_SUCCESS) { /* * Fallback to use the zone's origin name * instead of the concatenated name. */ wild = origin; } key->s.qname_hash = dns_name_fullhash(wild, false); } else { key->s.qname_hash = dns_name_fullhash(qname, false); } } switch (client_addr->type.sa.sa_family) { case AF_INET: key->s.ip[0] = (client_addr->type.sin.sin_addr.s_addr & rrl->ipv4_mask); break; case AF_INET6: key->s.ipv6 = true; memmove(key->s.ip, &client_addr->type.sin6.sin6_addr, sizeof(key->s.ip)); for (i = 0; i < DNS_RRL_MAX_PREFIX / 32; ++i) { key->s.ip[i] &= rrl->ipv6_mask[i]; } break; } } static dns_rrl_rate_t * get_rate(dns_rrl_t *rrl, dns_rrl_rtype_t rtype) { switch (rtype) { case DNS_RRL_RTYPE_QUERY: return (&rrl->responses_per_second); case DNS_RRL_RTYPE_REFERRAL: return (&rrl->referrals_per_second); case DNS_RRL_RTYPE_NODATA: return (&rrl->nodata_per_second); case DNS_RRL_RTYPE_NXDOMAIN: return (&rrl->nxdomains_per_second); case DNS_RRL_RTYPE_ERROR: return (&rrl->errors_per_second); case DNS_RRL_RTYPE_ALL: return (&rrl->all_per_second); default: UNREACHABLE(); } } static int response_balance(dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) { dns_rrl_rate_t *ratep; int balance, rate; if (e->key.s.rtype == DNS_RRL_RTYPE_TCP) { rate = 1; } else { ratep = get_rate(rrl, e->key.s.rtype); rate = ratep->scaled; } balance = e->responses + age * rate; if (balance > rate) { balance = rate; } return (balance); } /* * Search for an entry for a response and optionally create it. 
*/ static dns_rrl_entry_t * get_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr, dns_zone_t *zone, dns_rdataclass_t qclass, dns_rdatatype_t qtype, const dns_name_t *qname, dns_rrl_rtype_t rtype, isc_stdtime_t now, bool create, char *log_buf, unsigned int log_buf_len) { dns_rrl_key_t key; uint32_t hval; dns_rrl_entry_t *e; dns_rrl_hash_t *hash; dns_rrl_bin_t *new_bin, *old_bin; int probes, age; make_key(rrl, &key, client_addr, zone, qtype, qname, qclass, rtype); hval = hash_key(&key); /* * Look for the entry in the current hash table. */ new_bin = get_bin(rrl->hash, hval); probes = 1; e = ISC_LIST_HEAD(*new_bin); while (e != NULL) { if (key_cmp(&e->key, &key)) { ref_entry(rrl, e, probes, now); return (e); } ++probes; e = ISC_LIST_NEXT(e, hlink); } /* * Look in the old hash table. */ if (rrl->old_hash != NULL) { old_bin = get_bin(rrl->old_hash, hval); e = ISC_LIST_HEAD(*old_bin); while (e != NULL) { if (key_cmp(&e->key, &key)) { ISC_LIST_UNLINK(*old_bin, e, hlink); ISC_LIST_PREPEND(*new_bin, e, hlink); e->hash_gen = rrl->hash_gen; ref_entry(rrl, e, probes, now); return (e); } e = ISC_LIST_NEXT(e, hlink); } /* * Discard previous hash table when all of its entries are old. */ age = delta_rrl_time(rrl->old_hash->check_time, now); if (age > rrl->window) { free_old_hash(rrl); } } if (!create) { return (NULL); } /* * The entry does not exist, so create it by finding a free entry. * Keep currently penalized and logged entries. * Try to make more entries if none are idle. * Steal the oldest entry if we cannot create more. 
*/ for (e = ISC_LIST_TAIL(rrl->lru); e != NULL; e = ISC_LIST_PREV(e, lru)) { if (!ISC_LINK_LINKED(e, hlink)) { break; } age = get_age(rrl, e, now); if (age <= 1) { e = NULL; break; } if (!e->logged && response_balance(rrl, e, age) > 0) { break; } } if (e == NULL) { expand_entries(rrl, ISC_MIN((rrl->num_entries + 1) / 2, 1000)); e = ISC_LIST_TAIL(rrl->lru); } if (e->logged) { log_end(rrl, e, true, log_buf, log_buf_len); } if (ISC_LINK_LINKED(e, hlink)) { if (e->hash_gen == rrl->hash_gen) { hash = rrl->hash; } else { hash = rrl->old_hash; } old_bin = get_bin(hash, hash_key(&e->key)); ISC_LIST_UNLINK(*old_bin, e, hlink); } ISC_LIST_PREPEND(*new_bin, e, hlink); e->hash_gen = rrl->hash_gen; e->key = key; e->ts_valid = false; ref_entry(rrl, e, probes, now); return (e); } static void debit_log(const dns_rrl_entry_t *e, int age, const char *action) { char buf[sizeof("age=2147483647")]; const char *age_str; if (age == DNS_RRL_FOREVER) { age_str = ""; } else { snprintf(buf, sizeof(buf), "age=%d", age); age_str = buf; } isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3, "rrl %08x %6s responses=%-3d %s", hash_key(&e->key), age_str, e->responses, action); } static dns_rrl_result_t debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale, const isc_sockaddr_t *client_addr, isc_stdtime_t now, char *log_buf, unsigned int log_buf_len) { int rate, new_rate, slip, new_slip, age, log_secs, min; dns_rrl_rate_t *ratep; dns_rrl_entry_t const *credit_e; /* * Pick the rate counter. * Optionally adjust the rate by the estimated query/second rate. */ ratep = get_rate(rrl, e->key.s.rtype); rate = ratep->r; if (rate == 0) { return (DNS_RRL_RESULT_OK); } if (scale < 1.0) { /* * The limit for clients that have used TCP is not scaled. 
*/ credit_e = get_entry( rrl, client_addr, NULL, 0, dns_rdatatype_none, NULL, DNS_RRL_RTYPE_TCP, now, false, log_buf, log_buf_len); if (credit_e != NULL) { age = get_age(rrl, e, now); if (age < rrl->window) { scale = 1.0; } } } if (scale < 1.0) { new_rate = (int)(rate * scale); if (new_rate < 1) { new_rate = 1; } if (ratep->scaled != new_rate) { isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1, "%d qps scaled %s by %.2f" " from %d to %d", (int)qps, ratep->str, scale, rate, new_rate); rate = new_rate; ratep->scaled = rate; } } min = -rrl->window * rate; /* * Treat time jumps into the recent past as no time. * Treat entries older than the window as if they were just created * Credit other entries. */ age = get_age(rrl, e, now); if (age > 0) { /* * Credit tokens earned during elapsed time. */ if (age > rrl->window) { e->responses = rate; e->slip_cnt = 0; } else { e->responses += rate * age; if (e->responses > rate) { e->responses = rate; e->slip_cnt = 0; } } /* * Find the seconds since last log message without overflowing * small counter. This counter is reset when an entry is * created. It is not necessarily reset when some requests * are answered provided other requests continue to be dropped * or slipped. This can happen when the request rate is just * at the limit. */ if (e->logged) { log_secs = e->log_secs; log_secs += age; if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0) { log_secs = DNS_RRL_MAX_LOG_SECS; } e->log_secs = log_secs; } } set_age(rrl, e, now); /* * Debit the entry for this response. */ if (--e->responses >= 0) { if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) { debit_log(e, age, ""); } return (DNS_RRL_RESULT_OK); } if (e->responses < min) { e->responses = min; } /* * Drop this response unless it should slip or leak. 
*/ slip = rrl->slip.r; if (slip > 2 && scale < 1.0) { new_slip = (int)(slip * scale); if (new_slip < 2) { new_slip = 2; } if (rrl->slip.scaled != new_slip) { isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1, "%d qps scaled slip" " by %.2f from %d to %d", (int)qps, scale, slip, new_slip); slip = new_slip; rrl->slip.scaled = slip; } } if (slip != 0 && e->key.s.rtype != DNS_RRL_RTYPE_ALL) { if (e->slip_cnt++ == 0) { if ((int)e->slip_cnt >= slip) { e->slip_cnt = 0; } if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) { debit_log(e, age, "slip"); } return (DNS_RRL_RESULT_SLIP); } else if ((int)e->slip_cnt >= slip) { e->slip_cnt = 0; } } if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) { debit_log(e, age, "drop"); } return (DNS_RRL_RESULT_DROP); } static dns_rrl_qname_buf_t * get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) { dns_rrl_qname_buf_t *qbuf; qbuf = rrl->qnames[e->log_qname]; if (qbuf == NULL || qbuf->e != e) { return (NULL); } return (qbuf); } static void free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) { dns_rrl_qname_buf_t *qbuf; qbuf = get_qname(rrl, e); if (qbuf != NULL) { qbuf->e = NULL; ISC_LIST_APPEND(rrl->qname_free, qbuf, link); } } static void add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) { isc_region_t region; isc_buffer_availableregion(lb, ®ion); if (str_len >= region.length) { if (region.length == 0U) { return; } str_len = region.length; } memmove(region.base, str, str_len); isc_buffer_add(lb, str_len); } #define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s) - 1) /* * Build strings for the logs */ static void make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e, const char *str1, const char *str2, bool plural, const dns_name_t *qname, bool save_qname, dns_rrl_result_t rrl_result, isc_result_t resp_result, char *log_buf, unsigned int log_buf_len) { isc_buffer_t lb; dns_rrl_qname_buf_t *qbuf; isc_netaddr_t cidr; char strbuf[ISC_MAX(sizeof("/123"), sizeof(" (12345678)"))]; const char 
*rstr; isc_result_t msg_result; if (log_buf_len <= 1) { if (log_buf_len == 1) { log_buf[0] = '\0'; } return; } isc_buffer_init(&lb, log_buf, log_buf_len - 1); if (str1 != NULL) { add_log_str(&lb, str1, strlen(str1)); } if (str2 != NULL) { add_log_str(&lb, str2, strlen(str2)); } switch (rrl_result) { case DNS_RRL_RESULT_OK: break; case DNS_RRL_RESULT_DROP: ADD_LOG_CSTR(&lb, "drop "); break; case DNS_RRL_RESULT_SLIP: ADD_LOG_CSTR(&lb, "slip "); break; default: UNREACHABLE(); } switch (e->key.s.rtype) { case DNS_RRL_RTYPE_QUERY: break; case DNS_RRL_RTYPE_REFERRAL: ADD_LOG_CSTR(&lb, "referral "); break; case DNS_RRL_RTYPE_NODATA: ADD_LOG_CSTR(&lb, "NODATA "); break; case DNS_RRL_RTYPE_NXDOMAIN: ADD_LOG_CSTR(&lb, "NXDOMAIN "); break; case DNS_RRL_RTYPE_ERROR: if (resp_result == ISC_R_SUCCESS) { ADD_LOG_CSTR(&lb, "error "); } else { rstr = isc_result_totext(resp_result); add_log_str(&lb, rstr, strlen(rstr)); ADD_LOG_CSTR(&lb, " error "); } break; case DNS_RRL_RTYPE_ALL: ADD_LOG_CSTR(&lb, "all "); break; default: UNREACHABLE(); } if (plural) { ADD_LOG_CSTR(&lb, "responses to "); } else { ADD_LOG_CSTR(&lb, "response to "); } memset(&cidr, 0, sizeof(cidr)); if (e->key.s.ipv6) { snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen); cidr.family = AF_INET6; memset(&cidr.type.in6, 0, sizeof(cidr.type.in6)); memmove(&cidr.type.in6, e->key.s.ip, sizeof(e->key.s.ip)); } else { snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen); cidr.family = AF_INET; cidr.type.in.s_addr = e->key.s.ip[0]; } msg_result = isc_netaddr_totext(&cidr, &lb); if (msg_result != ISC_R_SUCCESS) { ADD_LOG_CSTR(&lb, "?"); } add_log_str(&lb, strbuf, strlen(strbuf)); if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY || e->key.s.rtype == DNS_RRL_RTYPE_REFERRAL || e->key.s.rtype == DNS_RRL_RTYPE_NODATA || e->key.s.rtype == DNS_RRL_RTYPE_NXDOMAIN) { qbuf = get_qname(rrl, e); if (save_qname && qbuf == NULL && qname != NULL && dns_name_isabsolute(qname)) { /* * Capture the qname for the "stop limiting" 
message. */ qbuf = ISC_LIST_TAIL(rrl->qname_free); if (qbuf != NULL) { ISC_LIST_UNLINK(rrl->qname_free, qbuf, link); } else if (rrl->num_qnames < DNS_RRL_QNAMES) { qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf)); { memset(qbuf, 0, sizeof(*qbuf)); ISC_LINK_INIT(qbuf, link); qbuf->index = rrl->num_qnames; rrl->qnames[rrl->num_qnames++] = qbuf; } } if (qbuf != NULL) { e->log_qname = qbuf->index; qbuf->e = e; dns_fixedname_init(&qbuf->qname); dns_name_copynf(qname, dns_fixedname_name( &qbuf->qname)); } } if (qbuf != NULL) { qname = dns_fixedname_name(&qbuf->qname); } if (qname != NULL) { ADD_LOG_CSTR(&lb, " for "); (void)dns_name_totext(qname, true, &lb); } else { ADD_LOG_CSTR(&lb, " for (?)"); } if (e->key.s.rtype != DNS_RRL_RTYPE_NXDOMAIN) { ADD_LOG_CSTR(&lb, " "); (void)dns_rdataclass_totext(e->key.s.qclass, &lb); if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY) { ADD_LOG_CSTR(&lb, " "); (void)dns_rdatatype_totext(e->key.s.qtype, &lb); } } snprintf(strbuf, sizeof(strbuf), " (%08" PRIx32 ")", e->key.s.qname_hash); add_log_str(&lb, strbuf, strlen(strbuf)); } /* * We saved room for '\0'. */ log_buf[isc_buffer_usedlength(&lb)] = '\0'; } static void log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, bool early, char *log_buf, unsigned int log_buf_len) { if (e->logged) { make_log_buf(rrl, e, early ? "*" : NULL, rrl->log_only ? "would stop limiting " : "stop limiting ", true, NULL, false, DNS_RRL_RESULT_OK, ISC_R_SUCCESS, log_buf, log_buf_len); isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, "%s", log_buf); free_qname(rrl, e); e->logged = false; --rrl->num_logged; } } /* * Log messages for streams that have stopped being rate limited. 
*/ static void log_stops(dns_rrl_t *rrl, isc_stdtime_t now, int limit, char *log_buf, unsigned int log_buf_len) { dns_rrl_entry_t *e; int age; for (e = rrl->last_logged; e != NULL; e = ISC_LIST_PREV(e, lru)) { if (!e->logged) { continue; } if (now != 0) { age = get_age(rrl, e, now); if (age < DNS_RRL_STOP_LOG_SECS || response_balance(rrl, e, age) < 0) { break; } } log_end(rrl, e, now == 0, log_buf, log_buf_len); if (rrl->num_logged <= 0) { break; } /* * Too many messages could stall real work. */ if (--limit < 0) { rrl->last_logged = ISC_LIST_PREV(e, lru); return; } } if (e == NULL) { INSIST(rrl->num_logged == 0); rrl->log_stops_time = now; } rrl->last_logged = e; } /* * Main rate limit interface. */ dns_rrl_result_t dns_rrl(dns_view_t *view, dns_zone_t *zone, const isc_sockaddr_t *client_addr, bool is_tcp, dns_rdataclass_t qclass, dns_rdatatype_t qtype, const dns_name_t *qname, isc_result_t resp_result, isc_stdtime_t now, bool wouldlog, char *log_buf, unsigned int log_buf_len) { dns_rrl_t *rrl; dns_rrl_rtype_t rtype; dns_rrl_entry_t *e; isc_netaddr_t netclient; int secs; double qps, scale; int exempt_match; isc_result_t result; dns_rrl_result_t rrl_result; INSIST(log_buf != NULL && log_buf_len > 0); rrl = view->rrl; if (rrl->exempt != NULL) { isc_netaddr_fromsockaddr(&netclient, client_addr); result = dns_acl_match(&netclient, NULL, rrl->exempt, &view->aclenv, &exempt_match, NULL); if (result == ISC_R_SUCCESS && exempt_match > 0) { return (DNS_RRL_RESULT_OK); } } LOCK(&rrl->lock); /* * Estimate total query per second rate when scaling by qps. 
*/ if (rrl->qps_scale == 0) { qps = 0.0; scale = 1.0; } else { ++rrl->qps_responses; secs = delta_rrl_time(rrl->qps_time, now); if (secs <= 0) { qps = rrl->qps; } else { qps = (1.0 * rrl->qps_responses) / secs; if (secs >= rrl->window) { if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) { isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3, "%d responses/%d seconds" " = %d qps", rrl->qps_responses, secs, (int)qps); } rrl->qps = qps; rrl->qps_responses = 0; rrl->qps_time = now; } else if (qps < rrl->qps) { qps = rrl->qps; } } scale = rrl->qps_scale / qps; } /* * Do maintenance once per second. */ if (rrl->num_logged > 0 && rrl->log_stops_time != now) { log_stops(rrl, now, 8, log_buf, log_buf_len); } /* * Notice TCP responses when scaling limits by qps. * Do not try to rate limit TCP responses. */ if (is_tcp) { if (scale < 1.0) { e = get_entry(rrl, client_addr, NULL, 0, dns_rdatatype_none, NULL, DNS_RRL_RTYPE_TCP, now, true, log_buf, log_buf_len); if (e != NULL) { e->responses = -(rrl->window + 1); set_age(rrl, e, now); } } UNLOCK(&rrl->lock); return (ISC_R_SUCCESS); } /* * Find the right kind of entry, creating it if necessary. * If that is impossible, then nothing more can be done */ switch (resp_result) { case ISC_R_SUCCESS: rtype = DNS_RRL_RTYPE_QUERY; break; case DNS_R_DELEGATION: rtype = DNS_RRL_RTYPE_REFERRAL; break; case DNS_R_NXRRSET: rtype = DNS_RRL_RTYPE_NODATA; break; case DNS_R_NXDOMAIN: rtype = DNS_RRL_RTYPE_NXDOMAIN; break; default: rtype = DNS_RRL_RTYPE_ERROR; break; } e = get_entry(rrl, client_addr, zone, qclass, qtype, qname, rtype, now, true, log_buf, log_buf_len); if (e == NULL) { UNLOCK(&rrl->lock); return (DNS_RRL_RESULT_OK); } if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) { /* * Do not worry about speed or releasing the lock. * This message appears before messages from debit_rrl_entry(). 
*/ make_log_buf(rrl, e, "consider limiting ", NULL, false, qname, false, DNS_RRL_RESULT_OK, resp_result, log_buf, log_buf_len); isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1, "%s", log_buf); } rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now, log_buf, log_buf_len); if (rrl->all_per_second.r != 0) { /* * We must debit the all-per-second token bucket if we have * an all-per-second limit for the IP address. * The all-per-second limit determines the log message * when both limits are hit. * The response limiting must continue if the * all-per-second limiting lapses. */ dns_rrl_entry_t *e_all; dns_rrl_result_t rrl_all_result; e_all = get_entry(rrl, client_addr, zone, 0, dns_rdatatype_none, NULL, DNS_RRL_RTYPE_ALL, now, true, log_buf, log_buf_len); if (e_all == NULL) { UNLOCK(&rrl->lock); return (DNS_RRL_RESULT_OK); } rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale, client_addr, now, log_buf, log_buf_len); if (rrl_all_result != DNS_RRL_RESULT_OK) { e = e_all; rrl_result = rrl_all_result; if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) { make_log_buf(rrl, e, "prefer all-per-second limiting ", NULL, true, qname, false, DNS_RRL_RESULT_OK, resp_result, log_buf, log_buf_len); isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1, "%s", log_buf); } } } if (rrl_result == DNS_RRL_RESULT_OK) { UNLOCK(&rrl->lock); return (DNS_RRL_RESULT_OK); } /* * Log occasionally in the rate-limit category. */ if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) && isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) { make_log_buf(rrl, e, rrl->log_only ? "would " : NULL, e->logged ? "continue limiting " : "limit ", true, qname, true, DNS_RRL_RESULT_OK, resp_result, log_buf, log_buf_len); if (!e->logged) { e->logged = true; if (++rrl->num_logged <= 1) { rrl->last_logged = e; } } e->log_secs = 0; /* * Avoid holding the lock. 
*/ if (!wouldlog) { UNLOCK(&rrl->lock); e = NULL; } isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, "%s", log_buf); } /* * Make a log message for the caller. */ if (wouldlog) { make_log_buf(rrl, e, rrl->log_only ? "would rate limit " : "rate limit ", NULL, false, qname, false, rrl_result, resp_result, log_buf, log_buf_len); } if (e != NULL) { /* * Do not save the qname unless we might need it for * the ending log message. */ if (!e->logged) { free_qname(rrl, e); } UNLOCK(&rrl->lock); } return (rrl_result); } void dns_rrl_view_destroy(dns_view_t *view) { dns_rrl_t *rrl; dns_rrl_block_t *b; dns_rrl_hash_t *h; char log_buf[DNS_RRL_LOG_BUF_LEN]; int i; rrl = view->rrl; if (rrl == NULL) { return; } view->rrl = NULL; /* * Assume the caller takes care of locking the view and anything else. */ if (rrl->num_logged > 0) { log_stops(rrl, 0, INT32_MAX, log_buf, sizeof(log_buf)); } for (i = 0; i < DNS_RRL_QNAMES; ++i) { if (rrl->qnames[i] == NULL) { break; } isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i])); } if (rrl->exempt != NULL) { dns_acl_detach(&rrl->exempt); } isc_mutex_destroy(&rrl->lock); while (!ISC_LIST_EMPTY(rrl->blocks)) { b = ISC_LIST_HEAD(rrl->blocks); ISC_LIST_UNLINK(rrl->blocks, b, link); isc_mem_put(rrl->mctx, b, b->size); } h = rrl->hash; if (h != NULL) { isc_mem_put(rrl->mctx, h, sizeof(*h) + (h->length - 1) * sizeof(h->bins[0])); } h = rrl->old_hash; if (h != NULL) { isc_mem_put(rrl->mctx, h, sizeof(*h) + (h->length - 1) * sizeof(h->bins[0])); } isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl)); } isc_result_t dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) { dns_rrl_t *rrl; isc_result_t result; *rrlp = NULL; rrl = isc_mem_get(view->mctx, sizeof(*rrl)); memset(rrl, 0, sizeof(*rrl)); isc_mem_attach(view->mctx, &rrl->mctx); isc_mutex_init(&rrl->lock); isc_stdtime_get(&rrl->ts_bases[0]); view->rrl = rrl; result = expand_entries(rrl, min_entries); if (result != ISC_R_SUCCESS) { 
dns_rrl_view_destroy(view); return (result); } result = expand_rrl_hash(rrl, 0); if (result != ISC_R_SUCCESS) { dns_rrl_view_destroy(view); return (result); } *rrlp = rrl; return (ISC_R_SUCCESS); }