/* $NetBSD: apple_dart.c,v 1.5 2023/02/24 11:19:15 jmcneill Exp $ */
/*	$OpenBSD: apldart.c,v 1.10 2022/02/27 17:36:52 kettenis Exp $	*/

/*-
 * Copyright (c) 2021 Mark Kettenis <kettenis@openbsd.org>
 * Copyright (c) 2021 Jared McNeill <jmcneill@invisible.ca>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

//#define APPLE_DART_DEBUG

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: apple_dart.c,v 1.5 2023/02/24 11:19:15 jmcneill Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/device.h>
#include <sys/intr.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vmem.h>

#include <arm/cpufunc.h>

#include <dev/fdt/fdtvar.h>
/*
 * This driver largely ignores stream IDs and simply uses a single
 * translation table for all the devices that it serves.  This is good
 * enough for the PCIe host bridge that serves the on-board devices on
 * the current generation Apple Silicon Macs as these only have a
 * single PCIe device behind each DART.
 */

/*
 * DART registers
 */
#define	DART_PARAMS2		0x0004
#define	 DART_PARAMS2_BYPASS_SUPPORT	__BIT(0)
#define	DART_TLB_OP		0x0020
#define	 DART_TLB_OP_BUSY		__BIT(2)
#define	 DART_TLB_OP_FLUSH		__BIT(20)
#define	DART_TLB_OP_SIDMASK	0x0034
#define	DART_ERR_STATUS		0x0040
#define	 DART_ERR_FLAG		__BIT(31)
#define	 DART_ERR_STREAM_MASK	__BITS(27, 24)
#define	 DART_ERR_CODE_MASK	__BITS(11, 0)
#define	 DART_ERR_READ_FAULT	__BIT(4)
#define	 DART_ERR_WRITE_FAULT	__BIT(3)
#define	 DART_ERR_NOPTE		__BIT(2)
#define	 DART_ERR_NOPMD		__BIT(1)
#define	 DART_ERR_NOTTBR	__BIT(0)
#define	DART_ERR_ADDRL		0x0050
#define	DART_ERR_ADDRH		0x0054
#define	DART_CONFIG		0x0060
#define	 DART_CONFIG_LOCK		__BIT(15)
#define	DART_TCR(sid)		(0x0100 + (sid) * 0x4)
#define	 DART_TCR_TRANSLATE_ENABLE	__BIT(7)
#define	 DART_TCR_BYPASS_DART		__BIT(8)
#define	 DART_TCR_BYPASS_DAPF		__BIT(12)
#define	DART_TTBR(sid, idx)	(0x0200 + (sid) * 0x10 + (idx) * 0x4)
#define	 DART_TTBR_VALID		__BIT(31)
#define	 DART_TTBR_SHIFT		12

#define	DART_NUM_STREAMS	16
#define	DART_ALL_STREAMS	((1 << DART_NUM_STREAMS) - 1)

#define	DART_APERTURE_START	0x00100000
#define	DART_APERTURE_SIZE	0x3fe00000
#define	DART_PAGE_SIZE		16384
#define	DART_PAGE_MASK		(DART_PAGE_SIZE - 1)

/*
 * Some hardware (e.g. bge(4)) will always use (aligned) 64-bit memory
 * access.  To make sure this doesn't fault, round the subpage limits
 * down and up accordingly.
 */
#define	DART_OFFSET_MASK	7

#define	DART_L1_TABLE		0x3
#define	DART_L2_INVAL		0x0
#define	DART_L2_VALID		__BIT(0)
#define	DART_L2_FULL_PAGE	__BIT(1)

#define	DART_L2_START_MASK	__BITS(63, 52)
#define	DART_L2_END_MASK	__BITS(51, 40)
#define	DART_L2_SUBPAGE(addr)	__SHIFTOUT((addr), __BITS(13, 2))
#define	DART_L2_START(addr)	__SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_START_MASK)
#define	DART_L2_END(addr)	__SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_END_MASK)

#define	DART_ROUND_PAGE(pa)	(((pa) + DART_PAGE_MASK) & ~DART_PAGE_MASK)
#define	DART_TRUNC_PAGE(pa)	((pa) & ~DART_PAGE_MASK)
#define	DART_ROUND_OFFSET(pa)	(((pa) + DART_OFFSET_MASK) & ~DART_OFFSET_MASK)
#define	DART_TRUNC_OFFSET(pa)	((pa) & ~DART_OFFSET_MASK)

static const struct device_compatible_entry compat_data[] = {
	{ .compat = "apple,dart-m1",		.value = 16 },
	{ .compat = "apple,t8103-dart",		.value = 16 },
	DEVICE_COMPAT_EOL
};

static struct arm32_dma_range apple_dart_dma_ranges[] = {
	[0] = {
		.dr_sysbase = 0,
		.dr_busbase = 0,
		.dr_len = UINTPTR_MAX,
		.dr_flags = _BUS_DMAMAP_COHERENT,
	}
};

struct apple_dart_map_state {
	bus_addr_t ams_dva;
	bus_size_t ams_len;
};

struct apple_dart_dma {
	bus_dmamap_t dma_map;
	bus_dma_segment_t dma_seg;
	bus_size_t dma_size;
	void *dma_kva;
};

#define	DART_DMA_MAP(_dma)	((_dma)->dma_map)
#define	DART_DMA_LEN(_dma)	((_dma)->dma_size)
#define	DART_DMA_DVA(_dma)	((_dma)->dma_map->dm_segs[0].ds_addr)
#define	DART_DMA_KVA(_dma)	((_dma)->dma_kva)

struct apple_dart_softc {
	device_t sc_dev;
	int sc_phandle;
	bus_space_tag_t sc_bst;
	bus_space_handle_t sc_bsh;
	bus_dma_tag_t sc_dmat;

	uint64_t sc_sid_mask;
	u_int sc_nsid;

	vmem_t *sc_dvamap;

	struct apple_dart_dma *sc_l1;
	struct apple_dart_dma **sc_l2;
	u_int sc_nl2;

	struct arm32_bus_dma_tag sc_bus_dmat;
};

#define DART_READ(sc, reg) \
	bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))
#define	DART_WRITE(sc, reg, val) \
	bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))

static void
apple_dart_flush_tlb(struct apple_dart_softc *sc)
{
	dsb(sy);
	isb();

	DART_WRITE(sc, DART_TLB_OP_SIDMASK, sc->sc_sid_mask);
	DART_WRITE(sc, DART_TLB_OP, DART_TLB_OP_FLUSH);
	while ((DART_READ(sc, DART_TLB_OP) & DART_TLB_OP_BUSY) != 0) {
		__asm volatile ("yield" ::: "memory");
	}
}

static struct apple_dart_dma *
apple_dart_dma_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align)
{
	struct apple_dart_dma *dma;
	int nsegs, error;

	dma = kmem_zalloc(sizeof(*dma), KM_SLEEP);
	dma->dma_size = size;

	error = bus_dmamem_alloc(dmat, size, align, 0, &dma->dma_seg, 1,
	    &nsegs, BUS_DMA_WAITOK);
	if (error != 0) {
		goto destroy;
	}

	error = bus_dmamem_map(dmat, &dma->dma_seg, nsegs, size,
	    &dma->dma_kva, BUS_DMA_WAITOK | BUS_DMA_NOCACHE);
	if (error != 0) {
		goto free;
	}

	error = bus_dmamap_create(dmat, size, 1, size, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
	if (error != 0) {
		goto dmafree;
	}

	error = bus_dmamap_load(dmat, dma->dma_map, dma->dma_kva, size,
	    NULL, BUS_DMA_WAITOK);
	if (error != 0) {
		goto unmap;
	}

	memset(dma->dma_kva, 0, size);

	return dma;

destroy:
	bus_dmamap_destroy(dmat, dma->dma_map);
unmap:
	bus_dmamem_unmap(dmat, dma->dma_kva, size);
free:
	bus_dmamem_free(dmat, &dma->dma_seg, 1);
dmafree:
	kmem_free(dma, sizeof(*dma));
	return NULL;
}

static int
apple_dart_intr(void *priv)
{
	struct apple_dart_softc * const sc = priv;
	char fdt_path[128];
	uint64_t addr;
	uint32_t status;

	status = DART_READ(sc, DART_ERR_STATUS);
	addr  = __SHIFTIN(DART_READ(sc, DART_ERR_ADDRL), __BITS(31, 0));
	addr |= __SHIFTIN(DART_READ(sc, DART_ERR_ADDRH), __BITS(63, 32));
	DART_WRITE(sc, DART_ERR_STATUS, status);

	if ((status & DART_ERR_FLAG) == 0)
		return 1;

#ifdef APPLE_DART_DEBUG
	printf("%s: status %#"PRIx32"\n", __func__, status);
	printf("%s: addrl  %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRL));
	printf("%s: addrh  %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRH));
#endif

	const char *reason = NULL;
	int32_t code = __SHIFTOUT(status, DART_ERR_CODE_MASK);
	switch (code) {
	case DART_ERR_NOTTBR:
	    reason = "no ttbr for address";
	    break;
	case DART_ERR_NOPMD:
	    reason = "no pmd for address";
	    break;
	case DART_ERR_NOPTE:
	    reason = "no pte for address";
	    break;
	case DART_ERR_WRITE_FAULT:
	    reason = "write fault";
	    break;
	case DART_ERR_READ_FAULT:
	    reason = "read fault";
	    break;
	}
	fdtbus_get_path(sc->sc_phandle, fdt_path, sizeof(fdt_path));

	printf("%s (%s): error addr 0x%016lx status 0x%08x: %s\n",
	    device_xname(sc->sc_dev), fdt_path, addr, status, reason);

	return 1;
}

static volatile uint64_t *
apple_dart_lookup_tte(struct apple_dart_softc *sc, bus_addr_t dva)
{
	int idx = dva / DART_PAGE_SIZE;
	int l2_idx = idx / (DART_PAGE_SIZE / sizeof(uint64_t));
	int tte_idx = idx % (DART_PAGE_SIZE / sizeof(uint64_t));
	volatile uint64_t *l2 = DART_DMA_KVA(sc->sc_l2[l2_idx]);

	return &l2[tte_idx];
}

static void
apple_dart_unload_map(struct apple_dart_softc *sc, bus_dmamap_t map)
{
	struct apple_dart_map_state *ams = map->_dm_iommu;
	volatile uint64_t *tte;
	int seg;

	/* For each segment */
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		u_long len, dva;

		if (ams[seg].ams_len == 0) {
			continue;
		}

		dva = ams[seg].ams_dva;
		len = ams[seg].ams_len;

		while (len > 0) {
			tte = apple_dart_lookup_tte(sc, dva);
			*tte = DART_L2_INVAL;

			dva += DART_PAGE_SIZE;
			len -= DART_PAGE_SIZE;
		}

		vmem_xfree(sc->sc_dvamap, ams[seg].ams_dva, ams[seg].ams_len);

		ams[seg].ams_dva = 0;
		ams[seg].ams_len = 0;
	}

	apple_dart_flush_tlb(sc);
}

static int
apple_dart_load_map(struct apple_dart_softc *sc, bus_dmamap_t map)
{
	struct apple_dart_map_state *ams = map->_dm_iommu;
	volatile uint64_t *tte;
	int seg, error;

	/* For each segment */
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		paddr_t pa = map->dm_segs[seg]._ds_paddr;
		psize_t off = pa - DART_TRUNC_PAGE(pa);
		u_long len, dva;

		len = DART_ROUND_PAGE(map->dm_segs[seg].ds_len + off);

#ifdef APPLE_DART_DEBUG
		device_printf(sc->sc_dev, "load pa=%#lx off=%lu len=%lu ",
		    pa, off, len);
#endif

		error = vmem_xalloc(sc->sc_dvamap, len, DART_PAGE_SIZE, 0,
		    0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_BESTFIT|VM_NOSLEEP,
		    &dva);
		if (error != 0) {
			apple_dart_unload_map(sc, map);
#ifdef APPLE_DART_DEBUG
			printf("error=%d\n", error);
#endif
			return error;
		}

#ifdef APPLE_DART_DEBUG
		printf("dva=%#lx\n", dva);
#endif

		ams[seg].ams_dva = dva;
		ams[seg].ams_len = len;

		map->dm_segs[seg].ds_addr = dva + off;

		pa = DART_TRUNC_PAGE(pa);
		paddr_t start = DART_TRUNC_OFFSET(off);
		paddr_t end = DART_PAGE_MASK;
		while (len > 0) {
			tte = apple_dart_lookup_tte(sc, dva);
			if (len < DART_PAGE_SIZE)
				end = DART_ROUND_OFFSET(len) - 1;

			*tte = pa | DART_L2_VALID |
			    DART_L2_START(start) | DART_L2_END(end);
#ifdef APPLE_DART_DEBUG
			printf("tte %p = %"PRIx64"\n", tte, *tte);
#endif
			pa += DART_PAGE_SIZE;
			dva += DART_PAGE_SIZE;
			len -= DART_PAGE_SIZE;
			start = 0;
		}
	}

	apple_dart_flush_tlb(sc);

	return 0;
}

static int
apple_dart_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap)
{
	struct apple_dart_softc *sc = t->_cookie;
	struct apple_dart_map_state *ams;
	bus_dmamap_t map;
	int error;

	error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size, nsegments,
	    maxsegsz, boundary, flags, &map);
	if (error != 0) {
		return error;
	}

	ams = kmem_zalloc(map->_dm_segcnt * sizeof(*ams),
	    (flags & BUS_DMA_NOWAIT) != 0 ? KM_NOSLEEP : KM_SLEEP);
	if (ams == NULL) {
		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
		return ENOMEM;
	}

	map->_dm_iommu = ams;
	*dmamap = map;
	return 0;
}

static void
apple_dart_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct apple_dart_softc *sc = t->_cookie;
	struct apple_dart_map_state *ams = map->_dm_iommu;

	kmem_free(ams, map->_dm_segcnt * sizeof(*ams));
	sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
}

static int
apple_dart_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    size_t buflen, struct proc *p, int flags)
{
	struct apple_dart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map,
	    buf, buflen, p, flags);
	if (error != 0) {
		return error;
	}

	error = apple_dart_load_map(sc, map);
	if (error != 0) {
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
	}

	return error;
}

static int
apple_dart_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map,
    struct mbuf *m, int flags)
{
	struct apple_dart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map,
	    m, flags);
	if (error != 0) {
		return error;
	}

	error = apple_dart_load_map(sc, map);
	if (error != 0) {
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
	}

	return error;
}

static int
apple_dart_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map,
    struct uio *uio, int flags)
{
	struct apple_dart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map,
	    uio, flags);
	if (error != 0) {
		return error;
	}

	error = apple_dart_load_map(sc, map);
	if (error != 0) {
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
	}

	return error;
}

static int
apple_dart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
{
	struct apple_dart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map,
	    segs, nsegs, size, flags);
	if (error != 0) {
		return error;
	}

	error = apple_dart_load_map(sc, map);
	if (error != 0) {
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
	}

	return error;
}

static void
apple_dart_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct apple_dart_softc *sc = t->_cookie;

	apple_dart_unload_map(sc, map);
	sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
}

static bus_dma_tag_t
apple_dart_iommu_map(device_t dev, const u_int *data, bus_dma_tag_t dmat)
{
	struct apple_dart_softc * const sc = device_private(dev);

	return &sc->sc_bus_dmat;
}

const struct fdtbus_iommu_func apple_dart_iommu_funcs = {
	.map = apple_dart_iommu_map,
};

static int
apple_dart_match(device_t parent, cfdata_t cf, void *aux)
{
	struct fdt_attach_args * const faa = aux;

	return of_compatible_match(faa->faa_phandle, compat_data);
}

static void
apple_dart_attach(device_t parent, device_t self, void *aux)
{
	struct apple_dart_softc * const sc = device_private(self);
	struct fdt_attach_args * const faa = aux;
	const int phandle = faa->faa_phandle;
	char intrstr[128];
	volatile uint64_t *l1;
	bus_addr_t addr;
	bus_size_t size;
	u_int sid, idx;
	paddr_t pa;
	void *ih;

	if (fdtbus_get_reg(phandle, 0, &addr, &size) != 0) {
		aprint_error(": couldn't get registers\n");
		return;
	}
	if (!fdtbus_intr_str(phandle, 0, intrstr, sizeof(intrstr))) {
		aprint_error(": couldn't decode interrupt\n");
		return;
	}

	sc->sc_dev = self;
	sc->sc_phandle = phandle;
	sc->sc_dmat = faa->faa_dmat;
	sc->sc_bst = faa->faa_bst;
	if (bus_space_map(sc->sc_bst, addr, size, 0, &sc->sc_bsh) != 0) {
		aprint_error(": couldn't map registers\n");
		return;
	}

	/* Skip locked DARTs for now. */
	uint32_t config = DART_READ(sc, DART_CONFIG);
	if (config & DART_CONFIG_LOCK) {
		aprint_naive("\n");
		aprint_normal(": locked\n");
		return;
	}

	/*
	 * Use bypass mode if supported.  This avoids an issue with
	 * the USB3 controllers which need mappings entered into two
	 * IOMMUs, which is somewhat difficult to implement with our
	 * current kernel interfaces.
	 */
	uint32_t params2 = DART_READ(sc, DART_PARAMS2);
	if (params2 & DART_PARAMS2_BYPASS_SUPPORT) {
		for (sid = 0; sid < DART_NUM_STREAMS; sid++) {
			DART_WRITE(sc, DART_TCR(sid),
			    DART_TCR_BYPASS_DART | DART_TCR_BYPASS_DAPF);
		}
		aprint_naive("\n");
		aprint_normal(": bypass\n");
		return;
	}

	sc->sc_nsid = of_compatible_lookup(phandle, compat_data)->value;
	sc->sc_sid_mask = __MASK(sc->sc_nsid);

	aprint_naive("\n");
	aprint_normal(": Apple DART @ %#lx/%#lx, %u SIDs (mask 0x%lx)\n",
	    addr, size, sc->sc_nsid, sc->sc_sid_mask);

	KASSERT(sc->sc_nsid == 16);
	KASSERT(sc->sc_sid_mask == 0xffff);

	sc->sc_dvamap = vmem_create(device_xname(self),
	    DART_APERTURE_START, DART_APERTURE_SIZE, DART_PAGE_SIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_HIGH);
	if (sc->sc_dvamap == NULL) {
		aprint_error_dev(self, "couldn't allocate DVA map\n");
		return;
	}

	/* Disable translations */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		DART_WRITE(sc, DART_TCR(sid), 0);
	}

	/* Remove page tables */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		for (idx = 0; idx < 4; idx++) {
			DART_WRITE(sc, DART_TTBR(sid, idx), 0);
		}
	}
	apple_dart_flush_tlb(sc);

	/*
	 * Build translation tables. We pre-allocate the translation
	 * tables for the entire aperture such that we don't have to worry
	 * about growing them in an mpsafe manner later.
	 *
	 * Cover the entire address space [0, ..._START + ..._SIZE) even if vmem
	 * only allocates from [..._START, ..._START + ...+SIZE)
	 */

	const u_int ntte = howmany(DART_APERTURE_START + DART_APERTURE_SIZE - 1,
				   DART_PAGE_SIZE);
	const u_int nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t));
	const u_int nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t));

	sc->sc_l1 = apple_dart_dma_alloc(sc->sc_dmat,
	    nl1 * DART_PAGE_SIZE, DART_PAGE_SIZE);
	if (sc->sc_l1 == NULL) {
		aprint_error_dev(self, "couldn't allocate L1 tables\n");
		return;
	}
	sc->sc_l2 = kmem_zalloc(nl2 * sizeof(*sc->sc_l2), KM_SLEEP);
	sc->sc_nl2 = nl2;

	l1 = DART_DMA_KVA(sc->sc_l1);
	for (idx = 0; idx < nl2; idx++) {
		sc->sc_l2[idx] = apple_dart_dma_alloc(sc->sc_dmat,
		    DART_PAGE_SIZE, DART_PAGE_SIZE);
		if (sc->sc_l2[idx] == NULL) {
			aprint_error_dev(self,
			    "couldn't allocate L2 tables\n");
			return;
		}

		l1[idx] = DART_DMA_DVA(sc->sc_l2[idx]) | DART_L1_TABLE;
#ifdef APPLE_DART_DEBUG
		printf("l1[%d] (%p) = %"PRIx64"\n", idx, &l1[idx], l1[idx]);
#endif
	}

	/* Install page tables */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		pa = DART_DMA_DVA(sc->sc_l1);
		for (idx = 0; idx < nl1; idx++) {
			KASSERTMSG(__SHIFTOUT(pa, __BITS(DART_TTBR_SHIFT - 1, 0)) == 0,
			    "TTBR pa is not correctly aligned %" PRIxPADDR, pa);

			DART_WRITE(sc, DART_TTBR(sid, idx),
			    (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID);
			pa += DART_PAGE_SIZE;
#ifdef APPLE_DART_DEBUG
			printf("writing %"PRIx64" to %"PRIx32"\n",
			    (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID,
			    DART_TTBR(sid, idx));
#endif
		}
	}
	apple_dart_flush_tlb(sc);

	/* Enable translations */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		DART_WRITE(sc, DART_TCR(sid), DART_TCR_TRANSLATE_ENABLE);
	}

	ih = fdtbus_intr_establish_xname(phandle, 0, IPL_HIGH, FDT_INTR_MPSAFE,
	    apple_dart_intr, sc, device_xname(self));
	if (ih == NULL) {
		aprint_error_dev(self, "couldn't establish interrupt on %s\n",
		    intrstr);
		return;
	}
	aprint_normal_dev(self, "interrupting on %s\n", intrstr);

	/* Setup bus DMA tag */
	sc->sc_bus_dmat = *sc->sc_dmat;
	sc->sc_bus_dmat._ranges = apple_dart_dma_ranges;
	sc->sc_bus_dmat._nranges = 1;
	sc->sc_bus_dmat._cookie = sc;
	sc->sc_bus_dmat._dmamap_create = apple_dart_dmamap_create;
	sc->sc_bus_dmat._dmamap_destroy = apple_dart_dmamap_destroy;
	sc->sc_bus_dmat._dmamap_load = apple_dart_dmamap_load;
	sc->sc_bus_dmat._dmamap_load_mbuf = apple_dart_dmamap_load_mbuf;
	sc->sc_bus_dmat._dmamap_load_uio = apple_dart_dmamap_load_uio;
	sc->sc_bus_dmat._dmamap_load_raw = apple_dart_dmamap_load_raw;
	sc->sc_bus_dmat._dmamap_unload = apple_dart_dmamap_unload;

	fdtbus_register_iommu(self, phandle, &apple_dart_iommu_funcs);
}

CFATTACH_DECL_NEW(apple_dart, sizeof(struct apple_dart_softc),
	apple_dart_match, apple_dart_attach, NULL, NULL);