/*	$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $	*/

/*-
 * Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran and Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * x86 pmap(9) module: TLB shootdowns.
 *
 * TLB shootdowns are hard interrupts that operate outside the SPL framework.
 * They do not need to be blocked, provided that the pmap module gets the
 * order of events correct.  The calls are made by poking the LAPIC directly.
 * The interrupt handler is short and does one of the following: invalidate
 * a set of pages, all user TLB entries, or the entire TLB.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.8 2016/11/13 12:58:40 maxv Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <uvm/uvm.h>

#include <machine/cpuvar.h>
#ifdef XEN
#include <machine/xpmap.h>
#endif /* XEN */
#include <x86/i82489reg.h>
#include <x86/i82489var.h>

/*
 * TLB shootdown structures.
 */

typedef struct {
#ifdef _LP64
	uintptr_t		tp_va[14];	/* whole struct: 128 bytes */
#else
	uintptr_t		tp_va[13];	/* whole struct: 64 bytes */
#endif
	uint16_t		tp_count;
	uint16_t		tp_pte;
	int			tp_userpmap;
	kcpuset_t *		tp_cpumask;
} pmap_tlb_packet_t;

/*
 * No more than N separate invlpg operations.
 *
 * Statistically, a value of six is big enough to cover the requested number
 * of pages in ~95% of the TLB shootdowns we are getting.  We therefore rarely
 * reach the limit, and increasing it can actually reduce performance due to
 * the high cost of invlpg.
 */
#define	TP_MAXVA		6

/*
 * TLB shootdown state.
 */
static pmap_tlb_packet_t	pmap_tlb_packet		__cacheline_aligned;
static volatile u_int		pmap_tlb_pendcount	__cacheline_aligned;
static volatile u_int		pmap_tlb_gen		__cacheline_aligned;
static struct evcnt		pmap_tlb_evcnt		__cacheline_aligned;
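/*
 * Illustrative compile-time checks (editorial addition, not part of the
 * original file): verify the struct sizes claimed in the comments on
 * pmap_tlb_packet_t above.  CTASSERT() comes from <sys/cdefs.h>, which
 * is already included.
 */
#ifdef _LP64
CTASSERT(sizeof(pmap_tlb_packet_t) == 128);
#else
CTASSERT(sizeof(pmap_tlb_packet_t) == 64);
#endif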
/*
 * TLB shootdown statistics.
 */
#ifdef TLBSTATS
static struct evcnt		tlbstat_local[TLBSHOOT__MAX];
static struct evcnt		tlbstat_remote[TLBSHOOT__MAX];
static struct evcnt		tlbstat_kernel[TLBSHOOT__MAX];
static struct evcnt		tlbstat_single_req;
static struct evcnt		tlbstat_single_issue;
static const char *		tlbstat_name[] = {
	"APTE", "KENTER", "KREMOVE", "FREE_PTP1", "FREE_PTP2",
	"REMOVE_PTE", "REMOVE_PTES", "SYNC_PV1", "SYNC_PV2",
	"WRITE_PROTECT", "ENTER", "UPDATE", "BUS_DMA", "BUS_SPACE"
};
#endif

void
pmap_tlb_init(void)
{

	memset(&pmap_tlb_packet, 0, sizeof(pmap_tlb_packet_t));
	pmap_tlb_pendcount = 0;
	pmap_tlb_gen = 0;

	evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR,
	    NULL, "TLB", "shootdown");

#ifdef TLBSTATS
	int i;

	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_local[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot local", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_remote[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot remote", tlbstat_name[i]);
	}
	for (i = 0; i < TLBSHOOT__MAX; i++) {
		evcnt_attach_dynamic(&tlbstat_kernel[i], EVCNT_TYPE_MISC,
		    NULL, "tlbshoot kernel", tlbstat_name[i]);
	}
	evcnt_attach_dynamic(&tlbstat_single_req, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "requests");
	evcnt_attach_dynamic(&tlbstat_single_issue, EVCNT_TYPE_MISC,
	    NULL, "tlbshoot single page", "issues");
#endif
}

void
pmap_tlb_cpu_init(struct cpu_info *ci)
{
	pmap_tlb_packet_t *tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	memset(tp, 0, sizeof(pmap_tlb_packet_t));
	kcpuset_create(&tp->tp_cpumask, true);
}

static inline void
pmap_tlbstat_count(struct pmap *pm, vaddr_t va, tlbwhy_t why)
{
#ifdef TLBSTATS
	const cpuid_t cid = cpu_index(curcpu());
	bool local = false, remote = false;

	if (va != (vaddr_t)-1LL) {
		atomic_inc_64(&tlbstat_single_req.ev_count);
	}
	if (pm == pmap_kernel()) {
		atomic_inc_64(&tlbstat_kernel[why].ev_count);
		return;
	}

	if (va >= VM_MAXUSER_ADDRESS) {
		remote = kcpuset_isotherset(pm->pm_kernel_cpus, cid);
		local = kcpuset_isset(pm->pm_kernel_cpus, cid);
	}
	remote |= kcpuset_isotherset(pm->pm_cpus, cid);
	local |= kcpuset_isset(pm->pm_cpus, cid);
	if (local) {
		atomic_inc_64(&tlbstat_local[why].ev_count);
	}
	if (remote) {
		atomic_inc_64(&tlbstat_remote[why].ev_count);
	}
#endif
}

static inline void
pmap_tlb_invalidate(const pmap_tlb_packet_t *tp)
{
	int i;

	/* Find out what we need to invalidate. */
	if (tp->tp_count == (uint16_t)-1) {
		u_int egen = uvm_emap_gen_return();
		if (tp->tp_pte & PG_G) {
			/* Invalidating user and kernel TLB entries. */
			tlbflushg();
		} else {
			/* Invalidating user TLB entries only. */
			tlbflush();
		}
		uvm_emap_update(egen);
	} else {
		/* Invalidating a single page or a range of pages. */
		for (i = tp->tp_count - 1; i >= 0; i--) {
			pmap_update_pg(tp->tp_va[i]);
		}
	}
}
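/*
 * Typical call pattern (editorial sketch, not a verbatim caller): the
 * pmap module queues a shootdown for each PTE it edits and flushes the
 * queue once the batch is complete.  TLBSHOOT_REMOVE_PTE is assumed
 * from the tlbstat_name[] table above, and pmap_pte_testset() is a
 * hypothetical stand-in for whatever routine actually zaps the PTE:
 *
 *	kpreempt_disable();
 *	opte = pmap_pte_testset(ptep, 0);
 *	pmap_tlb_shootdown(pm, va, opte, TLBSHOOT_REMOVE_PTE);
 *	pmap_tlb_shootnow();
 *	kpreempt_enable();
 */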
/*
 * pmap_tlb_shootdown: invalidate a page on all CPUs using pmap 'pm'.
 */
void
pmap_tlb_shootdown(struct pmap *pm, vaddr_t va, pt_entry_t pte, tlbwhy_t why)
{
	pmap_tlb_packet_t *tp;
	int s;

#ifndef XEN
	KASSERT((pte & PG_G) == 0 || pm == pmap_kernel());
#endif

	/*
	 * If tearing down the pmap, do nothing.  We will flush later
	 * when we are ready to recycle/destroy it.
	 */
	if (__predict_false(curlwp->l_md.md_gc_pmap == pm)) {
		return;
	}

	if ((pte & PG_PS) != 0) {
		va &= PG_LGFRAME;
	}

	/*
	 * Add the shootdown operation to our pending set.
	 */
	s = splvm();
	tp = (pmap_tlb_packet_t *)curcpu()->ci_pmap_data;

	/* Whole address flush will be needed if PG_G is set. */
	CTASSERT(PG_G == (uint16_t)PG_G);
	tp->tp_pte |= (uint16_t)pte;

	if (tp->tp_count == (uint16_t)-1) {
		/*
		 * Already flushing everything.
		 */
	} else if (tp->tp_count < TP_MAXVA && va != (vaddr_t)-1LL) {
		/* Flush a single page. */
		tp->tp_va[tp->tp_count++] = va;
		KASSERT(tp->tp_count > 0);
	} else {
		/* Flush everything. */
		tp->tp_count = (uint16_t)-1;
	}

	if (pm != pmap_kernel()) {
		kcpuset_merge(tp->tp_cpumask, pm->pm_cpus);
		if (va >= VM_MAXUSER_ADDRESS) {
			kcpuset_merge(tp->tp_cpumask, pm->pm_kernel_cpus);
		}
		tp->tp_userpmap = 1;
	} else {
		kcpuset_copy(tp->tp_cpumask, kcpuset_running);
	}
	pmap_tlbstat_count(pm, va, why);
	splx(s);
}

#ifdef MULTIPROCESSOR
#ifdef XEN

static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{

	if (tp->tp_count != (uint16_t)-1) {
		/* Invalidating a single page or a range of pages. */
		for (int i = tp->tp_count - 1; i >= 0; i--) {
			xen_mcast_invlpg(tp->tp_va[i], target);
		}
	} else {
		xen_mcast_tlbflush(target);
	}

	/* Remote CPUs have been synchronously flushed. */
	pmap_tlb_pendcount = 0;
}

#else

static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{
	int err = 0;

	if (!kcpuset_match(target, kcpuset_attached)) {
		const struct cpu_info * const self = curcpu();
		CPU_INFO_ITERATOR cii;
		struct cpu_info *lci;

		for (CPU_INFO_FOREACH(cii, lci)) {
			const cpuid_t lcid = cpu_index(lci);

			if (__predict_false(lci == self) ||
			    !kcpuset_isset(target, lcid)) {
				continue;
			}
			err |= x86_ipi(LAPIC_TLB_VECTOR,
			    lci->ci_cpuid, LAPIC_DLMODE_FIXED);
		}
	} else {
		err = x86_ipi(LAPIC_TLB_VECTOR, LAPIC_DEST_ALLEXCL,
		    LAPIC_DLMODE_FIXED);
	}
	KASSERT(err == 0);
}

#endif /* XEN */
#endif /* MULTIPROCESSOR */

/*
 * pmap_tlb_shootnow: process pending TLB shootdowns queued on current CPU.
 *
 * => Must be called with preemption disabled.
 */
void
pmap_tlb_shootnow(void)
{
	pmap_tlb_packet_t *tp;
	struct cpu_info *ci;
	kcpuset_t *target;
	u_int local, gen, rcpucount;
	cpuid_t cid;
	int s;

	KASSERT(kpreempt_disabled());

	ci = curcpu();
	tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;

	/* Pre-check first. */
	if (tp->tp_count == 0) {
		return;
	}

	/* Re-check under splvm(): an interrupt may have flushed for us. */
	s = splvm();
	if (tp->tp_count == 0) {
		splx(s);
		return;
	}
	cid = cpu_index(ci);

	target = tp->tp_cpumask;
	local = kcpuset_isset(target, cid) ? 1 : 0;
	rcpucount = kcpuset_countset(target) - local;
	gen = 0;

#ifdef MULTIPROCESSOR
	if (rcpucount) {
		int count;

		/*
		 * Gain ownership of the shootdown mailbox.  We must stay
		 * at IPL_VM once we own it or we could deadlock against an
		 * interrupt on this CPU trying to do the same.
		 */
		KASSERT(rcpucount < ncpu);
		while (atomic_cas_uint(&pmap_tlb_pendcount, 0, rcpucount)) {
			splx(s);
			count = SPINLOCK_BACKOFF_MIN;
			while (pmap_tlb_pendcount) {
				KASSERT(pmap_tlb_pendcount < ncpu);
				SPINLOCK_BACKOFF(count);
			}
			s = splvm();
			/* An interrupt might have done it for us. */
			if (tp->tp_count == 0) {
				splx(s);
				return;
			}
		}

		/*
		 * Start a new generation of updates.  Copy our shootdown
		 * requests into the global buffer.  Note that tp_cpumask
		 * will not be used by remote CPUs (it would be unsafe).
		 */
		gen = ++pmap_tlb_gen;
		memcpy(&pmap_tlb_packet, tp, sizeof(*tp));
		pmap_tlb_evcnt.ev_count++;

		/*
		 * Initiate shootdowns on remote CPUs.
		 */
		pmap_tlb_processpacket(tp, target);
	}
#endif

	/*
	 * Shootdowns on remote CPUs are now in flight.  In the meantime,
	 * perform a local shootdown if needed.
	 */
	if (local) {
		pmap_tlb_invalidate(tp);
	}

	/*
	 * Clear out our local buffer.
	 */
#ifdef TLBSTATS
	if (tp->tp_count != (uint16_t)-1) {
		atomic_add_64(&tlbstat_single_issue.ev_count, tp->tp_count);
	}
#endif
	kcpuset_zero(tp->tp_cpumask);
	tp->tp_userpmap = 0;
	tp->tp_count = 0;
	tp->tp_pte = 0;
	splx(s);

	/*
	 * Now wait for the current generation of updates to be
	 * processed by remote CPUs.  Checking the generation as well as
	 * the pending count avoids spinning on a later owner's count:
	 * once pmap_tlb_gen has moved past ours, every remote CPU must
	 * already have acknowledged our generation.
	 */
	if (rcpucount && pmap_tlb_pendcount) {
		int count = SPINLOCK_BACKOFF_MIN;

		while (pmap_tlb_pendcount && pmap_tlb_gen == gen) {
			KASSERT(pmap_tlb_pendcount < ncpu);
			SPINLOCK_BACKOFF(count);
		}
	}
}
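/*
 * Mailbox protocol sketch (editorial, illustrative only): suppose CPUs
 * A and B both initiate shootdowns that target remote CPU C:
 *
 *	A: CAS(pendcount, 0 -> 1) succeeds; gen becomes G+1; IPIs C
 *	B: CAS fails; drops back to the caller's IPL and spins
 *	C: pmap_tlb_intr() flushes its TLB and decrements pendcount to 0
 *	A: sees pendcount == 0 (or gen != G+1) and returns
 *	B: retries CAS(pendcount, 0 -> 1); gen becomes G+2; IPIs C
 */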
/*
 * pmap_tlb_intr: pmap shootdown interrupt handler to invalidate TLB entries.
 *
 * => Called from IPI only.
 */
void
pmap_tlb_intr(void)
{
	const pmap_tlb_packet_t *tp = &pmap_tlb_packet;
	struct cpu_info *ci = curcpu();

	KASSERT(pmap_tlb_pendcount > 0);

	/* First, TLB flush. */
	pmap_tlb_invalidate(tp);

	/*
	 * Check the current TLB state.  If we do not want further
	 * invalidations for this pmap, then take the CPU out of
	 * the pmap's bitmask.
	 */
	if (ci->ci_tlbstate == TLBSTATE_LAZY && tp->tp_userpmap) {
		struct pmap *pm = ci->ci_pmap;
		cpuid_t cid = cpu_index(ci);

		kcpuset_atomic_clear(pm->pm_cpus, cid);
		ci->ci_tlbstate = TLBSTATE_STALE;
	}

	/* Finally, ack the request. */
	atomic_dec_uint(&pmap_tlb_pendcount);
}
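/*
 * Editorial note: ci_tlbstate forms a small per-CPU state machine,
 * assuming the TLBSTATE_* values from the x86 cpu headers:
 *
 *	TLBSTATE_VALID  - the pmap is in active use; shootdowns for it
 *	                  must be serviced on this CPU
 *	TLBSTATE_LAZY   - the pmap is still loaded but its owner has
 *	                  been switched out; the next user shootdown
 *	                  (above) clears this CPU from pm_cpus and
 *	                  moves to TLBSTATE_STALE
 *	TLBSTATE_STALE  - user TLB entries may be stale and must be
 *	                  flushed before the pmap is used again
 */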