patch-2.4.19 linux-2.4.19/arch/ia64/sn/kernel/sn1/sn1_smp.c
- Lines: 475
- Date: Fri Aug 2 17:39:43 2002
- Orig file: linux-2.4.18/arch/ia64/sn/kernel/sn1/sn1_smp.c
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/arch/ia64/sn/kernel/sn1/sn1_smp.c linux-2.4.19/arch/ia64/sn/kernel/sn1/sn1_smp.c
@@ -0,0 +1,474 @@
+/*
+ * SN1 Platform specific SMP Support
+ *
+ * Copyright (C) 2000-2002 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/NoticeExplan
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mmzone.h>
+
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/hw_irq.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+#include <asm/sn/sn_cpuid.h>
+
+/*
+ * The following structure is used to pass params thru smp_call_function
+ * to other cpus for flushing TLB ranges.
+ */
+typedef struct {
+ unsigned long start;
+ unsigned long end;
+ unsigned long nbits;
+ unsigned int rid;
+ atomic_t unfinished_count;
+} ptc_params_t;
+
+#define NUMPTC 512
+
+static ptc_params_t ptcParamArray[NUMPTC] __attribute__((__aligned__(128)));
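+/*
+ * These entries form a circular work queue: sn1_global_tlb_purge() (the
+ * producer) fills in the entry at ptcParamsEmpty, and each cpu consumes
+ * entries from its private ptcParamsNextByCpu cursor in
+ * sn1_received_flush_tlb() until that cursor catches up with ptcParamsEmpty.
+ */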
+
+/* use separate cache lines on ptcParamsNextByCpu to avoid false sharing */
+static ptc_params_t *ptcParamsNextByCpu[NR_CPUS*16] __attribute__((__aligned__(128)));
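+/* a stride of 16 pointers (16 * 8 = 128 bytes) keeps each cpu's cursor on
+ * its own 128-byte cache line; only index cpu*16 is ever used */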
+static volatile ptc_params_t *ptcParamsEmpty __cacheline_aligned;
+
+/*REFERENCED*/
+static spinlock_t ptcParamsLock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
+
+static int ptcInit = 0;
+#ifdef PTCDEBUG
+static int ptcParamsAllBusy = 0; /* debugging/statistics */
+static int ptcCountBacklog = 0;
+static int ptcBacklog[NUMPTC+1];
+static char ptcParamsCounts[NR_CPUS][NUMPTC] __attribute__((__aligned__(128)));
+static char ptcParamsResults[NR_CPUS][NUMPTC] __attribute__((__aligned__(128)));
+#endif
+
+/*
+ * Make smp_send_flush_tlb() a weak reference, so that we get a clean
+ * compile with the ia64 patch without the actual SN1-specific code in
+ * arch/ia64/kernel/smp.c.
+ */
+extern void smp_send_flush_tlb (void) __attribute__((weak));
+
+/*
+ * The following table/struct is for remembering PTC coherency domains. It
+ * is also used to translate sapicid into cpuids. We don't want to start
+ * cpus unless we know their cache domain.
+ */
+#ifdef PTC_NOTYET
+sn_sapicid_info_t sn_sapicid_info[NR_CPUS];
+#endif
+
+/**
+ * sn1_ptc_l_range - purge local translation cache
+ * @start: start of virtual address range
+ * @end: end of virtual address range
+ * @nbits: specifies number of bytes to purge per instruction (num = 1 << nbits)
+ *
+ * Purges the range specified from the local processor's translation cache
+ * (as opposed to the translation registers). Note that more than the specified
+ * range *may* be cleared from the cache by some processors.
+ *
+ * This is probably not good enough, but I don't want to try to make it better
+ * until I get some statistics on a running system. At a minimum, we should only
+ * send IPIs to one processor in each TLB domain & have it issue a ptc.g on its
+ * own FSB. Also, we only have to serialize per FSB, not globally.
+ *
+ * More likely, we will have to do some work to reduce the frequency of calls to
+ * this routine.
+ */
+static inline void
+sn1_ptc_l_range(unsigned long start, unsigned long end, unsigned long nbits)
+{
+ do {
+ __asm__ __volatile__ ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
+ start += (1UL << nbits);
+ } while (start < end);
+ ia64_srlz_d();
+}
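+/*
+ * Worked example: a call with start = 0x2000, end = 0x6000 and nbits = 12
+ * advances in 1<<12 = 4KB steps, issuing four ptc.l instructions (at
+ * 0x2000, 0x3000, 0x4000 and 0x5000) followed by one data serialize.
+ */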
+
+/**
+ * sn1_received_flush_tlb - cpu tlb flush routine
+ *
+ * Flushes the TLB of a given processor.
+ */
+void
+sn1_received_flush_tlb(void)
+{
+ unsigned long start, end, nbits;
+ unsigned int rid, saved_rid;
+ int cpu = smp_processor_id();
+ int result;
+ ptc_params_t *ptcParams;
+
+ ptcParams = ptcParamsNextByCpu[cpu*16];
+ if (ptcParams == ptcParamsEmpty)
+ return;
+
+ do {
+ start = ptcParams->start;
+ saved_rid = (unsigned int) ia64_get_rr(start);
+ end = ptcParams->end;
+ nbits = ptcParams->nbits;
+ rid = ptcParams->rid;
+
+ if (saved_rid != rid) {
+ ia64_set_rr(start, (unsigned long)rid);
+ ia64_srlz_d();
+ }
+
+ sn1_ptc_l_range(start, end, nbits);
+
+ if (saved_rid != rid)
+ ia64_set_rr(start, (unsigned long)saved_rid);
+
+ ia64_srlz_i();
+
+ result = atomic_dec(&ptcParams->unfinished_count);
+#ifdef PTCDEBUG
+ {
+ int i = ptcParams - &ptcParamArray[0];
+ ptcParamsResults[cpu][i] = (char) result;
+ ptcParamsCounts[cpu][i]++;
+ }
+#endif /* PTCDEBUG */
+
+ if (++ptcParams == &ptcParamArray[NUMPTC])
+ ptcParams = &ptcParamArray[0];
+
+ } while (ptcParams != ptcParamsEmpty);
+
+ ptcParamsNextByCpu[cpu*16] = ptcParams;
+}
+
+/**
+ * sn1_global_tlb_purge - flush a translation cache range on all processors
+ * @start: start of virtual address range to flush
+ * @end: end of virtual address range
+ * @nbits: specifies number of bytes to purge per instruction (num = 1 << nbits)
+ *
+ * Flushes the translation cache of all processors from @start to @end.
+ */
+void
+sn1_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+{
+ ptc_params_t *params;
+ ptc_params_t *next;
+ unsigned long irqflags;
+#ifdef PTCDEBUG
+ ptc_params_t *nextnext;
+ int backlog = 0;
+#endif
+
+ if (smp_num_cpus == 1) {
+ sn1_ptc_l_range(start, end, nbits);
+ return;
+ }
+
+ if (in_interrupt()) {
+ /*
+ * If at interrupt level and cannot get spinlock,
+ * then do something useful by flushing own tlbflush queue
+ * so as to avoid a possible deadlock.
+ */
+ while (!spin_trylock(&ptcParamsLock)) {
+ local_irq_save(irqflags);
+ sn1_received_flush_tlb();
+ local_irq_restore(irqflags);
+ udelay(10); /* take it easier on the bus */
+ }
+ } else {
+ spin_lock(&ptcParamsLock);
+ }
+
+ if (!ptcInit) {
+ int cpu;
+ ptcInit = 1;
+ memset(ptcParamArray, 0, sizeof(ptcParamArray));
+ ptcParamsEmpty = &ptcParamArray[0];
+ for (cpu=0; cpu<NR_CPUS; cpu++)
+ ptcParamsNextByCpu[cpu*16] = &ptcParamArray[0];
+
+#ifdef PTCDEBUG
+ memset(ptcBacklog, 0, sizeof(ptcBacklog));
+ memset(ptcParamsCounts, 0, sizeof(ptcParamsCounts));
+ memset(ptcParamsResults, 0, sizeof(ptcParamsResults));
+#endif /* PTCDEBUG */
+ }
+
+ params = (ptc_params_t *) ptcParamsEmpty;
+ next = (ptc_params_t *) ptcParamsEmpty + 1;
+ if (next == &ptcParamArray[NUMPTC])
+ next = &ptcParamArray[0];
+
+#ifdef PTCDEBUG
+ nextnext = next + 1;
+ if (nextnext == &ptcParamArray[NUMPTC])
+ nextnext = &ptcParamArray[0];
+
+ if (ptcCountBacklog) {
+ /* quick count of backlog */
+ ptc_params_t *ptr;
+
+ /* check the current pointer to the beginning */
+ ptr = params;
+ while(--ptr >= &ptcParamArray[0]) {
+ if (atomic_read(&ptr->unfinished_count) == 0)
+ break;
+ ++backlog;
+ }
+
+ if (backlog) {
+ /* check the end of the array */
+ ptr = &ptcParamArray[NUMPTC];
+ while (--ptr > params) {
+ if (atomic_read(&ptr->unfinished_count) == 0)
+ break;
+ ++backlog;
+ }
+ }
+ ptcBacklog[backlog]++;
+ }
+#endif /* PTCDEBUG */
+
+ /* wait for the next entry to clear...should be rare */
+ if (atomic_read(&next->unfinished_count) > 0) {
+#ifdef PTCDEBUG
+ ptcParamsAllBusy++;
+
+ if (atomic_read(&nextnext->unfinished_count) == 0) {
+ if (atomic_read(&next->unfinished_count) > 0) {
+ panic("\nnonzero next zero nextnext %lx %lx\n",
+ (long)next, (long)nextnext);
+ }
+ }
+#endif
+
+ /* it could be this cpu that is behind */
+ local_irq_save(irqflags);
+ sn1_received_flush_tlb();
+ local_irq_restore(irqflags);
+
+ /* now we know it's not this cpu, so just wait */
+ while (atomic_read(&next->unfinished_count) > 0) {
+ barrier();
+ }
+ }
+
+ params->start = start;
+ params->end = end;
+ params->nbits = nbits;
+ params->rid = (unsigned int) ia64_get_rr(start);
+ atomic_set(&params->unfinished_count, smp_num_cpus);
+
+ /* The atomic_set above can hit memory *after* the update
+ * to ptcParamsEmpty below, which opens a timing window
+ * that other cpus can squeeze into!
+ */
+ mb();
+
+ /* everything is ready to process:
+ * -- global lock is held
+ * -- new entry + 1 is free
+ * -- new entry is set up
+ * so now:
+ * -- update the global next pointer
+ * -- unlock the global lock
+ * -- send IPI to notify other cpus
+ * -- process the data ourselves
+ */
+ ptcParamsEmpty = next;
+ spin_unlock(&ptcParamsLock);
+ smp_send_flush_tlb();
+
+ local_irq_save(irqflags);
+ sn1_received_flush_tlb();
+ local_irq_restore(irqflags);
+
+ /* Currently we don't think global TLB purges need to be atomic.
+ * All CPUs get sent IPIs, so if they haven't done the purge,
+ * they're busy with interrupts that are at the IPI level, which is
+ * priority 15. We're asserting that any code at that level
+ * shouldn't be using user TLB entries. To change this to wait
+ * for all the flushes to complete, enable the following code.
+ */
+#ifdef SN1_SYNCHRONOUS_GLOBAL_TLB_PURGE
+ /* this code is not tested */
+ /* wait for the flush to complete */
+ while (atomic_read(&params->unfinished_count) > 1)
+ barrier();
+
+ atomic_set(&params->unfinished_count, 0);
+#endif
+}
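+/*
+ * This routine is presumably installed as platform_global_tlb_purge in the
+ * SN1 machine vector, so that generic paths such as flush_tlb_range() call
+ * sn1_global_tlb_purge() here instead of the default, ptc.g based
+ * ia64_global_tlb_purge().
+ */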
+
+/**
+ * sn_send_IPI_phys - send an IPI to a Nasid and slice
+ * @physid: physical cpuid to receive the interrupt.
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ *
+ * Sends an IPI (interprocessor interrupt) to the processor specified by
+ * @physid.
+ *
+ * @delivery_mode can be one of the following:
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void
+sn_send_IPI_phys(long physid, int vector, int delivery_mode)
+{
+ long *p;
+ long nasid, slice;
+
+ static int off[4] = {0x1800080, 0x1800088, 0x1a00080, 0x1a00088};
+
+ nasid = cpu_physical_id_to_nasid(physid);
+ slice = cpu_physical_id_to_slice(physid);
+
+ p = (long*)(0xc0000a0000000000LL | (nasid<<33) | off[slice]);
+
+#if defined(ZZZBRINGUP)
+ {
+ static int count=0;
+ if (count++ < 10) printk("ZZ sendIPI 0x%x vec %d, nasid 0x%lx, slice %ld, adr 0x%lx\n",
+ smp_processor_id(), vector, nasid, slice, (long)p);
+ }
+#endif
+ mb();
+ *p = (delivery_mode << 8) | (vector & 0xff);
+}
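+/*
+ * Worked example: for a physid that maps to nasid 1, slice 2, the store
+ * above goes to 0xc0000a0000000000 | (1UL << 33) | 0x1a00080 =
+ * 0xc0000a0201a00080, and the value written carries the delivery mode in
+ * bits 15:8 and the vector in bits 7:0.
+ */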
+
+
+/**
+ * sn1_send_IPI - send an IPI to a processor
+ * @cpuid: target of the IPI
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ * @redirect: redirect the IPI?
+ *
+ * Sends an IPI (interprocessor interrupt) to the processor specified by
+ * @cpuid. @delivery_mode can be one of the following:
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void
+sn1_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
+{
+ long physid;
+
+ physid = cpu_physical_id(cpuid);
+
+ sn_send_IPI_phys(physid, vector, delivery_mode);
+}
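+/*
+ * sn1_send_IPI() presumably backs the platform_send_ipi entry of the SN1
+ * machine vector, so generic callers such as smp_send_reschedule() end up
+ * in the register write done by sn_send_IPI_phys() above.
+ */
+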
+#ifdef CONFIG_SMP
+
+#ifdef PTC_NOTYET
+static void __init
+process_sal_ptc_domain_info(ia64_sal_ptc_domain_info_t *di, int domain)
+{
+ ia64_sal_ptc_domain_proc_entry_t *pe;
+ int i, sapicid, cpuid;
+
+ pe = __va(di->proc_list);
+ for (i=0; i<di->proc_count; i++, pe++) {
+ sapicid = id_eid_to_sapicid(pe->id, pe->eid);
+ cpuid = cpu_logical_id(sapicid);
+ sn_sapicid_info[cpuid].domain = domain;
+ sn_sapicid_info[cpuid].sapicid = sapicid;
+ }
+}
+
+
+static void __init
+process_sal_desc_ptc(ia64_sal_desc_ptc_t *ptc)
+{
+ ia64_sal_ptc_domain_info_t *di;
+ int i;
+
+ di = __va(ptc->domain_info);
+ for (i=0; i<ptc->num_domains; i++, di++) {
+ process_sal_ptc_domain_info(di, i);
+ }
+}
+#endif /* PTC_NOTYET */
+
+/**
+ * init_sn1_smp_config - setup PTC domains per processor
+ */
+void __init
+init_sn1_smp_config(void)
+{
+ if (!ia64_ptc_domain_info) {
+ printk("SMP: Can't find PTC domain info. Forcing UP mode\n");
+ smp_num_cpus = 1;
+ return;
+ }
+
+#ifdef PTC_NOTYET
+ memset (sn_sapicid_info, -1, sizeof(sn_sapicid_info));
+ process_sal_desc_ptc(ia64_ptc_domain_info);
+#endif
+}
+
+#else /* CONFIG_SMP */
+
+void __init
+init_sn1_smp_config(void)
+{
+
+#ifdef PTC_NOTYET
+ sn_sapicid_info[0].sapicid = hard_smp_processor_id();
+#endif
+}
+
+#endif /* CONFIG_SMP */