patch-2.4.19 linux-2.4.19/arch/ia64/sn/io/sn2/shuberror.c
Next file: linux-2.4.19/arch/ia64/sn/io/stubs.c
Previous file: linux-2.4.19/arch/ia64/sn/io/sn2/shub_intr.c
Back to the patch index
Back to the overall index
- Lines: 479
- Date:
Fri Aug 2 17:39:43 2002
- Orig file:
linux-2.4.18/arch/ia64/sn/io/sn2/shuberror.c
- Orig date:
Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/arch/ia64/sn/io/sn2/shuberror.c linux-2.4.19/arch/ia64/sn/io/sn2/shuberror.c
@@ -0,0 +1,478 @@
+/* $Id: shuberror.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000,2002 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/smp.h>
+#include <asm/sn/sgi.h>
+#include <asm/sn/io.h>
+#include <asm/sn/iograph.h>
+#include <asm/sn/invent.h>
+#include <asm/sn/hcl.h>
+#include <asm/sn/labelcl.h>
+#include <asm/sn/sn_private.h>
+#include <asm/sn/klconfig.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/pci/pciio.h>
+#include <asm/sn/pci/pcibr.h>
+#include <asm/sn/xtalk/xtalk.h>
+#include <asm/sn/pci/pcibr_private.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/ioerror.h>
+#include <asm/sn/sn2/shubio.h>
+#include <asm/sn/bte.h>
+
+extern void hubni_eint_init(cnodeid_t cnode);
+extern void hubii_eint_init(cnodeid_t cnode);
+extern void hubii_eint_handler (int irq, void *arg, struct pt_regs *ep);
+int hubiio_crb_error_handler(devfs_handle_t hub_v, hubinfo_t hinfo);
+int hubiio_prb_error_handler(devfs_handle_t hub_v, hubinfo_t hinfo);
+extern void bte_crb_error_handler(devfs_handle_t hub_v, int btenum, int crbnum, ioerror_t *ioe);
+
+extern int maxcpus;
+
+#define HUB_ERROR_PERIOD (120 * HZ) /* 2 minutes */
+
+
+void
+hub_error_clear(nasid_t nasid)
+{
+ int i;
+ hubreg_t idsr;
+
+ /*
+ * Make sure spurious write response errors are cleared
+ * (values are from hub_set_prb())
+ */
+ for (i = 0; i <= HUB_WIDGET_ID_MAX - HUB_WIDGET_ID_MIN + 1; i++) {
+ iprb_t prb;
+
+ prb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB_0 + (i * sizeof(hubreg_t)));
+
+ /* Clear out some fields */
+ prb.iprb_ovflow = 1;
+ prb.iprb_bnakctr = 0;
+ prb.iprb_anakctr = 0;
+
+ prb.iprb_xtalkctr = 3; /* approx. PIO credits for the widget */
+
+ REMOTE_HUB_S(nasid, IIO_IOPRB_0 + (i * sizeof(hubreg_t)), prb.iprb_regval);
+ }
+
+ REMOTE_HUB_S(nasid, IIO_IO_ERR_CLR, -1);
+ idsr = REMOTE_HUB_L(nasid, IIO_IIDSR);
+ REMOTE_HUB_S(nasid, IIO_IIDSR, (idsr & ~(IIO_IIDSR_SENT_MASK)));
+
+}
+
+
+/*
+ * Function : hub_error_init
+ * Purpose : initialize the error handling requirements for a given hub.
+ * Parameters : cnode, the compact nodeid.
+ * Assumptions : Called only once per hub, either by a local cpu. Or by a
+ * remote cpu, when this hub is headless.(cpuless)
+ * Returns : None
+ */
+
+void
+hub_error_init(cnodeid_t cnode)
+{
+ nasid_t nasid;
+
+ nasid = cnodeid_to_nasid(cnode);
+ hub_error_clear(nasid);
+
+
+ /*
+ * Now setup the hub ii error interrupt handler.
+ */
+
+ hubii_eint_init(cnode);
+
+ return;
+}
+
+/*
+ * Function : hubii_eint_init
+ * Parameters : cnode
+ * Purpose : to initialize the hub iio error interrupt.
+ * Assumptions : Called once per hub, by the cpu which will ultimately
+ * handle this interrupt.
+ * Returns : None.
+ */
+
+
+void
+hubii_eint_init(cnodeid_t cnode)
+{
+ int bit, rv;
+ ii_iidsr_u_t hubio_eint;
+ hubinfo_t hinfo;
+ cpuid_t intr_cpu;
+ devfs_handle_t hub_v;
+ ii_ilcsr_u_t ilcsr;
+ int bit_pos_to_irq(int bit);
+ int synergy_intr_connect(int bit, int cpuid);
+
+
+ hub_v = (devfs_handle_t)cnodeid_to_vertex(cnode);
+ ASSERT_ALWAYS(hub_v);
+ hubinfo_get(hub_v, &hinfo);
+
+ ASSERT(hinfo);
+ ASSERT(hinfo->h_cnodeid == cnode);
+
+ ilcsr.ii_ilcsr_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ILCSR);
+
+ if ((ilcsr.ii_ilcsr_fld_s.i_llp_stat & 0x2) == 0) {
+ /*
+ * HUB II link is not up.
+ * Just disable LLP, and don't connect any interrupts.
+ */
+ ilcsr.ii_ilcsr_fld_s.i_llp_en = 0;
+ REMOTE_HUB_S(hinfo->h_nasid, IIO_ILCSR, ilcsr.ii_ilcsr_regval);
+ return;
+ }
+ /* Select a possible interrupt target where there is a free interrupt
+ * bit and also reserve the interrupt bit for this IO error interrupt
+ */
+ intr_cpu = intr_heuristic(hub_v,0,-1,0,hub_v,
+ "HUB IO error interrupt",&bit);
+ if (intr_cpu == CPU_NONE) {
+ printk("hubii_eint_init: intr_reserve_level failed, cnode %d", cnode);
+ return;
+ }
+
+ rv = intr_connect_level(intr_cpu, bit, 0, NULL);
+ request_irq(bit + (intr_cpu << 8), hubii_eint_handler, 0, "SN hub error", (void *)hub_v);
+ ASSERT_ALWAYS(rv >= 0);
+ hubio_eint.ii_iidsr_regval = 0;
+ hubio_eint.ii_iidsr_fld_s.i_enable = 1;
+ hubio_eint.ii_iidsr_fld_s.i_level = bit;/* Take the least significant bits*/
+ hubio_eint.ii_iidsr_fld_s.i_node = COMPACT_TO_NASID_NODEID(cnode);
+ hubio_eint.ii_iidsr_fld_s.i_pi_id = cpuid_to_subnode(intr_cpu);
+ REMOTE_HUB_S(hinfo->h_nasid, IIO_IIDSR, hubio_eint.ii_iidsr_regval);
+
+}
+
+
+/*ARGSUSED*/
+void
+hubii_eint_handler (int irq, void *arg, struct pt_regs *ep)
+{
+ devfs_handle_t hub_v;
+ hubinfo_t hinfo;
+ ii_wstat_u_t wstat;
+ hubreg_t idsr;
+
+
+ /* two levels of casting avoids compiler warning.!! */
+ hub_v = (devfs_handle_t)(long)(arg);
+ ASSERT(hub_v);
+
+ hubinfo_get(hub_v, &hinfo);
+
+ /*
+ * Identify the reason for error.
+ */
+ wstat.ii_wstat_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_WSTAT);
+
+ if (wstat.ii_wstat_fld_s.w_crazy) {
+ char *reason;
+ /*
+ * We can do a couple of things here.
+ * Look at the fields TX_MX_RTY/XT_TAIL_TO/XT_CRD_TO to check
+ * which of these caused the CRAZY bit to be set.
+ * You may be able to check if the Link is up really.
+ */
+ if (wstat.ii_wstat_fld_s.w_tx_mx_rty)
+ reason = "Micro Packet Retry Timeout";
+ else if (wstat.ii_wstat_fld_s.w_xt_tail_to)
+ reason = "Crosstalk Tail Timeout";
+ else if (wstat.ii_wstat_fld_s.w_xt_crd_to)
+ reason = "Crosstalk Credit Timeout";
+ else {
+ hubreg_t hubii_imem;
+ /*
+ * Check if widget 0 has been marked as shutdown, or
+ * if BTE 0/1 has been marked.
+ */
+ hubii_imem = REMOTE_HUB_L(hinfo->h_nasid, IIO_IMEM);
+ if (hubii_imem & IIO_IMEM_W0ESD)
+ reason = "Hub Widget 0 has been Shutdown";
+ else if (hubii_imem & IIO_IMEM_B0ESD)
+ reason = "BTE 0 has been shutdown";
+ else if (hubii_imem & IIO_IMEM_B1ESD)
+ reason = "BTE 1 has been shutdown";
+ else reason = "Unknown";
+
+ }
+ /*
+ * Note: we may never be able to print this, if the II talking
+ * to Xbow which hosts the console is dead.
+ */
+ printk("Hub %d to Xtalk Link failed (II_ECRAZY) Reason: %s",
+ hinfo->h_cnodeid, reason);
+ }
+
+ /*
+ * It's a toss as to which one among PRB/CRB to check first.
+ * Current decision is based on the severity of the errors.
+ * IO CRB errors tend to be more severe than PRB errors.
+ *
+ * It is possible for BTE errors to have been handled already, so we
+ * may not see any errors handled here.
+ */
+ (void)hubiio_crb_error_handler(hub_v, hinfo);
+ (void)hubiio_prb_error_handler(hub_v, hinfo);
+ /*
+ * If we reach here, it indicates crb/prb handlers successfully
+ * handled the error. So, re-enable II to send more interrupt
+ * and return.
+ */
+ REMOTE_HUB_S(hinfo->h_nasid, IIO_IECLR, 0xffffff);
+ idsr = REMOTE_HUB_L(hinfo->h_nasid, IIO_IIDSR) & ~IIO_IIDSR_SENT_MASK;
+ REMOTE_HUB_S(hinfo->h_nasid, IIO_IIDSR, idsr);
+}
+
+/*
+ * Free the hub CRB "crbnum" which encountered an error.
+ * Assumption is, error handling was successfully done,
+ * and we now want to return the CRB back to Hub for normal usage.
+ *
+ * In order to free the CRB, all that's needed is to de-allocate it
+ *
+ * Assumption:
+ * No other processor is mucking around with the hub control register.
+ * So, upper layer has to single thread this.
+ */
+void
+hubiio_crb_free(hubinfo_t hinfo, int crbnum)
+{
+ ii_icrb0_a_u_t icrba;
+
+ /*
+ * The hardware does NOT clear the mark bit, so it must get cleared
+ * here to be sure the error is not processed twice.
+ */
+ icrba.ii_icrb0_a_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ICRB_A(crbnum));
+ icrba.a_valid = 0;
+ REMOTE_HUB_S(hinfo->h_nasid, IIO_ICRB_A(crbnum), icrba.ii_icrb0_a_regval);
+ /*
+ * Deallocate the register.
+ */
+
+ REMOTE_HUB_S(hinfo->h_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum));
+
+ /*
+ * Wait till hub indicates it's done.
+ */
+ while (REMOTE_HUB_L(hinfo->h_nasid, IIO_ICDR) & IIO_ICDR_PND)
+ us_delay(1);
+
+}
+
+
+/*
+ * Array of error names that get logged in CRBs
+ */
+char *hubiio_crb_errors[] = {
+ "Directory Error",
+ "CRB Poison Error",
+ "I/O Write Error",
+ "I/O Access Error",
+ "I/O Partial Write Error",
+ "I/O Partial Read Error",
+ "I/O Timeout Error",
+ "Xtalk Error Packet"
+};
+
+/*
+ * hubiio_crb_error_handler
+ *
+ * This routine gets invoked when a hub gets an error
+ * interrupt. So, the routine is running in interrupt context
+ * at error interrupt level.
+ * Action:
+ * It's responsible for identifying ALL the CRBs that are marked
+ * with error, and process them.
+ *
+ * If you find the CRB that's marked with error, map this to the
+ * reason it caused error, and invoke appropriate error handler.
+ *
+ * XXX Be aware of the information in the context register.
+ *
+ * NOTE:
+ * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt
+ * handler can be run on any node. (not necessarily the node
+ * corresponding to the hub that encountered error).
+ */
+
+int
+hubiio_crb_error_handler(devfs_handle_t hub_v, hubinfo_t hinfo)
+{
+ cnodeid_t cnode;
+ nasid_t nasid;
+ ii_icrb0_a_u_t icrba; /* II CRB Register A */
+ ii_icrb0_b_u_t icrbb; /* II CRB Register B */
+ ii_icrb0_c_u_t icrbc; /* II CRB Register C */
+ ii_icrb0_d_u_t icrbd; /* II CRB Register D */
+ int i;
+ int num_errors = 0; /* Num of errors handled */
+ ioerror_t ioerror;
+
+ nasid = hinfo->h_nasid;
+ cnode = NASID_TO_COMPACT_NODEID(nasid);
+
+ /*
+ * Scan through all CRBs in the Hub, and handle the errors
+ * in any of the CRBs marked.
+ */
+ for (i = 0; i < IIO_NUM_CRBS; i++) {
+ icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));
+
+ IOERROR_INIT(&ioerror);
+
+ /* read other CRB error registers. */
+ icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));
+ icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
+ icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
+
+ IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);
+ /* Check if this error is due to BTE operation,
+ * and handle it separately.
+ */
+ if (icrbd.d_bteop ||
+ ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
+ icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
+ (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
+ icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))){
+
+ int bte_num;
+
+ if (icrbd.d_bteop)
+ bte_num = icrbc.c_btenum;
+ else /* b_initiator bit 2 gives BTE number */
+ bte_num = (icrbb.b_initiator & 0x4) >> 2;
+
+ bte_crb_error_handler(hub_v, bte_num,
+ i, &ioerror);
+ hubiio_crb_free(hinfo, i);
+ num_errors++;
+ continue;
+ }
+
+ /*
+ * XXX
+ * Assuming the only other error that would reach here is
+ * crosstalk errors.
+ * If CRB times out on a message from Xtalk, it changes
+ * the message type to CRB.
+ *
+ * If we get here due to other errors (SN0net/CRB)
+ * what's the action ?
+ */
+
+ /*
+ * Pick out the useful fields in CRB, and
+ * tuck them away into ioerror structure.
+ */
+ IOERROR_SETVALUE(&ioerror,xtalkaddr,icrba.a_addr << IIO_ICRB_ADDR_SHFT);
+ IOERROR_SETVALUE(&ioerror,widgetnum,icrba.a_sidn);
+
+
+ if (icrba.a_iow){
+ /*
+ * XXX We shouldn't really have BRIDGE-specific code
+ * here, but alas....
+ *
+ * The BRIDGE (or XBRIDGE) sets the upper bit of TNUM
+ * to indicate a WRITE operation. It sets the next
+ * bit to indicate an INTERRUPT operation. The bottom
+ * 3 bits of TNUM indicate which device was responsible.
+ */
+ IOERROR_SETVALUE(&ioerror,widgetdev,
+ TNUM_TO_WIDGET_DEV(icrba.a_tnum));
+
+ }
+
+ }
+ return num_errors;
+}
+
+/*ARGSUSED*/
+/*
+ * hubii_prb_handler
+ * Handle the error reported in the PRB for wiget number wnum.
+ * This typically happens on a PIO write error.
+ * There is nothing much we can do in this interrupt context for
+ * PIO write errors. For e.g. QL scsi controller has the
+ * habit of flaking out on PIO writes.
+ * Print a message and try to continue for now
+ * Cleanup involes freeing the PRB register
+ */
+static void
+hubii_prb_handler(devfs_handle_t hub_v, hubinfo_t hinfo, int wnum)
+{
+ nasid_t nasid;
+
+ nasid = hinfo->h_nasid;
+ /*
+ * Clear error bit by writing to IECLR register.
+ */
+ REMOTE_HUB_S(nasid, IIO_IO_ERR_CLR, (1 << wnum));
+ /*
+ * PIO Write to Widget 'i' got into an error.
+ * Invoke hubiio_error_handler with this information.
+ */
+ printk( "Hub nasid %d got a PIO Write error from widget %d, cleaning up and continuing",
+ nasid, wnum);
+ /*
+ * XXX
+ * It may be necessary to adjust IO PRB counter
+ * to account for any lost credits.
+ */
+}
+
+int
+hubiio_prb_error_handler(devfs_handle_t hub_v, hubinfo_t hinfo)
+{
+ int wnum;
+ nasid_t nasid;
+ int num_errors = 0;
+ iprb_t iprb;
+
+ nasid = hinfo->h_nasid;
+ /*
+ * Check if IPRB0 has any error first.
+ */
+ iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(0));
+ if (iprb.iprb_error) {
+ num_errors++;
+ hubii_prb_handler(hub_v, hinfo, 0);
+ }
+ /*
+ * Look through PRBs 8 - F to see if any of them has error bit set.
+ * If true, invoke hub iio error handler for this widget.
+ */
+ for (wnum = HUB_WIDGET_ID_MIN; wnum <= HUB_WIDGET_ID_MAX; wnum++) {
+ iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(wnum));
+
+ if (!iprb.iprb_error)
+ continue;
+
+ num_errors++;
+ hubii_prb_handler(hub_v, hinfo, wnum);
+ }
+
+ return num_errors;
+}
+
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)