patch-2.4.25 linux-2.4.25/arch/ia64/kernel/mca.c
Next file: linux-2.4.25/arch/ia64/kernel/mca_asm.S
Previous file: linux-2.4.25/arch/ia64/kernel/ivt.S
Back to the patch index
Back to the overall index
- Lines: 336
- Date: 2004-02-18 05:36:30.000000000 -0800
- Orig file: linux-2.4.24/arch/ia64/kernel/mca.c
- Orig date: 2003-11-28 10:26:19.000000000 -0800
diff -urN linux-2.4.24/arch/ia64/kernel/mca.c linux-2.4.25/arch/ia64/kernel/mca.c
@@ -36,6 +36,10 @@
* SAL 3.0 spec.
* 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
* added min save state dump, added INIT handler.
+ *
+ * 2003-12-08 Keith Owens <kaos@sgi.com>
+ * smp_call_function() must not be called from interrupt context (can
+ * deadlock on tasklist_lock). Use keventd to call smp_call_function().
*/
#include <linux/config.h>
#include <linux/types.h>
@@ -50,6 +54,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
+#include <linux/tqueue.h>
#include <asm/delay.h>
#include <asm/machvec.h>
@@ -80,19 +85,19 @@
u64 ia64_mca_stackframe[32];
u64 ia64_mca_bspstore[1024];
u64 ia64_init_stack[INIT_TASK_SIZE/8] __attribute__((aligned(16)));
-u64 ia64_mca_sal_data_area[1356];
-u64 ia64_tlb_functional;
u64 ia64_os_mca_recovery_successful;
-/* TODO: need to assign min-state structure to UC memory */
-u64 ia64_mca_min_state_save_info[MIN_STATE_AREA_SIZE] __attribute__((aligned(512)));
+u64 ia64_mca_serialize;
static void ia64_mca_wakeup_ipi_wait(void);
static void ia64_mca_wakeup(int cpu);
static void ia64_mca_wakeup_all(void);
static void ia64_log_init(int);
extern void ia64_monarch_init_handler (void);
extern void ia64_slave_init_handler (void);
+static u64 ia64_log_get(int sal_info_type, u8 **buffer);
extern struct hw_interrupt_type irq_type_iosapic_level;
+struct ia64_mca_tlb_info ia64_mca_tlb_list[NR_CPUS];
+
static struct irqaction cmci_irqaction = {
.handler = ia64_mca_cmc_int_handler,
.flags = SA_INTERRUPT,
@@ -151,7 +156,9 @@
*/
static int cpe_poll_enabled = 1;
-extern void salinfo_log_wakeup(int);
+extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size);
+
+static struct tq_struct cmc_disable_tq, cmc_enable_tq;
/*
* ia64_mca_log_sal_error_record
@@ -166,11 +173,13 @@
int
ia64_mca_log_sal_error_record(int sal_info_type, int called_from_init)
{
- int platform_err = 0;
+ u8 *buffer;
+ u64 size;
+ int platform_err;
- /* Get the MCA error record */
- if (!ia64_log_get(sal_info_type, (prfunc_t)printk))
- return platform_err; /* no record retrieved */
+ size = ia64_log_get(sal_info_type, &buffer);
+ if (!size)
+ return 0;
/* TODO:
* 1. analyze error logs to determine recoverability
@@ -178,8 +187,11 @@
* 3. set ia64_os_mca_recovery_successful flag, if applicable
*/
- salinfo_log_wakeup(sal_info_type);
+ salinfo_log_wakeup(sal_info_type, buffer, size);
platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
+ /* Clear logs from corrected errors in case there's no user-level logger */
+ if (sal_info_type == SAL_INFO_TYPE_CPE || sal_info_type == SAL_INFO_TYPE_CMC)
+ ia64_sal_clear_state_info(sal_info_type);
return platform_err;
}
@@ -462,26 +474,6 @@
#endif /* PLATFORM_MCA_HANDLERS */
/*
- * routine to process and prepare to dump min_state_save
- * information for debugging purposes.
- */
-void
-ia64_process_min_state_save (pal_min_state_area_t *pmss)
-{
- int i, max = MIN_STATE_AREA_SIZE;
- u64 *tpmss_ptr = (u64 *)pmss;
- u64 *return_min_state_ptr = ia64_mca_min_state_save_info;
-
- for (i=0;i<max;i++) {
-
- /* copy min-state register info for eventual return to PAL */
- *return_min_state_ptr++ = *tpmss_ptr;
-
- tpmss_ptr++; /* skip to next entry */
- }
-}
-
-/*
* ia64_mca_cmc_vector_setup
*
* Setup the corrected machine check vector register in the processor and
@@ -620,6 +612,36 @@
}
/*
+ * ia64_mca_cmc_vector_disable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * disable the cmc interrupt vector.
+ *
+ * Note: needs preempt_disable() if you apply the preempt patch to 2.4.
+ */
+static void
+ia64_mca_cmc_vector_disable_keventd(void *unused)
+{
+ ia64_mca_cmc_vector_disable(NULL);
+ smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * enable the cmc interrupt vector.
+ *
+ * Note: needs preempt_disable() if you apply the preempt patch to 2.4.
+ */
+static void
+ia64_mca_cmc_vector_enable_keventd(void *unused)
+{
+ smp_call_function(ia64_mca_cmc_vector_enable, NULL, 1, 0);
+ ia64_mca_cmc_vector_enable(NULL);
+}
+
+/*
* ia64_mca_init
*
* Do all the system level mca specific initialization.
@@ -652,6 +674,9 @@
IA64_MCA_DEBUG("ia64_mca_init: begin\n");
+ INIT_TQUEUE(&cmc_disable_tq, ia64_mca_cmc_vector_disable_keventd, NULL);
+ INIT_TQUEUE(&cmc_enable_tq, ia64_mca_cmc_vector_enable_keventd, NULL);
+
/* initialize recovery success indicator */
ia64_os_mca_recovery_successful = 0;
@@ -834,7 +859,7 @@
irr = ia64_get_irr3();
break;
}
- } while (!(irr & (1 << irr_bit))) ;
+ } while (!(irr & (1UL << irr_bit))) ;
}
/*
@@ -950,6 +975,9 @@
void
ia64_return_to_sal_check(void)
{
+ pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
+ &ia64_sal_to_os_handoff_state.proc_state_param;
+
/* Copy over some relevant stuff from the sal_to_os_mca_handoff
* so that it can be used at the time of os_mca_to_sal_handoff
*/
@@ -959,15 +987,22 @@
ia64_os_to_sal_handoff_state.imots_sal_check_ra =
ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
- /* Cold Boot for uncorrectable MCA */
- ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
+ /*
+ * Did we correct the error? At the moment the only error that
+ * we fix is a TLB error, if any other kind of error occurred
+ * we must reboot.
+ */
+ if (psp->cc == 1 && psp->bc == 1 && psp->rc == 1 && psp->uc == 1)
+ ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
+ else
+ ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
/* Default = tell SAL to return to same context */
ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
- /* Register pointer to new min state values */
ia64_os_to_sal_handoff_state.imots_new_min_state =
- ia64_mca_min_state_save_info;
+ (u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
+
}
/*
@@ -1056,14 +1091,7 @@
cmc_polling_enabled = 1;
spin_unlock(&cmc_history_lock);
-
- /*
- * We rely on the local_irq_enable() above so
- * that this can't deadlock.
- */
- ia64_mca_cmc_vector_disable(NULL);
-
- smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+ schedule_task(&cmc_disable_tq);
/*
* Corrected errors will still be corrected, but
@@ -1157,19 +1185,7 @@
if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
printk(KERN_WARNING "%s: Returning to interrupt driven CMC handler\n", __FUNCTION__);
-
- /*
- * The cmc interrupt handler enabled irqs, so
- * this can't deadlock.
- */
- smp_call_function(ia64_mca_cmc_vector_enable, NULL, 1, 0);
-
- /*
- * Turn off interrupts before re-enabling the
- * cmc vector locally. Make sure we get out.
- */
- local_irq_disable();
- ia64_mca_cmc_vector_enable(NULL);
+ schedule_task(&cmc_enable_tq);
cmc_polling_enabled = 0;
} else {
@@ -1416,12 +1432,12 @@
* Get the current MCA log from SAL and copy it into the OS log buffer.
*
* Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
- * prfunc (fn ptr of log output function)
* Outputs : size (total record length)
+ * *buffer (ptr to error record)
*
*/
u64
-ia64_log_get(int sal_info_type, prfunc_t prfunc)
+ia64_log_get(int sal_info_type, u8 **buffer)
{
sal_log_record_header_t *log_buffer;
u64 total_len = 0;
@@ -1439,6 +1455,7 @@
IA64_LOG_UNLOCK(sal_info_type);
IA64_MCA_DEBUG("ia64_log_get: SAL error record type %d retrieved. "
"Record length = %ld\n", sal_info_type, total_len);
+ *buffer = (u8 *) log_buffer;
return total_len;
} else {
IA64_LOG_UNLOCK(sal_info_type);
@@ -1483,7 +1500,7 @@
void
ia64_log_rec_header_print (sal_log_record_header_t *lh, prfunc_t prfunc)
{
- prfunc("+Err Record ID: %d SAL Rev: %2x.%02x\n", lh->id,
+ prfunc("+Err Record ID: %ld SAL Rev: %2x.%02x\n", lh->id,
lh->revision.major, lh->revision.minor);
prfunc("+Time: %02x/%02x/%02x%02x %02x:%02x:%02x Severity %d\n",
lh->timestamp.slh_month, lh->timestamp.slh_day,
@@ -1606,13 +1623,13 @@
if (info->dl)
prfunc(" Line: Data,");
prfunc(" Operation: %s,", pal_cache_op[info->op]);
- if (info->wv)
+ if (info->wiv)
prfunc(" Way: %d,", info->way);
if (cache_check_info->valid.target_identifier)
/* Hope target address is saved in target_identifier */
if (info->tv)
prfunc(" Target Addr: 0x%lx,", target_addr);
- if (info->mc)
+ if (info->mcc)
prfunc(" MC: Corrected");
prfunc("\n");
}
@@ -1648,13 +1665,13 @@
prfunc(" Failure: Data Translation Cache");
if (info->itr) {
prfunc(" Failure: Instruction Translation Register");
- prfunc(" ,Slot: %d", info->tr_slot);
+ prfunc(" ,Slot: %ld", info->tr_slot);
}
if (info->dtr) {
prfunc(" Failure: Data Translation Register");
- prfunc(" ,Slot: %d", info->tr_slot);
+ prfunc(" ,Slot: %ld", info->tr_slot);
}
- if (info->mc)
+ if (info->mcc)
prfunc(" ,MC: Corrected");
prfunc("\n");
}
@@ -1700,7 +1717,7 @@
prfunc(" ,Error: Internal");
if (info->eb)
prfunc(" ,Error: External");
- if (info->mc)
+ if (info->mcc)
prfunc(" ,MC: Corrected");
if (info->tv)
prfunc(" ,Target Address: 0x%lx", targ_addr);
@@ -2148,9 +2165,6 @@
if (slpi->valid.psi_static_struct) {
spsi = (sal_processor_static_info_t *)p_data;
- /* copy interrupted context PAL min-state info */
- ia64_process_min_state_save(&spsi->min_state_area);
-
/* Print branch register contents if valid */
if (spsi->valid.br)
ia64_log_processor_regs_print(spsi->br, 8, "Branch", "br",
@@ -2376,7 +2390,8 @@
ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
break;
case SAL_INFO_TYPE_INIT:
- prfunc("+MCA INIT ERROR LOG (UNIMPLEMENTED)\n");
+ prfunc("+CPU %d: SAL log contains INIT error record\n", smp_processor_id());
+ ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
break;
case SAL_INFO_TYPE_CMC:
prfunc("+BEGIN HARDWARE ERROR STATE AT CMC\n");
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)