[PATCH] kexec uses incorrect nmi handler on x86/x86_64 V2

Don Zickus dzickus at redhat.com
Tue May 2 21:16:10 CEST 2006


I have modified the priority to be the highest on the die chain, as per the
discussion.  For some reason I misread the code originally and thought
zero was the highest priority.

I also corrected the vector number passed to send_IPI_allbutself().  I
originally posted DIE_NMI_IPI, but later realized that it is not the vector
number the function expects; this version passes NMI_VECTOR instead.
Surprisingly, the earlier version still passed my tests.

Cheers,
Don

-------------- next part --------------
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 2b0cfce..a1df61b 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -23,6 +23,7 @@ #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
 #include <mach_ipi.h>
+#include <asm/kdebug.h>
 
 
 /* This keeps a track of which one is crashing cpu. */
@@ -93,31 +94,47 @@ static void crash_save_self(struct pt_re
 #ifdef CONFIG_SMP
 static atomic_t waiting_for_crash_ipi;
 
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+int crash_nmi_exceptions_notify(struct notifier_block *self,
+				unsigned long val, void *data)
 {
+	struct die_args *args = (struct die_args *)data;
 	struct pt_regs fixed_regs;
-
-	/* Don't do anything if this handler is invoked on crashing cpu.
-	 * Otherwise, system will completely hang. Crashing cpu can get
-	 * an NMI if system was initially booted with nmi_watchdog parameter.
-	 */
-	if (cpu == crashing_cpu)
-		return 1;
-	local_irq_disable();
-
-	if (!user_mode_vm(regs)) {
-		crash_fixup_ss_esp(&fixed_regs, regs);
-		regs = &fixed_regs;
+	int cpu = smp_processor_id();
+	int ret = NOTIFY_DONE;
+	
+	switch(val) {
+	case DIE_NMI_IPI:
+		/*
+		 * Don't do anything if this handler is invoked on crashing cpu.
+		 * Otherwise, system will completely hang. Crashing cpu can get
+		 * an NMI if system was initially booted with nmi_watchdog parameter.
+		 */
+		if (cpu == crashing_cpu)
+			return NOTIFY_STOP;
+		local_irq_disable();
+
+		if (!user_mode_vm(args->regs)) {
+			crash_fixup_ss_esp(&fixed_regs, args->regs);
+			args->regs = &fixed_regs;
+		}
+		crash_save_this_cpu(args->regs, cpu);
+		disable_local_APIC();
+		atomic_dec(&waiting_for_crash_ipi);
+		/* Assume hlt works */
+		halt();
+		for(;;);
+		break;
+	default:
+		break;
 	}
-	crash_save_this_cpu(regs, cpu);
-	disable_local_APIC();
-	atomic_dec(&waiting_for_crash_ipi);
-	/* Assume hlt works */
-	halt();
-	for(;;);
-
-	return 1;
+	return ret;
 }
+						
+static struct notifier_block crash_nmi_exceptions_nb = {
+	.notifier_call = crash_nmi_exceptions_notify,
+	.next = NULL,
+	.priority = INT_MAX
+};
 
 /*
  * By using the NMI code instead of a vector we just sneak thru the
@@ -135,7 +152,7 @@ static void nmi_shootdown_cpus(void)
 
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	/* Would it be better to replace the trap vector here? */
-	set_nmi_callback(crash_nmi_callback);
+	register_die_notifier(&crash_nmi_exceptions_nb);
 	/* Ensure the new callback function is set before sending
 	 * out the NMI
 	 */
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 4e6c3b7..5e01cc4 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -23,6 +23,7 @@ #include <asm/hardirq.h>
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/mach_apic.h>
+#include <asm/kdebug.h>
 
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
@@ -95,30 +96,46 @@ static void crash_save_self(struct pt_re
 #ifdef CONFIG_SMP
 static atomic_t waiting_for_crash_ipi;
 
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+int crash_nmi_exceptions_notify(struct notifier_block *self,
+				unsigned long val, void *data)
 {
-	/*
-	 * Don't do anything if this handler is invoked on crashing cpu.
-	 * Otherwise, system will completely hang. Crashing cpu can get
-	 * an NMI if system was initially booted with nmi_watchdog parameter.
-	 */
-	if (cpu == crashing_cpu)
-		return 1;
-	local_irq_disable();
-
-	crash_save_this_cpu(regs, cpu);
-	disable_local_APIC();
-	atomic_dec(&waiting_for_crash_ipi);
-	/* Assume hlt works */
-	for(;;)
-		asm("hlt");
-
-	return 1;
+	struct die_args *args = (struct die_args *)data;
+	int cpu = smp_processor_id();
+	int ret = NOTIFY_DONE;
+	
+	switch(val) {
+	case DIE_NMI_IPI:
+		/*
+		 * Don't do anything if this handler is invoked on crashing cpu.
+		 * Otherwise, system will completely hang. Crashing cpu can get
+		 * an NMI if system was initially booted with nmi_watchdog parameter.
+		 */
+		if (cpu == crashing_cpu)
+			return NOTIFY_STOP;
+		local_irq_disable();
+
+		crash_save_this_cpu(args->regs, cpu);
+		disable_local_APIC();
+		atomic_dec(&waiting_for_crash_ipi);
+		/* Assume hlt works */
+		for(;;)
+			asm("hlt");
+		break;
+	default:
+		break;
+	}
+	return ret;
 }
+						
+static struct notifier_block crash_nmi_exceptions_nb = {
+	.notifier_call = crash_nmi_exceptions_notify,
+	.next = NULL,
+	.priority = INT_MAX
+};
 
 static void smp_send_nmi_allbutself(void)
 {
-	send_IPI_allbutself(APIC_DM_NMI);
+	send_IPI_allbutself(NMI_VECTOR);
 }
 
 /*
@@ -132,7 +149,7 @@ static void nmi_shootdown_cpus(void)
 	unsigned long msecs;
 
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-	set_nmi_callback(crash_nmi_callback);
+	register_die_notifier(&crash_nmi_exceptions_nb);
 
 	/*
 	 * Ensure the new callback function is set before sending


More information about the discuss mailing list