From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/traps: Use an Interrupt Stack Table for #DB

PV guests can use architectural corner cases to cause #DB to be raised after
transitioning into supervisor mode.

Use an interrupt stack table for #DB to prevent the exception being taken with
a guest controlled stack pointer.

This is part of XSA-260 / CVE-2018-8897

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 0a452ae..8d1e7be 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -757,6 +757,7 @@ void load_system_tables(void)
 			[IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE,
 			[IST_DF  - 1] = stack_top + IST_DF  * PAGE_SIZE,
 			[IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE,
+			[IST_DB  - 1] = stack_top + IST_DB  * PAGE_SIZE,
 
 			[IST_MAX ... ARRAY_SIZE(tss->ist) - 1] =
 				0x8600111111111111ul,
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 3d4ac59..819a31c 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -353,13 +353,13 @@ static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs)
 /*
  * Notes for get_stack_trace_bottom() and get_stack_dump_bottom()
  *
- * Stack pages 0, 1 and 2:
+ * Stack pages 0 - 3:
  *   These are all 1-page IST stacks.  Each of these stacks have an exception
  *   frame and saved register state at the top.  The interesting bound for a
  *   trace is the word adjacent to this, while the bound for a dump is the
  *   very top, including the exception frame.
  *
- * Stack pages 3, 4 and 5:
+ * Stack pages 4 and 5:
  *   None of these are particularly interesting.  With MEMORY_GUARD, page 5 is
  *   explicitly not present, so attempting to dump or trace it is
  *   counterproductive.  Without MEMORY_GUARD, it is possible for a call chain
@@ -380,12 +380,12 @@ unsigned long get_stack_trace_bottom(unsigned long sp)
 {
     switch ( get_stack_page(sp) )
     {
-    case 0 ... 2:
+    case 0 ... 3:
         return ROUNDUP(sp, PAGE_SIZE) -
             offsetof(struct cpu_user_regs, es) - sizeof(unsigned long);
 
 #ifndef MEMORY_GUARD
-    case 3 ... 5:
+    case 4 ... 5:
 #endif
     case 6 ... 7:
         return ROUNDUP(sp, STACK_SIZE) -
@@ -400,11 +400,11 @@ unsigned long get_stack_dump_bottom(unsigned long sp)
 {
     switch ( get_stack_page(sp) )
     {
-    case 0 ... 2:
+    case 0 ... 3:
         return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long);
 
 #ifndef MEMORY_GUARD
-    case 3 ... 5:
+    case 4 ... 5:
 #endif
     case 6 ... 7:
         return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long);
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index ccece08..6ae4d6f 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -723,7 +723,7 @@ ENTRY(device_not_available)
 ENTRY(debug)
         pushq $0
         movl  $TRAP_debug,4(%rsp)
-        jmp   handle_exception
+        jmp   handle_ist_exception
 
 ENTRY(int3)
         pushq $0
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index db9988a..6e0e935 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -404,7 +404,8 @@ struct __packed __cacheline_aligned tss_struct {
 #define IST_DF   1UL
 #define IST_NMI  2UL
 #define IST_MCE  3UL
-#define IST_MAX  3UL
+#define IST_DB   4UL
+#define IST_MAX  4UL
 
 /* Set the interrupt stack table used by a particular interrupt
  * descriptor table entry. */
@@ -423,6 +424,7 @@ static inline void enable_each_ist(idt_entry_t *idt)
     set_ist(&idt[TRAP_double_fault],  IST_DF);
     set_ist(&idt[TRAP_nmi],           IST_NMI);
     set_ist(&idt[TRAP_machine_check], IST_MCE);
+    set_ist(&idt[TRAP_debug],         IST_DB);
 }
 
 static inline void disable_each_ist(idt_entry_t *idt)
@@ -430,6 +432,7 @@ static inline void disable_each_ist(idt_entry_t *idt)
     set_ist(&idt[TRAP_double_fault],  IST_NONE);
     set_ist(&idt[TRAP_nmi],           IST_NONE);
     set_ist(&idt[TRAP_machine_check], IST_NONE);
+    set_ist(&idt[TRAP_debug],         IST_NONE);
 }
 
 #define IDT_ENTRIES 256
