From: Jan Beulich <jbeulich@suse.com>
Subject: x86: don't store possibly stale TLB flush time stamp

While the timing window is extremely narrow, it is theoretically
possible for an update to the TLB flush clock and a subsequent flush
IPI to happen between the read and write parts of the update of the
per-page stamp. Exclude this possibility by disabling interrupts
across the update, preventing the IPI from being serviced in the
middle.
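
To illustrate, the problematic interleaving looks roughly like this
(a sketch only, using the x86 names; not code from the tree):

    stamp = tlbflush_clock;            /* read part of the update */
    /*
     * Window: another CPU bumps tlbflush_clock and sends a flush
     * IPI, which gets serviced right here.
     */
    page->tlbflush_timestamp = stamp;  /* write part: stale stamp */

With interrupts disabled across both parts, any such IPI is serviced
only once the stamp has been written.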

This is XSA-241.

Reported-by: Jann Horn <jannh@google.com>
Suggested-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
---
v3: Fix ARM build issue.
v2: Rebase on top of staging + (237-240)

--- a/xen/arch/arm/smp.c
+++ b/xen/arch/arm/smp.c
@@ -1,3 +1,4 @@
+#include <xen/mm.h>
 #include <asm/system.h>
 #include <asm/smp.h>
 #include <asm/cpregs.h>
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -488,7 +488,7 @@ void update_cr3(struct vcpu *v)
     make_cr3(v, cr3_mfn);
 }
 
-static inline void page_set_tlbflush_timestamp(struct page_info *page)
+static inline void set_tlbflush_timestamp(struct page_info *page)
 {
     /*
      * Record TLB information for flush later. We do not stamp page tables
@@ -499,7 +499,7 @@ static inline void page_set_tlbflush_timestamp(struct page_info *page)
      */
     if ( !(page->count_info & PGC_page_table) ||
          !shadow_mode_enabled(page_get_owner(page)) )
-        page->tlbflush_timestamp = tlbflush_current_time();
+        page_set_tlbflush_timestamp(page);
 }
 
 const char __section(".bss.page_aligned.const") __aligned(PAGE_SIZE)
@@ -2232,7 +2232,7 @@ static int _put_final_page_type(struct page_info *page, unsigned long type,
             dec_linear_entries(ptpg);
         }
         ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying);
-        page_set_tlbflush_timestamp(page);
+        set_tlbflush_timestamp(page);
         smp_wmb();
         page->u.inuse.type_info--;
     }
@@ -2240,7 +2240,7 @@ static int _put_final_page_type(struct page_info *page, unsigned long type,
     {
         ASSERT((page->u.inuse.type_info &
                 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
-        page_set_tlbflush_timestamp(page);
+        set_tlbflush_timestamp(page);
         smp_wmb();
         page->u.inuse.type_info |= PGT_validated;
     }
@@ -2294,7 +2294,7 @@ static int _put_page_type(struct page_info *page, bool preemptible,
             if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
             {
                 /*
-                 * page_set_tlbflush_timestamp() accesses the same union
+                 * set_tlbflush_timestamp() accesses the same union
                  * linear_pt_count lives in. Unvalidated page table pages,
                  * however, should occur during domain destruction only
                  * anyway.  Updating of linear_pt_count luckily is not
@@ -2306,7 +2306,7 @@ static int _put_page_type(struct page_info *page, bool preemptible,
                 ptpg = NULL;
             }
 
-            page_set_tlbflush_timestamp(page);
+            set_tlbflush_timestamp(page);
         }
 
         if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 86186cccdf..7b16ed985d 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1464,7 +1464,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
          * TLBs when we reuse the page.  Because the destructors leave the
          * contents of the pages in place, we can delay TLB flushes until
          * just before the allocator hands the page out again. */
-        sp->tlbflush_timestamp = tlbflush_current_time();
+        page_set_tlbflush_timestamp(sp);
         perfc_decr(shadow_alloc_count);
         page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
         sp = next;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 0b9f6cc6df..0d6e53ccfa 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1358,7 +1358,7 @@ static void free_heap_pages(
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
-            pg[i].tlbflush_timestamp = tlbflush_current_time();
+            page_set_tlbflush_timestamp(&pg[i]);
 
         /* This page is not a guest frame any more. */
         page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
diff --git a/xen/include/asm-arm/flushtlb.h b/xen/include/asm-arm/flushtlb.h
index a8e8a05363..83ff9fa8b3 100644
--- a/xen/include/asm-arm/flushtlb.h
+++ b/xen/include/asm-arm/flushtlb.h
@@ -12,6 +12,11 @@ static inline void tlbflush_filter(cpumask_t *mask, uint32_t page_timestamp) {}
 
 #define tlbflush_current_time()                 (0)
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+    page->tlbflush_timestamp = tlbflush_current_time();
+}
+
 #if defined(CONFIG_ARM_32)
 # include <asm/arm32/flushtlb.h>
 #elif defined(CONFIG_ARM_64)
diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h
index 131b31cfcc..413db692e1 100644
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -23,6 +23,20 @@ DECLARE_PER_CPU(u32, tlbflush_time);
 
 #define tlbflush_current_time() tlbflush_clock
 
+static inline void page_set_tlbflush_timestamp(struct page_info *page)
+{
+    /*
+     * Prevent storing a stale time stamp, which could happen if an update
+     * to tlbflush_clock plus a subsequent flush IPI happen between the
+     * reading of tlbflush_clock and the writing of the struct page_info
+     * field.
+     */
+    ASSERT(local_irq_is_enabled());
+    local_irq_disable();
+    page->tlbflush_timestamp = tlbflush_current_time();
+    local_irq_enable();
+}
+
 /*
  * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing.
  * @lastuse_stamp is a timestamp taken when the PFN we are testing was last 
-- 
2.14.1

