From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/tsx: Implement controls for RTM force-abort mode

The CPUID bit and MSR are deliberately not exposed to guests, because they
won't exist on newer processors.  As vPMU isn't security supported, the
misbehaviour of PCR3 isn't expected to impact production deployments.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: 6be613f29b4205349275d24367bd4c82fb2960dd
master date: 2019-03-12 17:05:21 +0000

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 1c826ef..0bf6852 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1781,7 +1781,7 @@ Use Virtual Processor ID support if available.  This prevents the need for TLB
 flushes on VM entry and exit, increasing performance.
 
 ### vpmu
-> `= ( <boolean> | { bts | ipc | arch [, ...] } )`
+> `= ( <boolean> | { bts | ipc | arch | rtm-abort=<bool> [, ...] } )`
 
 > Default: `off`
 
@@ -1807,6 +1807,21 @@ in the Pre-Defined Architectural Performance Events table from the Intel 64
 and IA-32 Architectures Software Developer's Manual, Volume 3B, System
 Programming Guide, Part 2.
 
+vpmu=rtm-abort controls a trade-off between working Restricted Transactional
+Memory, and working performance counters.
+
+All processors released to date (Q1 2019) supporting Transactional Memory
+Extensions suffer an erratum which has been addressed in microcode.
+
+Processors based on the Skylake microarchitecture with up-to-date
+microcode internally use performance counter 3 to work around the erratum.
+A consequence is that the counter gets reprogrammed whenever an `XBEGIN`
+instruction is executed.
+
+An alternative mode exists where PCR3 behaves as before, at the cost of
+`XBEGIN` unconditionally aborting.  Enabling `rtm-abort` mode will
+activate this alternative mode.
+
 If a boolean is not used, combinations of flags are allowed, comma separated.
 For example, vpmu=arch,bts.
 
diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
index 184f8ad..288fc48 100644
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -149,7 +149,11 @@ static const char *str_e8b[32] =
 
 static const char *str_7d0[32] =
 {
-    [0 ... 25] = "REZ",
+    [0 ... 11] = "REZ",
+
+    [12] = "REZ",           [13] = "tsx-force-abort",
+
+    [14 ... 25] = "REZ",
 
     [26] = "ibrsb",         [27] = "stibp",
     [28] = "l1d_flush",     [29] = "arch_caps",
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index fe54720..a9a8ec5 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -356,6 +356,9 @@ static void Intel_errata_workarounds(struct cpuinfo_x86 *c)
 	if (c->x86 == 6 && cpu_has_clflush &&
 	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
 		__set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
+
+	if (cpu_has_tsx_force_abort && opt_rtm_abort)
+		wrmsrl(MSR_TSX_FORCE_ABORT, TSX_FORCE_ABORT_RTM);
 }
 
 
diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
index 2f9ddf6..91cb43e 100644
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -53,6 +53,7 @@ CHECK_pmu_params;
 static unsigned int __read_mostly opt_vpmu_enabled;
 unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
 unsigned int __read_mostly vpmu_features = 0;
+bool __read_mostly opt_rtm_abort;
 static void parse_vpmu_params(char *s);
 custom_param("vpmu", parse_vpmu_params);
 
@@ -63,6 +64,8 @@ static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
 
 static int parse_vpmu_param(char *s, unsigned int len)
 {
+    int val;
+
     if ( !*s || !len )
         return 0;
     if ( !strncmp(s, "bts", len) )
@@ -71,6 +74,8 @@ static int parse_vpmu_param(char *s, unsigned int len)
         vpmu_features |= XENPMU_FEATURE_IPC_ONLY;
     else if ( !strncmp(s, "arch", len) )
         vpmu_features |= XENPMU_FEATURE_ARCH_ONLY;
+    else if ( (val = parse_boolean("rtm-abort", s, s + len)) >= 0 )
+        opt_rtm_abort = val;
     else
         return 1;
     return 0;
@@ -97,6 +102,10 @@ static void __init parse_vpmu_params(char *s)
                 break;
             p = sep + 1;
         }
+
+        if ( !vpmu_features ) /* rtm-abort doesn't imply vpmu=1 */
+            break;
+
         /* fall through */
     case 1:
         /* Default VPMU mode */
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ca64f6a..9582950 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3936,6 +3936,8 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
     case MSR_PRED_CMD:
     case MSR_FLUSH_CMD:
         /* Write-only */
+    case MSR_TSX_FORCE_ABORT:
+        /* Not offered to guests. */
         goto gp_fault;
 
     case MSR_SPEC_CTRL:
@@ -4179,6 +4181,8 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
 
     case MSR_ARCH_CAPABILITIES:
         /* Read-only */
+    case MSR_TSX_FORCE_ABORT:
+        /* Not offered to guests. */
         goto gp_fault;
 
     case MSR_AMD64_NB_CFG:
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 75a1dbf..404bdce 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2517,6 +2517,8 @@ static int priv_op_read_msr(unsigned int reg, uint64_t *val,
     case MSR_PRED_CMD:
     case MSR_FLUSH_CMD:
         /* Write-only */
+    case MSR_TSX_FORCE_ABORT:
+        /* Not offered to guests. */
         break;
 
     case MSR_SPEC_CTRL:
@@ -2745,6 +2747,8 @@ static int priv_op_write_msr(unsigned int reg, uint64_t val,
     case MSR_INTEL_PLATFORM_INFO:
     case MSR_ARCH_CAPABILITIES:
         /* The MSR is read-only. */
+    case MSR_TSX_FORCE_ABORT:
+        /* Not offered to guests. */
         break;
 
     case MSR_SPEC_CTRL:
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 89ff249..370e89e 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -93,6 +93,7 @@ XEN_CPUFEATURE(XEN_LBR,         (FSCAPINTS+0)*32+24) /* Xen uses MSR_DEBUGCTL.LB
 #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
 #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
+#define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
 #define cpu_has_svm		boot_cpu_has(X86_FEATURE_SVM)
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 7bb382f..29ece6a 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -53,6 +53,9 @@
 #define MSR_FLUSH_CMD			0x0000010b
 #define FLUSH_CMD_L1D			(_AC(1, ULL) << 0)
 
+#define MSR_TSX_FORCE_ABORT             0x0000010f
+#define TSX_FORCE_ABORT_RTM             (_AC(1, ULL) <<  0)
+
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0		0x000000c1
 #define MSR_IA32_A_PERFCTR0		0x000004c1
diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h
index 75b1973..37f521f 100644
--- a/xen/include/asm-x86/vpmu.h
+++ b/xen/include/asm-x86/vpmu.h
@@ -127,6 +127,7 @@ static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
 
 extern unsigned int vpmu_mode;
 extern unsigned int vpmu_features;
+extern bool opt_rtm_abort;
 
 /* Context switch */
 static inline void vpmu_switch_from(struct vcpu *prev)
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 5eedd07..ed6fbfc 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -236,6 +236,7 @@ XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
 XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
+XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
 XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by Intel) */
 XEN_CPUFEATURE(STIBP,         9*32+27) /*A! STIBP */
 XEN_CPUFEATURE(L1D_FLUSH,     9*32+28) /*S  MSR_FLUSH_CMD and L1D flush. */
