debuggers.hg

view xen/include/asm-x86/domain.h @ 17092:03d13b696027

Provide a fast write-emulation path that defers taking the shadow lock.

Shadow fault handling can be viewed as two parts: the first
covers housekeeping such as validating the guest page table
and fixing up the shadow table, and the second performs the
write emulation.

In one scenario the first part can be skipped entirely. When
a virtual frame has already been emulated successfully, the
next shadow fault that hits the same frame is very likely to
end up in the write-emulation logic again, so re-walking the
first part, which the previous fault already covered, is
wasted work. In that case the handler can jump to the
emulation code early, acquiring no lock until the final
shadow validation of the emulated write. Perf counters on a
64-bit SMP HVM guest running a kernel build show 89% of all
shadow write emulations taking this fast path.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Feb 15 12:33:11 2008 +0000 (2008-02-15)
parents ad0f20f5590a
children 8612d3d9578a
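
The per-vcpu state this changeset keys the fast path on is visible in the
header below: last_write_emul_ok in struct paging_vcpu and
last_emulated_frame in struct shadow_vcpu. As a minimal sketch of the
early-out (the function name and exact control flow here are assumptions,
not the patch's actual fault-handler code), the check might look like:

/* Hypothetical sketch: early-out test at the top of the shadow
 * page-fault handler, before the shadow lock is taken. Assumes the
 * Xen headers; shadow_fast_emulate_check() is an invented name. */
static int shadow_fast_emulate_check(struct vcpu *v, unsigned long va)
{
    /* Only valid if the last write emulation on this vcpu succeeded. */
    if ( !v->arch.paging.last_write_emul_ok )
        return 0;

    /* Must hit the same virtual frame as the last emulated write. */
    if ( (va >> PAGE_SHIFT) != v->arch.paging.shadow.last_emulated_frame )
    {
        v->arch.paging.last_write_emul_ok = 0;
        return 0;
    }

    /* Skip the guest-walk/shadow-fixup work and go straight to write
     * emulation; the shadow lock is then taken only around the final
     * validation of the emulated write. */
    return 1;
}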
line source
#ifndef __ASM_DOMAIN_H__
#define __ASM_DOMAIN_H__

#include <xen/config.h>
#include <xen/mm.h>
#include <asm/hvm/vcpu.h>
#include <asm/hvm/domain.h>
#include <asm/e820.h>

#define has_32bit_shinfo(d)    ((d)->arch.has_32bit_shinfo)
#define is_pv_32bit_domain(d)  ((d)->arch.is_32bit_pv)
#define is_pv_32bit_vcpu(v)    (is_pv_32bit_domain((v)->domain))
#ifdef __x86_64__
#define is_pv_32on64_domain(d) (is_pv_32bit_domain(d))
#else
#define is_pv_32on64_domain(d) (0)
#endif
#define is_pv_32on64_vcpu(v)   (is_pv_32on64_domain((v)->domain))
#define IS_COMPAT(d)           (is_pv_32on64_domain(d))

struct trap_bounce {
    uint32_t      error_code;
    uint8_t       flags; /* TBF_ */
    uint16_t      cs;
    unsigned long eip;
};

#define MAPHASH_ENTRIES 8
#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
#define MAPHASHENT_NOTINUSE ((u16)~0U)
struct mapcache_vcpu {
    /* Shadow of mapcache_domain.epoch. */
    unsigned int shadow_epoch;

    /* Lock-free per-VCPU hash of recently-used mappings. */
    struct vcpu_maphash_entry {
        unsigned long mfn;
        uint16_t      idx;
        uint16_t      refcnt;
    } hash[MAPHASH_ENTRIES];
};
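
/*
 * (Editor's sketch, not part of domain.h.) Illustrates how the lock-free
 * per-VCPU hash above is indexed: MAPHASH_HASHFN() masks the frame number
 * with MAPHASH_ENTRIES-1, so each frame maps to exactly one slot, and a
 * slot whose idx is MAPHASHENT_NOTINUSE is taken to be free. The helper
 * name and the hit/miss protocol here are illustrative assumptions.
 */
static inline int maphash_lookup_sketch(struct mapcache_vcpu *mc,
                                        unsigned long mfn, uint16_t *idx)
{
    struct vcpu_maphash_entry *ent = &mc->hash[MAPHASH_HASHFN(mfn)];

    if ( ent->idx == MAPHASHENT_NOTINUSE || ent->mfn != mfn )
        return 0;          /* miss: fall back to the shared mapcache_domain */

    ent->refcnt++;         /* per-VCPU state, so no lock is needed */
    *idx = ent->idx;       /* reuse the existing mapping slot */
    return 1;
}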

#define MAPCACHE_ORDER   10
#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
struct mapcache_domain {
    /* The PTEs that provide the mappings, and a cursor into the array. */
    l1_pgentry_t *l1tab;
    unsigned int cursor;

    /* Protects map_domain_page(). */
    spinlock_t lock;

    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
    unsigned int epoch;
    u32 tlbflush_timestamp;

    /* Which mappings are in use, and which are garbage to reap next epoch? */
    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
};

void mapcache_domain_init(struct domain *);
void mapcache_vcpu_init(struct vcpu *);

/* x86/64: toggle guest between kernel and user modes. */
void toggle_guest_mode(struct vcpu *);

/*
 * Initialise a hypercall-transfer page. The given pointer must be mapped
 * in Xen virtual address space (accesses are not validated or checked).
 */
void hypercall_page_initialise(struct domain *d, void *);

/************************************************/
/*          shadow paging extension             */
/************************************************/
struct shadow_domain {
    spinlock_t        lock;            /* shadow domain lock */
    int               locker;          /* processor which holds the lock */
    const char       *locker_function; /* Func that took it */
    unsigned int      opt_flags;       /* runtime tunable optimizations on/off */
    struct list_head  pinned_shadows;

    /* Memory allocation */
    struct list_head  freelists[SHADOW_MAX_ORDER + 1];
    struct list_head  p2m_freelist;
    unsigned int      total_pages;     /* number of pages allocated */
    unsigned int      free_pages;      /* number of pages on freelists */
    unsigned int      p2m_pages;       /* number of pages allocated to p2m */

    /* 1-to-1 map for use when HVM vcpus have paging disabled */
    pagetable_t unpaged_pagetable;

    /* Shadow hashtable */
    struct shadow_page_info **hash_table;
    int hash_walking;                  /* Some function is walking the hash table */

    /* Fast MMIO path heuristic */
    int has_fast_mmio_entries;
};

struct shadow_vcpu {
#if CONFIG_PAGING_LEVELS >= 3
    /* PAE guests: per-vcpu shadow top-level table */
    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
    /* PAE guests: per-vcpu cache of the top-level *guest* entries */
    l3_pgentry_t gl3e[4] __attribute__((__aligned__(32)));
#endif
    /* Non-PAE guests: pointer to guest top-level pagetable */
    void *guest_vtable;
    /* Last MFN that we emulated a write to, used by the unshadow heuristics. */
    unsigned long last_emulated_mfn_for_unshadow;
    /* MFN of the last shadow that we shot a writeable mapping in */
    unsigned long last_writeable_pte_smfn;
    /* Last frame number that we emulated a write to. */
    unsigned long last_emulated_frame;
    /* Last MFN that we successfully emulated a write to */
    unsigned long last_emulated_mfn;
};

/************************************************/
/*          hardware assisted paging            */
/************************************************/
struct hap_domain {
    spinlock_t        lock;
    int               locker;
    const char       *locker_function;

    struct list_head  freelist;
    unsigned int      total_pages;  /* number of pages allocated */
    unsigned int      free_pages;   /* number of pages on freelists */
    unsigned int      p2m_pages;    /* number of pages allocated to p2m */
};

/************************************************/
/*                 p2m handling                 */
/************************************************/
struct p2m_domain {
    /* Lock that protects updates to the p2m */
    spinlock_t         lock;
    int                locker;          /* processor which holds the lock */
    const char        *locker_function; /* Func that took it */

    /* Pages used to construct the p2m */
    struct list_head   pages;

    /* Functions to call to get or free pages for the p2m */
    struct page_info * (*alloc_page)(struct domain *d);
    void               (*free_page)(struct domain *d,
                                    struct page_info *pg);

    /* Highest guest frame that's ever been mapped in the p2m */
    unsigned long max_mapped_pfn;
};

/************************************************/
/*         common paging data structure         */
/************************************************/
struct log_dirty_domain {
    /* log-dirty lock */
    spinlock_t     lock;
    int            locker;          /* processor that holds the lock */
    const char    *locker_function; /* func that took it */

    /* log-dirty radix tree to record dirty pages */
    mfn_t          top;
    unsigned int   allocs;
    unsigned int   failed_allocs;

    /* log-dirty mode stats */
    unsigned int   fault_count;
    unsigned int   dirty_count;

    /* functions which are paging mode specific */
    int            (*enable_log_dirty)(struct domain *d);
    int            (*disable_log_dirty)(struct domain *d);
    void           (*clean_dirty_bitmap)(struct domain *d);
};

struct paging_domain {
    /* flags to control paging operation */
    u32                     mode;
    /* extension for shadow paging support */
    struct shadow_domain    shadow;
    /* extension for hardware-assisted paging */
    struct hap_domain       hap;
    /* log dirty support */
    struct log_dirty_domain log_dirty;
};

struct paging_vcpu {
    /* Pointers to mode-specific entry points. */
    struct paging_mode *mode;
    /* HVM guest: last emulated write was to a pagetable */
    unsigned int last_write_was_pt:1;
    /* HVM guest: last write emulation succeeded */
    unsigned int last_write_emul_ok:1;
    /* Translated guest: virtual TLB */
    struct shadow_vtlb *vtlb;
    spinlock_t          vtlb_lock;

    /* paging support extension */
    struct shadow_vcpu shadow;
};

struct arch_domain
{
    l1_pgentry_t *mm_perdomain_pt;
#ifdef CONFIG_X86_64
    l2_pgentry_t *mm_perdomain_l2;
    l3_pgentry_t *mm_perdomain_l3;
#endif

#ifdef CONFIG_X86_32
    /* map_domain_page() mapping cache. */
    struct mapcache_domain mapcache;
#endif

#ifdef CONFIG_COMPAT
    unsigned int hv_compat_vstart;
    l3_pgentry_t *mm_arg_xlat_l3;
#endif

    /* I/O-port admin-specified access capabilities. */
    struct rangeset *ioport_caps;

    struct hvm_domain hvm_domain;

    struct paging_domain paging;
    struct p2m_domain p2m;

    /* Shadow translated domain: P2M mapping */
    pagetable_t phys_table;

    /* Pseudophysical e820 map (XENMEM_memory_map). */
    struct e820entry e820[3];
    unsigned int nr_e820;

    /* Maximum physical-address bitwidth supported by this guest. */
    unsigned int physaddr_bitsize;

    /* Is a 32-bit PV (non-HVM) guest? */
    bool_t is_32bit_pv;
    /* Is shared-info page in 32-bit format? */
    bool_t has_32bit_shinfo;

    /* Continuable domain_relinquish_resources(). */
    enum {
        RELMEM_not_started,
        RELMEM_xen_l4,
        RELMEM_dom_l4,
        RELMEM_xen_l3,
        RELMEM_dom_l3,
        RELMEM_xen_l2,
        RELMEM_dom_l2,
        RELMEM_done,
    } relmem;
    struct list_head relmem_list;
} __cacheline_aligned;

#ifdef CONFIG_X86_PAE
struct pae_l3_cache {
    /*
     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
     * an L3 table while we are currently running on it (without using
     * expensive atomic 64-bit operations).
     */
    l3_pgentry_t  table[2][4] __attribute__((__aligned__(32)));
    unsigned long high_mfn;  /* The >=4GB MFN being shadowed. */
    unsigned int  inuse_idx; /* Which of the two cache slots is in use? */
    spinlock_t    lock;
};
#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
#else /* !CONFIG_X86_PAE */
struct pae_l3_cache { };
#define pae_l3_cache_init(c) ((void)0)
#endif

struct arch_vcpu
{
    /* Needs 16-byte alignment for FXSAVE/FXRSTOR. */
    struct vcpu_guest_context guest_context
    __attribute__((__aligned__(16)));

    struct pae_l3_cache pae_l3_cache;

    unsigned long flags; /* TF_ */

    void (*schedule_tail) (struct vcpu *);

    void (*ctxt_switch_from) (struct vcpu *);
    void (*ctxt_switch_to) (struct vcpu *);

    /* Record information required to continue execution after migration */
    void *continue_info;

    /* Bounce information for propagating an exception to guest OS. */
    struct trap_bounce trap_bounce;

    /* I/O-port access bitmap. */
    XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
    int iobmp_limit;  /* Number of ports represented in the bitmap. */
    int iopl;         /* Current IOPL for this VCPU. */

#ifdef CONFIG_X86_32
    struct desc_struct int80_desc;
#endif
#ifdef CONFIG_X86_64
    struct trap_bounce int80_bounce;
    unsigned long syscall32_callback_eip;
    unsigned long sysenter_callback_eip;
    unsigned short syscall32_callback_cs;
    unsigned short sysenter_callback_cs;
    bool_t syscall32_disables_events;
    bool_t sysenter_disables_events;
#endif

    /* Virtual Machine Extensions */
    struct hvm_vcpu hvm_vcpu;

    /*
     * Every domain has an L1 pagetable of its own. Per-domain mappings
     * are put in this table (eg. the current GDT is mapped here).
     */
    l1_pgentry_t *perdomain_ptes;

#ifdef CONFIG_X86_64
    pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
#endif
    pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
    /* guest_table holds a ref to the page, and also a type-count unless
     * shadow refcounts are in use */
    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
    pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
    unsigned long cr3;                  /* (MA) value to install in HW CR3 */

    /* Current LDT details. */
    unsigned long shadow_ldt_mapcnt;

    struct paging_vcpu paging;

    /* Guest-specified relocation of vcpu_info. */
    unsigned long vcpu_info_mfn;

#ifdef CONFIG_X86_32
    /* map_domain_page() mapping cache. */
    struct mapcache_vcpu mapcache;
#endif

} __cacheline_aligned;

/* Shorthands to improve code legibility. */
#define hvm_vmx hvm_vcpu.u.vmx
#define hvm_svm hvm_vcpu.u.svm

/* Continue the current hypercall via func(data) on specified cpu. */
int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);

/* Clean up CR4 bits that are not under guest control. */
unsigned long pv_guest_cr4_fixup(unsigned long guest_cr4);

/* Convert between guest-visible and real CR4 values. */
#define pv_guest_cr4_to_real_cr4(c) \
    (((c) | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))) & ~X86_CR4_DE)
#define real_cr4_to_pv_guest_cr4(c) \
    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE))

#endif /* __ASM_DOMAIN_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */
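
As a usage note on the CR4 conversion macros near the end of the header
(a sketch, with the concrete bit values assumed for illustration on the
premise that Xen itself runs with CR4.PGE and CR4.PSE set):

/* Assume mmu_cr4_features includes X86_CR4_PGE | X86_CR4_PSE. */
unsigned long guest_cr4 = X86_CR4_PAE | X86_CR4_DE;

/* Hardware value: Xen's PGE/PSE are forced on and DE is stripped,
 * i.e. X86_CR4_PAE | X86_CR4_PGE | X86_CR4_PSE. */
unsigned long hw_cr4 = pv_guest_cr4_to_real_cr4(guest_cr4);

/* Guest-visible value recovered from hardware: PGE/PSE hidden again,
 * leaving just X86_CR4_PAE. */
unsigned long pv_cr4 = real_cr4_to_pv_guest_cr4(hw_cr4);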