debuggers.hg: xen/include/asm-x86/domain.h @ 22855:1d1eec7e1fb4

#ifndef __ASM_DOMAIN_H__
#define __ASM_DOMAIN_H__

#include <xen/config.h>
#include <xen/mm.h>
#include <asm/hvm/vcpu.h>
#include <asm/hvm/domain.h>
#include <asm/e820.h>
#include <asm/mce.h>
#include <public/vcpu.h>

#define has_32bit_shinfo(d)    ((d)->arch.has_32bit_shinfo)
#define is_pv_32bit_domain(d)  ((d)->arch.is_32bit_pv)
#define is_pv_32bit_vcpu(v)    (is_pv_32bit_domain((v)->domain))
#ifdef __x86_64__
#define is_pv_32on64_domain(d) (is_pv_32bit_domain(d))
#else
#define is_pv_32on64_domain(d) (0)
#endif
#define is_pv_32on64_vcpu(v)   (is_pv_32on64_domain((v)->domain))

#define is_hvm_pv_evtchn_domain(d) (is_hvm_domain(d) && \
        d->arch.hvm_domain.irq.callback_via_type == HVMIRQ_callback_vector)
#define is_hvm_pv_evtchn_vcpu(v) (is_hvm_pv_evtchn_domain(v->domain))
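
/*
 * Illustrative note, not part of the original header: callers branch on
 * these predicates to pick the guest-flavour-specific path, e.g. a
 * 32-on-64 PV guest needs the compat ABI layout, while an HVM guest that
 * registered a vector callback takes the PV event-channel delivery path.
 * Sketch only; 'd', 'v' and the flag names below are hypothetical:
 *
 *     if ( is_pv_32on64_domain(d) )
 *         use_compat_layout = 1;
 *     if ( is_hvm_pv_evtchn_vcpu(v) )
 *         deliver_via_callback_vector = 1;
 */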

#define VCPU_TRAP_NMI          1
#define VCPU_TRAP_MCE          2
#define VCPU_TRAP_LAST         VCPU_TRAP_MCE

#define nmi_state              async_exception_state(VCPU_TRAP_NMI)
#define mce_state              async_exception_state(VCPU_TRAP_MCE)

#define nmi_pending            nmi_state.pending
#define mce_pending            mce_state.pending

struct trap_bounce {
    uint32_t      error_code;
    uint8_t       flags; /* TBF_ */
    uint16_t      cs;
    unsigned long eip;
};

#define MAPHASH_ENTRIES 8
#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
#define MAPHASHENT_NOTINUSE ((u16)~0U)
struct mapcache_vcpu {
    /* Shadow of mapcache_domain.epoch. */
    unsigned int shadow_epoch;

    /* Lock-free per-VCPU hash of recently-used mappings. */
    struct vcpu_maphash_entry {
        unsigned long mfn;
        uint16_t      idx;
        uint16_t      refcnt;
    } hash[MAPHASH_ENTRIES];
};
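
/*
 * Illustrative sketch, not part of the original header: how the lock-free
 * per-VCPU hash above is indexed.  MAPHASH_HASHFN() simply masks the frame
 * number with MAPHASH_ENTRIES-1, so e.g. mfn 0x1234f and mfn 0x12347 both
 * land in bucket 7.  The helper name is hypothetical.
 */
static inline int
example_maphash_hit(const struct mapcache_vcpu *mc, unsigned long mfn)
{
    const struct vcpu_maphash_entry *ent = &mc->hash[MAPHASH_HASHFN(mfn)];

    /* A bucket describes a live mapping only if it records the same MFN
     * and its idx slot is not marked MAPHASHENT_NOTINUSE. */
    return (ent->mfn == mfn) && (ent->idx != MAPHASHENT_NOTINUSE);
}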

#define MAPCACHE_ORDER   10
#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
struct mapcache_domain {
    /* The PTEs that provide the mappings, and a cursor into the array. */
    l1_pgentry_t *l1tab;
    unsigned int cursor;

    /* Protects map_domain_page(). */
    spinlock_t lock;

    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
    unsigned int epoch;
    u32 tlbflush_timestamp;

    /* Which mappings are in use, and which are garbage to reap next epoch? */
    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
};
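
/*
 * Illustrative sketch, not part of the original header: a stale mapcache
 * slot is retired by clearing its bit in 'inuse' and setting it in
 * 'garbage'; TLB entries for all such slots are then flushed together when
 * the epoch advances.  The helper name is hypothetical, and the open-coded
 * bit arithmetic (using BITS_PER_LONG from xen/config.h) is for
 * illustration only.
 */
static inline void example_mapcache_retire(struct mapcache_domain *dcache,
                                           unsigned int idx)
{
    dcache->inuse[idx / BITS_PER_LONG]   &= ~(1UL << (idx % BITS_PER_LONG));
    dcache->garbage[idx / BITS_PER_LONG] |=  (1UL << (idx % BITS_PER_LONG));
}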

void mapcache_domain_init(struct domain *);
void mapcache_vcpu_init(struct vcpu *);

/* x86/64: toggle guest between kernel and user modes. */
void toggle_guest_mode(struct vcpu *);

/*
 * Initialise a hypercall-transfer page. The given pointer must be mapped
 * in Xen virtual address space (accesses are not validated or checked).
 */
void hypercall_page_initialise(struct domain *d, void *);

/************************************************/
/*          shadow paging extension             */
/************************************************/
struct shadow_domain {
    spinlock_t        lock;   /* shadow domain lock */
    int               locker; /* processor which holds the lock */
    const char       *locker_function; /* func that took it */
    unsigned int      opt_flags; /* runtime tunable optimizations on/off */
    struct page_list_head pinned_shadows;

    /* Memory allocation */
    struct page_list_head freelist;
    unsigned int      total_pages; /* number of pages allocated */
    unsigned int      free_pages;  /* number of pages on freelists */
    unsigned int      p2m_pages;   /* number of pages allocated to p2m */

    /* 1-to-1 map for use when HVM vcpus have paging disabled */
    pagetable_t unpaged_pagetable;

    /* Shadow hashtable */
    struct page_info **hash_table;
    int hash_walking; /* Some function is walking the hash table */

    /* Fast MMIO path heuristic */
    int has_fast_mmio_entries;

    /* Reflects guest pagetable dirty status; incremented by write emulation
     * and by removal of write permissions. */
    atomic_t gtable_dirty_version;

    /* OOS */
    int oos_active;
    int oos_off;

    int pagetable_dying_op;
};

struct shadow_vcpu {
#if CONFIG_PAGING_LEVELS >= 3
    /* PAE guests: per-vcpu shadow top-level table */
    l3_pgentry_t l3table[4] __attribute__((__aligned__(32)));
    /* PAE guests: per-vcpu cache of the top-level *guest* entries */
    l3_pgentry_t gl3e[4] __attribute__((__aligned__(32)));
#endif
    /* Non-PAE guests: pointer to guest top-level pagetable */
    void *guest_vtable;
    /* Last MFN that we emulated a write to, as part of the unshadow heuristic. */
    unsigned long last_emulated_mfn_for_unshadow;
    /* MFN of the last shadow that we shot a writeable mapping in */
    unsigned long last_writeable_pte_smfn;
    /* Last frame number that we emulated a write to. */
    unsigned long last_emulated_frame;
    /* Last MFN that we emulated a write to successfully. */
    unsigned long last_emulated_mfn;

    /* Shadow out-of-sync: pages that this vcpu has let go out of sync */
    mfn_t oos[SHADOW_OOS_PAGES];
    mfn_t oos_snapshot[SHADOW_OOS_PAGES];
    struct oos_fixup {
        int next;
        mfn_t smfn[SHADOW_OOS_FIXUPS];
        unsigned long off[SHADOW_OOS_FIXUPS];
    } oos_fixup[SHADOW_OOS_PAGES];

    int pagetable_dying;
};

/************************************************/
/*          hardware assisted paging            */
/************************************************/
struct hap_domain {
    spinlock_t        lock;
    int               locker;
    const char       *locker_function;

    struct page_list_head freelist;
    unsigned int      total_pages; /* number of pages allocated */
    unsigned int      free_pages;  /* number of pages on freelists */
    unsigned int      p2m_pages;   /* number of pages allocated to p2m */
};

/************************************************/
/*          common paging data structure        */
/************************************************/
struct log_dirty_domain {
    /* log-dirty lock */
    spinlock_t     lock;
    int            locker; /* processor that holds the lock */
    const char    *locker_function; /* func that took it */

    /* log-dirty radix tree to record dirty pages */
    mfn_t          top;
    unsigned int   allocs;
    unsigned int   failed_allocs;

    /* log-dirty mode stats */
    unsigned int   fault_count;
    unsigned int   dirty_count;

    /* functions which are paging mode specific */
    int            (*enable_log_dirty   )(struct domain *d);
    int            (*disable_log_dirty  )(struct domain *d);
    void           (*clean_dirty_bitmap )(struct domain *d);
};
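
/*
 * Illustrative sketch, not part of the original header: the three hooks
 * above are filled in by the active paging mode (shadow or HAP), and
 * callers simply dispatch through them.  The wrapper name is hypothetical.
 */
static inline int example_enable_log_dirty(struct log_dirty_domain *ld,
                                           struct domain *d)
{
    /* Delegates to whichever implementation the paging mode installed. */
    return ld->enable_log_dirty(d);
}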

struct paging_domain {
    /* flags to control paging operation */
    u32                     mode;
    /* extension for shadow paging support */
    struct shadow_domain    shadow;
    /* extension for hardware-assisted paging */
    struct hap_domain       hap;
    /* log dirty support */
    struct log_dirty_domain log_dirty;
    /* alloc/free pages from the pool for paging-assistance structures
     * (used by p2m and log-dirty code for their tries) */
    struct page_info * (*alloc_page)(struct domain *d);
    void (*free_page)(struct domain *d, struct page_info *pg);
};
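
/*
 * Illustrative sketch, not part of the original header: p2m and log-dirty
 * code obtain and return pages for their structures through the pool hooks
 * above rather than allocating directly.  The helper name is hypothetical.
 */
static inline struct page_info *
example_paging_pool_alloc(struct paging_domain *pd, struct domain *d)
{
    /* May return NULL if the paging pool is exhausted. */
    return pd->alloc_page(d);
}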

struct paging_vcpu {
    /* Pointers to mode-specific entry points. */
    const struct paging_mode *mode;
    /* HVM guest: last emulated write was to a pagetable */
    unsigned int last_write_was_pt:1;
    /* HVM guest: last write emulation succeeded */
    unsigned int last_write_emul_ok:1;
    /* Translated guest: virtual TLB */
    struct shadow_vtlb *vtlb;
    spinlock_t          vtlb_lock;

    /* paging support extension */
    struct shadow_vcpu shadow;
};

#define MAX_CPUID_INPUT 40
typedef xen_domctl_cpuid_t cpuid_input_t;

struct p2m_domain;
struct time_scale {
    int shift;
    u32 mul_frac;
};

struct arch_domain
{
#ifdef CONFIG_X86_64
    struct page_info **mm_perdomain_pt_pages;
    l2_pgentry_t *mm_perdomain_l2;
    l3_pgentry_t *mm_perdomain_l3;

    unsigned int hv_compat_vstart;
#else
    l1_pgentry_t *mm_perdomain_pt;

    /* map_domain_page() mapping cache. */
    struct mapcache_domain mapcache;
#endif

    bool_t s3_integrity;

    /* I/O-port admin-specified access capabilities. */
    struct rangeset *ioport_caps;
    uint32_t pci_cf8;

    struct list_head pdev_list;
    struct hvm_domain hvm_domain;

    struct paging_domain paging;
    struct p2m_domain *p2m;

    /* NB. protected by d->event_lock and by irq_desc[irq].lock */
    int *irq_pirq;
    int *pirq_irq;
    /* pirq to emulated irq and vice versa */
    int *emuirq_pirq;
    int *pirq_emuirq;

    /* Shared page for notifying that explicit PIRQ EOI is required. */
    unsigned long *pirq_eoi_map;
    unsigned long pirq_eoi_map_mfn;

    /* Pseudophysical e820 map (XENMEM_memory_map). */
    struct e820entry e820[3];
    unsigned int nr_e820;

    /* Maximum physical-address bitwidth supported by this guest. */
    unsigned int physaddr_bitsize;

    /* Is a 32-bit PV (non-HVM) guest? */
    bool_t is_32bit_pv;
    /* Is shared-info page in 32-bit format? */
    bool_t has_32bit_shinfo;
    /* Domain cannot handle spurious page faults? */
    bool_t suppress_spurious_page_faults;

    /* Continuable domain_relinquish_resources(). */
    enum {
        RELMEM_not_started,
        RELMEM_xen,
        RELMEM_l4,
        RELMEM_l3,
        RELMEM_l2,
        RELMEM_done,
    } relmem;
    struct page_list_head relmem_list;

    cpuid_input_t cpuids[MAX_CPUID_INPUT];

    /* For Guest vMCA handling */
    struct domain_mca_msrs *vmca_msrs;

    /* TSC management (emulation, pv, scaling, stats) */
    int tsc_mode;            /* see include/asm-x86/time.h */
    bool_t vtsc;             /* tsc is emulated (may change after migrate) */
    s_time_t vtsc_last;      /* previous TSC value (guarantee monotonicity) */
    spinlock_t vtsc_lock;
    uint64_t vtsc_offset;    /* adjustment for save/restore/migrate */
    uint32_t tsc_khz;        /* cached khz for certain emulated cases */
    struct time_scale vtsc_to_ns; /* scaling for certain emulated cases */
    struct time_scale ns_to_vtsc; /* scaling for certain emulated cases */
    uint32_t incarnation;    /* incremented every restore or live migrate
                                (possibly other cases in the future) */
    uint64_t vtsc_kerncount; /* for hvm, counts all vtsc */
    uint64_t vtsc_usercount; /* not used for hvm */
} __cacheline_aligned;

#define has_arch_pdevs(d)    (!list_empty(&(d)->arch.pdev_list))
#define has_arch_mmios(d)    (!rangeset_is_empty((d)->iomem_caps))

#ifdef CONFIG_X86_64
#define perdomain_pt_pgidx(v) \
      ((v)->vcpu_id >> (PAGETABLE_ORDER - GDT_LDT_VCPU_SHIFT))
#define perdomain_ptes(d, v) \
    ((l1_pgentry_t *)page_to_virt((d)->arch.mm_perdomain_pt_pages \
      [perdomain_pt_pgidx(v)]) + (((v)->vcpu_id << GDT_LDT_VCPU_SHIFT) & \
                                  (L1_PAGETABLE_ENTRIES - 1)))
#define perdomain_pt_page(d, n) ((d)->arch.mm_perdomain_pt_pages[n])
#else
#define perdomain_ptes(d, v) \
    ((d)->arch.mm_perdomain_pt + ((v)->vcpu_id << GDT_LDT_VCPU_SHIFT))
#define perdomain_pt_page(d, n) \
    (virt_to_page((d)->arch.mm_perdomain_pt) + (n))
#endif
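
/*
 * Illustrative note, not part of the original header: on x86-64 each vcpu's
 * per-domain L1 entries (its GDT/LDT mappings, for example) live in one of
 * the mm_perdomain_pt_pages[] frames.  perdomain_pt_pgidx() selects the
 * page for a given vcpu, and perdomain_ptes() yields a pointer to that
 * vcpu's first L1 slot, so a caller might write (sketch only; 'd' and 'v'
 * are assumed pointers):
 *
 *     l1_pgentry_t *ptes = perdomain_ptes(d, v);
 */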

#ifdef __i386__
struct pae_l3_cache {
    /*
     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
     * an L3 table while we are currently running on it (without using
     * expensive atomic 64-bit operations).
     */
    l3_pgentry_t  table[2][4] __attribute__((__aligned__(32)));
    unsigned long high_mfn;  /* The >=4GB MFN being shadowed. */
    unsigned int  inuse_idx; /* Which of the two cache slots is in use? */
    spinlock_t    lock;
};
#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
#else /* !defined(__i386__) */
struct pae_l3_cache { };
#define pae_l3_cache_init(c) ((void)0)
#endif

struct arch_vcpu
{
    /* Needs 16-byte alignment for FXSAVE/FXRSTOR. */
    struct vcpu_guest_context guest_context
    __attribute__((__aligned__(16)));

    struct pae_l3_cache pae_l3_cache;

    unsigned long flags; /* TF_ */

    void (*schedule_tail) (struct vcpu *);

    void (*ctxt_switch_from) (struct vcpu *);
    void (*ctxt_switch_to) (struct vcpu *);

    /* Bounce information for propagating an exception to guest OS. */
    struct trap_bounce trap_bounce;

    /* I/O-port access bitmap. */
    XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
    int iobmp_limit;  /* Number of ports represented in the bitmap. */
    int iopl;         /* Current IOPL for this VCPU. */

#ifdef CONFIG_X86_32
    struct desc_struct int80_desc;
#endif
#ifdef CONFIG_X86_64
    struct trap_bounce int80_bounce;
    unsigned long      syscall32_callback_eip;
    unsigned long      sysenter_callback_eip;
    unsigned short     syscall32_callback_cs;
    unsigned short     sysenter_callback_cs;
    bool_t             syscall32_disables_events;
    bool_t             sysenter_disables_events;
#endif

    /* Virtual Machine Extensions */
    struct hvm_vcpu hvm_vcpu;

    /*
     * Every domain has a L1 pagetable of its own. Per-domain mappings
     * are put in this table (eg. the current GDT is mapped here).
     */
    l1_pgentry_t *perdomain_ptes;

#ifdef CONFIG_X86_64
    pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
#endif
    pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
    /* guest_table holds a ref to the page, and also a type-count unless
     * shadow refcounts are in use */
    pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
    pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
    unsigned long cr3;                  /* (MA) value to install in HW CR3 */

    /*
     * The save area for Processor Extended States and the bitmask of the
     * XSAVE/XRSTOR features. They are used as follows: 1) when a vcpu that
     * has dirtied FPU/SSE state is scheduled out, we XSAVE its state here;
     * 2) in the #NM handler, we XRSTOR the state we XSAVE-ed.
     */
    void *xsave_area;
    uint64_t xcr0;
    /* Accumulated eXtended features mask for using XSAVE/XRSTOR by Xen
     * itself, as we can never know whether the guest OS depends on content
     * preservation when it clears a feature flag (for example, temporarily).
     * However, the processor cannot touch eXtended states before the guest
     * explicitly enables them via xcr0.
     */
    uint64_t xcr0_accum;

    /* Current LDT details. */
    unsigned long shadow_ldt_mapcnt;
    spinlock_t shadow_ldt_lock;

    struct paging_vcpu paging;

    /* Guest-specified relocation of vcpu_info. */
    unsigned long vcpu_info_mfn;

#ifdef CONFIG_X86_32
    /* map_domain_page() mapping cache. */
    struct mapcache_vcpu mapcache;
#endif

    uint32_t gdbsx_vcpu_event;

    /* A secondary copy of the vcpu time info. */
    XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;

#ifdef CONFIG_COMPAT
    void *compat_arg_xlat;
#endif

} __cacheline_aligned;

/* Shorthands to improve code legibility. */
#define hvm_vmx         hvm_vcpu.u.vmx
#define hvm_svm         hvm_vcpu.u.svm

void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);

/* Clean up CR4 bits that are not under guest control. */
unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);

/* Convert between guest-visible and real CR4 values. */
#define pv_guest_cr4_to_real_cr4(v)                         \
    (((v)->arch.guest_context.ctrlreg[4]                    \
      | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))    \
      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)          \
      | ((cpu_has_xsave)? X86_CR4_OSXSAVE : 0))             \
      & ~X86_CR4_DE)
#define real_cr4_to_pv_guest_cr4(c) \
    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
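
/*
 * Illustrative note, not part of the original header: converting the
 * guest-visible CR4 into the value loaded in hardware forces on the bits
 * Xen controls (PGE/PSE per mmu_cr4_features, TSD when the TSC is
 * emulated, OSXSAVE when the CPU supports XSAVE) and clears CR4.DE; the
 * reverse conversion simply masks those Xen-owned bits back out, e.g.
 * (sketch only; 'v' is an assumed vcpu pointer):
 *
 *     unsigned long hw_cr4    = pv_guest_cr4_to_real_cr4(v);
 *     unsigned long guest_cr4 = real_cr4_to_pv_guest_cr4(hw_cr4);
 */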

void domain_cpuid(struct domain *d,
                  unsigned int  input,
                  unsigned int  sub_input,
                  unsigned int *eax,
                  unsigned int *ebx,
                  unsigned int *ecx,
                  unsigned int *edx);
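
/*
 * Illustrative sketch, not part of the original header: a caller reads the
 * domain's CPUID policy by passing a leaf and sub-leaf and receiving the
 * four registers back.  The wrapper name and the choice of leaf 1 are
 * examples only.
 */
static inline void example_domain_cpuid_leaf1(struct domain *d,
                                              unsigned int *eax,
                                              unsigned int *ebx,
                                              unsigned int *ecx,
                                              unsigned int *edx)
{
    /* Leaf 1, sub-leaf 0: standard feature information. */
    domain_cpuid(d, 1, 0, eax, ebx, ecx, edx);
}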

#endif /* __ASM_DOMAIN_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */