debuggers.hg

view xen/include/asm-x86/mm.h @ 6680:d0a4f770a5f4

phys_to_mach and mach_to_phys tables contain long entries, not
32-bit entries.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Sep 06 18:01:24 2005 +0000 (2005-09-06)
parents dd668f7527cb
children 8db9c5873b9b
line source

#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/cpumask.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>

/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 * 1. 'struct pfn_info' contains a 'struct list_head list'.
 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)

struct pfn_info
{
    /* Each frame can be threaded onto a doubly-linked list. */
    struct list_head list;

    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    u32 tlbflush_timestamp;

    /* Reference count and various PGC_xxx flags and fields. */
    u32 count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
            /* Type reference count and various PGT_xxx flags and fields. */
            unsigned long type_info;
        } inuse;

        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
        struct {
            /* Mask of possibly-tainted TLBs. */
            cpumask_t cpumask;
            /* Order-size of the free chunk this page is the head of. */
            u8 order;
        } free;

    } u;
};
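
/*
 * Minimal usage sketch (illustrative, not part of the original header):
 * for the head page of a free chunk, u.free.order gives the buddy order,
 * so the chunk covers (1 << order) frames.  The helper name below is
 * hypothetical.
 */
static inline unsigned long free_chunk_pages(struct pfn_info *pfn)
{
    /* Only meaningful while the frame heads a chunk on a free list. */
    return 1UL << PFN_ORDER(pfn);
}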

/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none (0U<<29) /* no special uses of this page */
#define PGT_l1_page_table (1U<<29) /* using this page as an L1 page table? */
#define PGT_l2_page_table (2U<<29) /* using this page as an L2 page table? */
#define PGT_l3_page_table (3U<<29) /* using this page as an L3 page table? */
#define PGT_l4_page_table (4U<<29) /* using this page as an L4 page table? */
#define PGT_gdt_page (5U<<29) /* using this page in a GDT? */
#define PGT_ldt_page (6U<<29) /* using this page in an LDT? */
#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */

#define PGT_l1_shadow PGT_l1_page_table
#define PGT_l2_shadow PGT_l2_page_table
#define PGT_l3_shadow PGT_l3_page_table
#define PGT_l4_shadow PGT_l4_page_table
#define PGT_hl2_shadow (5U<<29)
#define PGT_snapshot (6U<<29)
#define PGT_writable_pred (7U<<29) /* predicted gpfn with writable ref */

#define PGT_fl1_shadow (5U<<29)
#define PGT_type_mask (7U<<29) /* Bits 29-31. */

/* Has this page been validated for use as its current type? */
#define _PGT_validated 28
#define PGT_validated (1U<<_PGT_validated)
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned 27
#define PGT_pinned (1U<<_PGT_pinned)
#if defined(__i386__)
/* The 11 most significant bits of virt address if this is a page table. */
#define PGT_va_shift 16
#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift)
#elif defined(__x86_64__)
/* The 27 most significant bits of virt address if this is a page table. */
#define PGT_va_shift 32
#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
#endif

/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1U<<16)-1)

#define PGT_mfn_mask ((1U<<20)-1) /* mfn mask for shadow types */

#define PGT_score_shift 20
#define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift)

/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
#define PGC_allocated (1U<<_PGC_allocated)
/* Set when fullshadow mode marks a page out-of-sync */
#define _PGC_out_of_sync 30
#define PGC_out_of_sync (1U<<_PGC_out_of_sync)
/* Set when fullshadow mode is using a page as a page table */
#define _PGC_page_table 29
#define PGC_page_table (1U<<_PGC_page_table)
/* 29-bit count of references to this frame. */
#define PGC_count_mask ((1U<<29)-1)
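
/*
 * Sketch of how the PGT_/PGC_ fields above are typically decoded
 * (hypothetical helpers for illustration only; note the shadow types
 * alias the page-table types, so the first test also matches shadows).
 */
static inline int page_type_is_page_table(struct pfn_info *page)
{
    unsigned long type = page->u.inuse.type_info & PGT_type_mask;
    return (type >= PGT_l1_page_table) && (type <= PGT_l4_page_table);
}

static inline u32 page_general_refcount(struct pfn_info *page)
{
    /* The low 29 bits of count_info hold the general reference count. */
    return page->count_info & PGC_count_mask;
}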

/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page) (1)
#define PageSetSlab(page) ((void)0)
#define PageClearSlab(page) ((void)0)

#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)

#if defined(__i386__)
#define pickle_domptr(_d) ((u32)(unsigned long)(_d))
#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return (_domain == 0) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#define PRtype_info "016lx" /* should only be used for printk's */
#endif

#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
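
/*
 * Sketch (hypothetical helper): the owner is stored in 'pickled' 32-bit
 * form, so ownership tests compare against pickle_domptr() rather than
 * the raw struct domain pointer.
 */
static inline int page_is_owned_by(struct pfn_info *page, struct domain *d)
{
    return page->u.inuse._domain == pickle_domptr(d);
}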

#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom)                                   \
    do {                                                                    \
        page_set_owner((_pfn), (_dom));                                     \
        /* The incremented type count is intended to pin to 'writable'. */  \
        (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;  \
        wmb(); /* install valid domain ptr before updating refcnt. */       \
        spin_lock(&(_dom)->page_alloc_lock);                                \
        /* _dom holds an allocation reference */                            \
        ASSERT((_pfn)->count_info == 0);                                    \
        (_pfn)->count_info |= PGC_allocated | 1;                            \
        if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
            get_knownalive_domain(_dom);                                    \
        list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list);                \
        spin_unlock(&(_dom)->page_alloc_lock);                              \
    } while ( 0 )
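
/*
 * Usage sketch (illustrative only): sharing a freshly allocated xenheap
 * page with a domain, e.g. for a shared-info style page.  Assumes
 * alloc_xenheap_page() returns a xenheap virtual address (NULL on
 * failure) and virt_to_page() maps it to its frame_table entry; the
 * helper name is hypothetical.
 */
static inline struct pfn_info *share_new_xenheap_page(struct domain *d)
{
    void *v = alloc_xenheap_page();
    if ( v == NULL )
        return NULL;
    SHARE_PFN_WITH_DOMAIN(virt_to_page(v), d);
    return virt_to_page(v);
}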

extern struct pfn_info *frame_table;
extern unsigned long max_page;
void init_frametable(void);

int alloc_page_type(struct pfn_info *page, unsigned long type);
void free_page_type(struct pfn_info *page, unsigned long type);
extern void invalidate_shadow_ldt(struct vcpu *d);
extern int shadow_remove_all_write_access(
    struct domain *d, unsigned long gpfn, unsigned long gmfn);
extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
extern int _shadow_mode_refcounts(struct domain *d);

static inline void put_page(struct pfn_info *page)
{
    u32 nx, x, y = page->count_info;

    do {
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
        free_domheap_page(page);
}


static inline int get_page(struct pfn_info *page,
                           struct domain *domain)
{
    u32 x, nx, y = page->count_info;
    u32 d, nd = page->u.inuse._domain;
    u32 _domain = pickle_domptr(domain);

    do {
        x  = y;
        nx = x + 1;
        d  = nd;
        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
             unlikely(d != _domain) )                /* Wrong owner? */
        {
            if ( !_shadow_mode_refcounts(domain) )
                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n",
                        page_to_pfn(page), domain, unpickle_domptr(d),
                        x, page->u.inuse.type_info);
            return 0;
        }
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %3"
            : "=d" (nd), "=a" (y), "=c" (d),
              "=m" (*(volatile u64 *)(&page->count_info))
            : "0" (d), "1" (x), "c" (d), "b" (nx) );
    }
    while ( unlikely(nd != d) || unlikely(y != x) );

    return 1;
}
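
/*
 * Note on the cmpxchg8b above: count_info and u.inuse._domain are
 * adjacent 32-bit fields, so one 64-bit compare-and-swap can bump the
 * reference count while simultaneously re-checking that the owner has
 * not changed underneath us.  A rough equivalent of that step, written
 * with a GCC builtin purely for illustration (not the primitive Xen
 * actually uses), might look like this:
 */
static inline int get_page_sketch(struct pfn_info *page, struct domain *domain)
{
    u32 _domain = pickle_domptr(domain);
    volatile u64 *cnt_and_owner = (volatile u64 *)&page->count_info;
    u64 old, nval;
    u32 x, d;

    do {
        old = *cnt_and_owner;
        x = (u32)old;           /* current count_info (low word, LE) */
        d = (u32)(old >> 32);   /* current pickled owner (high word) */
        if ( ((x & PGC_count_mask) == 0) ||       /* not allocated?  */
             (((x + 1) & PGC_count_mask) == 0) || /* count overflow? */
             (d != _domain) )                     /* wrong owner?    */
            return 0;
        nval = ((u64)d << 32) | (x + 1);
    }
    while ( !__sync_bool_compare_and_swap(cnt_and_owner, old, nval) );

    return 1;
}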

void put_page_type(struct pfn_info *page);
int get_page_type(struct pfn_info *page, unsigned long type);
int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct pfn_info *page)
{
    put_page_type(page);
    put_page(page);
}


static inline int get_page_and_type(struct pfn_info *page,
                                    struct domain *domain,
                                    unsigned long type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}

#define ASSERT_PAGE_IS_TYPE(_p, _t)                            \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                          \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0);          \
    ASSERT(page_get_owner(_p) == (_d))

int check_descriptor(struct desc_struct *d);

/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef machine_to_phys_mapping
#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY (~0UL)
#define VALID_M2P(_e) (!((_e) & (1UL<<(BITS_PER_LONG-1))))
#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))

#define set_pfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#define get_pfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)])
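
/*
 * Usage sketch: as the changeset description notes, M2P entries are
 * word-sized (unsigned long), so an entry can be read and validity-tested
 * directly.  Hypothetical helper, for illustration only.
 */
static inline unsigned long m2p_lookup(unsigned long mfn)
{
    unsigned long pfn;

    if ( mfn >= max_page )
        return INVALID_M2P_ENTRY;
    pfn = get_pfn_from_mfn(mfn);
    return VALID_M2P(pfn) ? pfn : INVALID_M2P_ENTRY;
}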

/*
 * The phys_to_machine_mapping is the reversed mapping of MPT for full
 * virtualization. It is only used by shadow_mode_translate()==true
 * guests, so we steal the address space that would have normally
 * been used by the read-only MPT map.
 */
#define phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
#define INVALID_MFN (~0UL)
#define VALID_MFN(_mfn) (!((_mfn) & (1U<<31)))

#define set_mfn_from_pfn(pfn, mfn) (phys_to_machine_mapping[(pfn)] = (mfn))
static inline unsigned long get_mfn_from_pfn(unsigned long pfn)
{
    unsigned long mfn;
    l1_pgentry_t pte;

    if ( (__copy_from_user(&pte, &phys_to_machine_mapping[pfn],
                           sizeof(pte)) == 0) &&
         (l1e_get_flags(pte) & _PAGE_PRESENT) )
        mfn = l1e_get_pfn(pte);
    else
        mfn = INVALID_MFN;

    return mfn;
}

#ifdef MEMORY_GUARD
void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init() ((void)0)
#define memguard_guard_range(_p,_l) ((void)0)
#define memguard_unguard_range(_p,_l) ((void)0)
#endif

void memguard_guard_stack(void *p);

/* Writable Pagetables */
struct ptwr_info {
    /* Linear address where the guest is updating the p.t. page. */
    unsigned long l1va;
    /* Copy of the p.t. page, taken before guest is given write access. */
    l1_pgentry_t *page;
    /* A temporary Xen mapping of the actual p.t. page. */
    l1_pgentry_t *pl1e;
    /* Index in L2 page table where this L1 p.t. is always hooked. */
    unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
    /* Info about last ptwr update batch. */
    unsigned int prev_nr_updates;
    /* Exec domain which created writable mapping. */
    struct vcpu *vcpu;
    /* EIP of the address which took the original write fault
       used for stats collection only */
    unsigned long eip;
};

#define PTWR_PT_ACTIVE 0
#define PTWR_PT_INACTIVE 1

#define PTWR_CLEANUP_ACTIVE 1
#define PTWR_CLEANUP_INACTIVE 2

int ptwr_init(struct domain *);
void ptwr_destroy(struct domain *);
void ptwr_flush(struct domain *, const int);
int ptwr_do_page_fault(struct domain *, unsigned long,
                       struct cpu_user_regs *);
int revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);

void cleanup_writable_pagetable(struct domain *d);
#define sync_pagetable_state(d) cleanup_writable_pagetable(d)

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK             ( 1u << 1 )
#define AUDIT_QUIET                 ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif

#ifdef PERF_ARRAYS

void ptwr_eip_stat_reset();
void ptwr_eip_stat_print();

#else

#define ptwr_eip_stat_reset() ((void)0)
#define ptwr_eip_stat_print() ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);

void propagate_page_fault(unsigned long addr, u16 error_code);

extern int __sync_lazy_execstate(void);

/*
 * Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must
 * hold a reference to the page.
 */
int update_grant_va_mapping(
    unsigned long va, l1_pgentry_t _nl1e,
    struct domain *d, struct vcpu *v);
int update_grant_pte_mapping(
    unsigned long pte_addr, l1_pgentry_t _nl1e,
    struct domain *d, struct vcpu *v);
int clear_grant_va_mapping(unsigned long addr, unsigned long frame);
int clear_grant_pte_mapping(
    unsigned long addr, unsigned long frame, struct domain *d);

#endif /* __ASM_X86_MM_H__ */