debuggers.hg: xen/include/asm-x86/mm.h @ 4629:6375127fdf23

bitkeeper revision 1.1311.1.1 (426641eeBv97w6sl983zxeR4Dc3Utg)

Clean up page table handling. Add macros to access page table
entries, and fix up the many places in the code that should use
the page table types instead of "unsigned long".

Signed-off-by: Gerd Knorr <kraxel@bytesex.org>
Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Wed Apr 20 11:50:06 2005 +0000 (2005-04-20)
parents 602b1e28aba3
children 1803018b3b05

#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>

/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 *  1. 'struct pfn_info' contains a 'struct list_head list'.
 *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)

struct pfn_info
{
    /* Each frame can be threaded onto a doubly-linked list. */
    struct list_head list;

    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    u32 tlbflush_timestamp;

    /* Reference count and various PGC_xxx flags and fields. */
    u32 count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
            /* Type reference count and various PGT_xxx flags and fields. */
            u32 type_info;
        } PACKED inuse;

        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
        struct {
            /* Mask of possibly-tainted TLBs. */
            u32 cpu_mask;
            /* Order-size of the free chunk this page is the head of. */
            u8 order;
        } PACKED free;

    } PACKED u;

} PACKED;

/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none          (0<<29) /* no special uses of this page */
#define PGT_l1_page_table (1<<29) /* using this page as an L1 page table? */
#define PGT_l2_page_table (2<<29) /* using this page as an L2 page table? */
#define PGT_l3_page_table (3<<29) /* using this page as an L3 page table? */
#define PGT_l4_page_table (4<<29) /* using this page as an L4 page table? */
#define PGT_gdt_page      (5<<29) /* using this page in a GDT? */
#define PGT_ldt_page      (6<<29) /* using this page in an LDT? */
#define PGT_writable_page (7<<29) /* has writable mappings of this page? */

#define PGT_l1_shadow     PGT_l1_page_table
#define PGT_l2_shadow     PGT_l2_page_table
#define PGT_l3_shadow     PGT_l3_page_table
#define PGT_l4_shadow     PGT_l4_page_table
#define PGT_hl2_shadow    (5<<29)
#define PGT_snapshot      (6<<29)
#define PGT_writable_pred (7<<29) /* predicted gpfn with writable ref */

#define PGT_type_mask     (7<<29) /* Bits 29-31. */

/* Has this page been validated for use as its current type? */
#define _PGT_validated    28
#define PGT_validated     (1U<<_PGT_validated)
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned       27
#define PGT_pinned        (1U<<_PGT_pinned)
/* The 10 most significant bits of virt address if this is a page table. */
#define PGT_va_shift      17
#define PGT_va_mask       (((1U<<10)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
#define PGT_va_mutable    (((1U<<10)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown    (((1U<<10)-2)<<PGT_va_shift)
/* 17-bit count of uses of this frame as its current type. */
#define PGT_count_mask    ((1U<<17)-1)

#define PGT_mfn_mask      ((1U<<20)-1) /* mfn mask for shadow types */

#define PGT_score_shift   20
#define PGT_score_mask    (((1U<<4)-1)<<PGT_score_shift)

/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated    31
#define PGC_allocated     (1U<<_PGC_allocated)
/* Set when fullshadow mode marks a page out-of-sync */
#define _PGC_out_of_sync  30
#define PGC_out_of_sync   (1U<<_PGC_out_of_sync)
/* Set when fullshadow mode is using a page as a page table */
#define _PGC_page_table   29
#define PGC_page_table    (1U<<_PGC_page_table)
/* 29-bit count of references to this frame. */
#define PGC_count_mask    ((1U<<29)-1)
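
/*
 * Illustrative sketch, not part of the original header: how the masks above
 * are typically combined.  A frame whose general reference count is zero is
 * on a free list, so the u.free half of the union is the live one; otherwise
 * u.inuse is live and type_info holds one of the mutually exclusive PGT_*
 * type values together with a count of type references.
 */
static inline int example_page_is_free(struct pfn_info *page)
{
    /* Zero general reference count => the frame sits on a free list. */
    return (page->count_info & PGC_count_mask) == 0;
}

static inline u32 example_page_current_type(struct pfn_info *page)
{
    /* One of PGT_none, PGT_l1_page_table, ..., PGT_writable_page. */
    return page->u.inuse.type_info & PGT_type_mask;
}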

/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page)      (1)
#define PageSetSlab(page)   ((void)0)
#define PageClearSlab(page) ((void)0)

#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)

#if defined(__i386__)
#define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return (_domain == 0) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#endif

#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
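
/*
 * Illustrative sketch, not part of the original header: because the owner is
 * stored in pickled (32-bit) form, it should only ever be read or written
 * through the two accessors above.
 */
static inline int example_page_owned_by(struct pfn_info *page,
                                        struct domain *d)
{
    return page_get_owner(page) == d;
}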

#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \
    do { \
        page_set_owner((_pfn), (_dom)); \
        /* The incremented type count is intended to pin to 'writable'. */ \
        (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \
        wmb(); /* install valid domain ptr before updating refcnt. */ \
        spin_lock(&(_dom)->page_alloc_lock); \
        /* _dom holds an allocation reference */ \
        ASSERT((_pfn)->count_info == 0); \
        (_pfn)->count_info |= PGC_allocated | 1; \
        if ( unlikely((_dom)->xenheap_pages++ == 0) ) \
            get_knownalive_domain(_dom); \
        list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list); \
        spin_unlock(&(_dom)->page_alloc_lock); \
    } while ( 0 )
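
/*
 * Illustrative usage sketch, not part of the original header: a typical
 * caller has just allocated a xenheap frame that a guest should be able to
 * map read/write, e.g.
 *
 *     SHARE_PFN_WITH_DOMAIN(virt_to_page(page_va), d);
 *
 * where 'page_va' and 'd' are assumed names for the frame's Xen virtual
 * address and the owning domain.  Afterwards the frame is accounted to the
 * domain's xenheap_pages and pinned to the writable page type.
 */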

extern struct pfn_info *frame_table;
extern unsigned long frame_table_size;
extern unsigned long max_page;
void init_frametable(void);

int alloc_page_type(struct pfn_info *page, unsigned int type);
void free_page_type(struct pfn_info *page, unsigned int type);
extern void invalidate_shadow_ldt(struct exec_domain *d);
extern int shadow_remove_all_write_access(
    struct domain *d, unsigned long gpfn, unsigned long gmfn);
extern u32 shadow_remove_all_access(struct domain *d, unsigned long gmfn);
extern int _shadow_mode_enabled(struct domain *d);

static inline void put_page(struct pfn_info *page)
{
    u32 nx, x, y = page->count_info;

    do {
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
        free_domheap_page(page);
}

static inline int get_page(struct pfn_info *page,
                           struct domain *domain)
{
    u32 x, nx, y = page->count_info;
    u32 d, nd = page->u.inuse._domain;
    u32 _domain = pickle_domptr(domain);

    do {
        x  = y;
        nx = x + 1;
        d  = nd;
        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
             unlikely(d != _domain) )                /* Wrong owner? */
        {
            if ( !_shadow_mode_enabled(domain) )
                DPRINTK("Error pfn %p: rd=%p, od=%p, caf=%08x, taf=%08x\n",
                        page_to_pfn(page), domain, unpickle_domptr(d),
                        x, page->u.inuse.type_info);
            return 0;
        }
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %3"
            : "=d" (nd), "=a" (y), "=c" (d),
              "=m" (*(volatile u64 *)(&page->count_info))
            : "0" (d), "1" (x), "c" (d), "b" (nx) );
    }
    while ( unlikely(nd != d) || unlikely(y != x) );

    return 1;
}
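
/*
 * Illustrative sketch, not part of the original header: the usual pattern
 * when touching a frame that may belong to another domain is to take a
 * general reference first and drop it when done.
 */
static inline int example_with_page_ref(struct pfn_info *page,
                                        struct domain *d)
{
    if ( !get_page(page, d) )
        return 0; /* not allocated, refcount would overflow, or wrong owner */
    /* ... the frame is guaranteed to stay allocated and owned by d here ... */
    put_page(page);
    return 1;
}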

void put_page_type(struct pfn_info *page);
int  get_page_type(struct pfn_info *page, u32 type);
int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct pfn_info *page)
{
    put_page_type(page);
    put_page(page);
}

static inline int get_page_and_type(struct pfn_info *page,
                                    struct domain *domain,
                                    u32 type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}

#define ASSERT_PAGE_IS_TYPE(_p, _t) \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d) \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0); \
    ASSERT(page_get_owner(_p) == (_d))

int check_descriptor(struct desc_struct *d);

/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#define machine_to_phys_mapping  ((u32 *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY        (~0U)
#define VALID_M2P(_e)            (!((_e) & (1U<<31)))
#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
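
/*
 * Illustrative sketch, not part of the original header: reading an M2P entry
 * and treating a poisoned/uninitialised value as "no translation".
 */
static inline unsigned long example_mfn_to_gpfn(unsigned long mfn)
{
    u32 gpfn = machine_to_phys_mapping[mfn];
    return VALID_M2P(gpfn) ? (unsigned long)gpfn
                           : (unsigned long)INVALID_M2P_ENTRY;
}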

/*
 * The phys_to_machine_mapping is the reversed mapping of MPT for full
 * virtualization. It is only used by shadow_mode_translate()==true
 * guests, so we steal the address space that would have normally
 * been used by the read-only MPT map.
 */
#define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
#define INVALID_MFN               (~0UL)
#define VALID_MFN(_mfn)           (!((_mfn) & (1U<<31)))

/* Returns the machine frame number for a guest pfn, or INVALID_MFN. */
static inline unsigned long phys_to_machine_mapping(unsigned long pfn)
{
    unsigned long mfn;
    l1_pgentry_t pte;

    if ( !__copy_from_user(&pte, (__phys_to_machine_mapping + pfn),
                           sizeof(pte))
         && (l1e_get_flags(pte) & _PAGE_PRESENT) )
        mfn = l1e_get_pfn(pte);
    else
        mfn = INVALID_MFN;

    return mfn;
}
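
/*
 * Illustrative sketch, not part of the original header: a P2M lookup that is
 * only trusted when the reverse M2P entry agrees, as a cheap consistency
 * check for translated (shadow_mode_translate) guests.
 */
static inline int example_p2m_m2p_consistent(unsigned long pfn)
{
    unsigned long mfn = phys_to_machine_mapping(pfn);
    return VALID_MFN(mfn) && (machine_to_phys_mapping[mfn] == pfn);
}
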
#define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)

#define DEFAULT_GDT_ENTRIES (LAST_RESERVED_GDT_ENTRY+1)
#define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table)

#ifdef MEMORY_GUARD
void *memguard_init(void *heap_start);
void memguard_guard_stack(void *p);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init(_s)             (_s)
#define memguard_guard_stack(_p)      ((void)0)
#define memguard_guard_range(_p,_l)   ((void)0)
#define memguard_unguard_range(_p,_l) ((void)0)
#endif

/* Writable Pagetables */
struct ptwr_info {
    /* Linear address where the guest is updating the p.t. page. */
    unsigned long l1va;
    /* Copy of the p.t. page, taken before guest is given write access. */
    l1_pgentry_t *page;
    /* A temporary Xen mapping of the actual p.t. page. */
    l1_pgentry_t *pl1e;
    /* Index in L2 page table where this L1 p.t. is always hooked. */
    unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
    /* Info about last ptwr update batch. */
    unsigned int prev_nr_updates;
};

#define PTWR_PT_ACTIVE   0
#define PTWR_PT_INACTIVE 1

#define PTWR_CLEANUP_ACTIVE   1
#define PTWR_CLEANUP_INACTIVE 2

int  ptwr_init(struct domain *);
void ptwr_destroy(struct domain *);
void ptwr_flush(struct domain *, const int);
int  ptwr_do_page_fault(struct domain *, unsigned long);

#define cleanup_writable_pagetable(_d) \
    do { \
        if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) { \
            if ( (_d)->arch.ptwr[PTWR_PT_ACTIVE].l1va ) \
                ptwr_flush((_d), PTWR_PT_ACTIVE); \
            if ( (_d)->arch.ptwr[PTWR_PT_INACTIVE].l1va ) \
                ptwr_flush((_d), PTWR_PT_INACTIVE); \
        } \
    } while ( 0 )

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK      ( 1u << 1 )
#define AUDIT_QUIET          ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), 0)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);

void propagate_page_fault(unsigned long addr, u16 error_code);

/*
 * Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must
 * hold a reference to the page.
 */
int update_grant_va_mapping(unsigned long va,
                            l1_pgentry_t _nl1e,
                            struct domain *d,
                            struct exec_domain *ed);

#endif /* __ASM_X86_MM_H__ */