/root/src/xen/xen/arch/x86/pv/ro-page-fault.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * arch/x86/pv/ro-page-fault.c |
3 | | * |
4 | | * Read-only page fault emulation for PV guests |
5 | | * |
6 | | * Copyright (c) 2002-2005 K A Fraser |
7 | | * Copyright (c) 2004 Christian Limpach |
8 | | * |
9 | | * This program is free software; you can redistribute it and/or modify |
10 | | * it under the terms of the GNU General Public License as published by |
11 | | * the Free Software Foundation; either version 2 of the License, or |
12 | | * (at your option) any later version. |
13 | | * |
14 | | * This program is distributed in the hope that it will be useful, |
15 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | * GNU General Public License for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU General Public License |
20 | | * along with this program; If not, see <http://www.gnu.org/licenses/>. |
21 | | */ |
22 | | |
23 | | #include <xen/guest_access.h> |
24 | | #include <xen/rangeset.h> |
25 | | #include <xen/sched.h> |
26 | | #include <xen/trace.h> |
27 | | |
28 | | #include <asm/domain.h> |
29 | | #include <asm/mm.h> |
30 | | #include <asm/pci.h> |
31 | | #include <asm/pv/mm.h> |
32 | | |
33 | | #include "emulate.h" |
34 | | #include "mm.h" |
35 | | |
/*
 * Replace the asm/page.h helpers with variants that take (mfn_to_page) or
 * yield (page_to_mfn) the mfn_t wrapper type.
 */
#undef mfn_to_page
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#undef page_to_mfn
#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))
41 | | |
42 | | /********************* |
43 | | * Writable Pagetables |
44 | | */ |
45 | | |
46 | | struct ptwr_emulate_ctxt { |
47 | | unsigned long cr2; |
48 | | l1_pgentry_t pte; |
49 | | }; |
50 | | |
51 | | static int ptwr_emulated_read(enum x86_segment seg, unsigned long offset, |
52 | | void *p_data, unsigned int bytes, |
53 | | struct x86_emulate_ctxt *ctxt) |
54 | 0 | { |
55 | 0 | unsigned int rc = bytes; |
56 | 0 | unsigned long addr = offset; |
57 | 0 |
|
58 | 0 | if ( !__addr_ok(addr) || |
59 | 0 | (rc = __copy_from_user(p_data, (void *)addr, bytes)) ) |
60 | 0 | { |
61 | 0 | x86_emul_pagefault(0, addr + bytes - rc, ctxt); /* Read fault. */ |
62 | 0 | return X86EMUL_EXCEPTION; |
63 | 0 | } |
64 | 0 |
|
65 | 0 | return X86EMUL_OKAY; |
66 | 0 | } |
67 | | |
68 | | static int ptwr_emulated_update(unsigned long addr, paddr_t old, paddr_t val, |
69 | | unsigned int bytes, unsigned int do_cmpxchg, |
70 | | struct x86_emulate_ctxt *ctxt) |
71 | 0 | { |
72 | 0 | unsigned long mfn; |
73 | 0 | unsigned long unaligned_addr = addr; |
74 | 0 | struct page_info *page; |
75 | 0 | l1_pgentry_t pte, ol1e, nl1e, *pl1e; |
76 | 0 | struct vcpu *v = current; |
77 | 0 | struct domain *d = v->domain; |
78 | 0 | struct ptwr_emulate_ctxt *ptwr_ctxt = ctxt->data; |
79 | 0 | int ret; |
80 | 0 |
|
81 | 0 | /* Only allow naturally-aligned stores within the original %cr2 page. */ |
82 | 0 | if ( unlikely(((addr ^ ptwr_ctxt->cr2) & PAGE_MASK) || |
83 | 0 | (addr & (bytes - 1))) ) |
84 | 0 | { |
85 | 0 | gdprintk(XENLOG_WARNING, "bad access (cr2=%lx, addr=%lx, bytes=%u)\n", |
86 | 0 | ptwr_ctxt->cr2, addr, bytes); |
87 | 0 | return X86EMUL_UNHANDLEABLE; |
88 | 0 | } |
89 | 0 |
|
90 | 0 | /* Turn a sub-word access into a full-word access. */ |
91 | 0 | if ( bytes != sizeof(paddr_t) ) |
92 | 0 | { |
93 | 0 | paddr_t full; |
94 | 0 | unsigned int rc, offset = addr & (sizeof(paddr_t) - 1); |
95 | 0 |
|
96 | 0 | /* Align address; read full word. */ |
97 | 0 | addr &= ~(sizeof(paddr_t) - 1); |
98 | 0 | if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 ) |
99 | 0 | { |
100 | 0 | x86_emul_pagefault(0, /* Read fault. */ |
101 | 0 | addr + sizeof(paddr_t) - rc, |
102 | 0 | ctxt); |
103 | 0 | return X86EMUL_EXCEPTION; |
104 | 0 | } |
105 | 0 | /* Mask out bits provided by caller. */ |
106 | 0 | full &= ~((((paddr_t)1 << (bytes * 8)) - 1) << (offset * 8)); |
107 | 0 | /* Shift the caller value and OR in the missing bits. */ |
108 | 0 | val &= (((paddr_t)1 << (bytes * 8)) - 1); |
109 | 0 | val <<= (offset) * 8; |
110 | 0 | val |= full; |
111 | 0 | /* Also fill in missing parts of the cmpxchg old value. */ |
112 | 0 | old &= (((paddr_t)1 << (bytes * 8)) - 1); |
113 | 0 | old <<= (offset) * 8; |
114 | 0 | old |= full; |
115 | 0 | } |
116 | 0 |
|
117 | 0 | pte = ptwr_ctxt->pte; |
118 | 0 | mfn = l1e_get_pfn(pte); |
119 | 0 | page = mfn_to_page(_mfn(mfn)); |
120 | 0 |
|
121 | 0 | /* We are looking only for read-only mappings of p.t. pages. */ |
122 | 0 | ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT); |
123 | 0 | ASSERT(mfn_valid(_mfn(mfn))); |
124 | 0 | ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table); |
125 | 0 | ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0); |
126 | 0 | ASSERT(page_get_owner(page) == d); |
127 | 0 |
|
128 | 0 | /* Check the new PTE. */ |
129 | 0 | nl1e = l1e_from_intpte(val); |
130 | 0 | switch ( ret = get_page_from_l1e(nl1e, d, d) ) |
131 | 0 | { |
132 | 0 | default: |
133 | 0 | if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) && |
134 | 0 | !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) |
135 | 0 | { |
136 | 0 | /* |
137 | 0 | * If this is an upper-half write to a PAE PTE then we assume that |
138 | 0 | * the guest has simply got the two writes the wrong way round. We |
139 | 0 | * zap the PRESENT bit on the assumption that the bottom half will |
140 | 0 | * be written immediately after we return to the guest. |
141 | 0 | */ |
142 | 0 | gdprintk(XENLOG_DEBUG, "ptwr_emulate: fixing up invalid PAE PTE %" |
143 | 0 | PRIpte"\n", l1e_get_intpte(nl1e)); |
144 | 0 | l1e_remove_flags(nl1e, _PAGE_PRESENT); |
145 | 0 | } |
146 | 0 | else |
147 | 0 | { |
148 | 0 | gdprintk(XENLOG_WARNING, "could not get_page_from_l1e()\n"); |
149 | 0 | return X86EMUL_UNHANDLEABLE; |
150 | 0 | } |
151 | 0 | break; |
152 | 0 | case 0: |
153 | 0 | break; |
154 | 0 | case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: |
155 | 0 | ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); |
156 | 0 | l1e_flip_flags(nl1e, ret); |
157 | 0 | break; |
158 | 0 | } |
159 | 0 |
|
160 | 0 | nl1e = adjust_guest_l1e(nl1e, d); |
161 | 0 |
|
162 | 0 | /* Checked successfully: do the update (write or cmpxchg). */ |
163 | 0 | pl1e = map_domain_page(_mfn(mfn)); |
164 | 0 | pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK)); |
165 | 0 | if ( do_cmpxchg ) |
166 | 0 | { |
167 | 0 | bool okay; |
168 | 0 | intpte_t t = old; |
169 | 0 |
|
170 | 0 | ol1e = l1e_from_intpte(old); |
171 | 0 | okay = paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e), |
172 | 0 | &t, l1e_get_intpte(nl1e), _mfn(mfn)); |
173 | 0 | okay = (okay && t == old); |
174 | 0 |
|
175 | 0 | if ( !okay ) |
176 | 0 | { |
177 | 0 | unmap_domain_page(pl1e); |
178 | 0 | put_page_from_l1e(nl1e, d); |
179 | 0 | return X86EMUL_RETRY; |
180 | 0 | } |
181 | 0 | } |
182 | 0 | else |
183 | 0 | { |
184 | 0 | ol1e = *pl1e; |
185 | 0 | if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, mfn, v, 0) ) |
186 | 0 | BUG(); |
187 | 0 | } |
188 | 0 |
|
189 | 0 | trace_ptwr_emulation(addr, nl1e); |
190 | 0 |
|
191 | 0 | unmap_domain_page(pl1e); |
192 | 0 |
|
193 | 0 | /* Finally, drop the old PTE. */ |
194 | 0 | put_page_from_l1e(ol1e, d); |
195 | 0 |
|
196 | 0 | return X86EMUL_OKAY; |
197 | 0 | } |
198 | | |
199 | | static int ptwr_emulated_write(enum x86_segment seg, unsigned long offset, |
200 | | void *p_data, unsigned int bytes, |
201 | | struct x86_emulate_ctxt *ctxt) |
202 | 0 | { |
203 | 0 | paddr_t val = 0; |
204 | 0 |
|
205 | 0 | if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) || !bytes ) |
206 | 0 | { |
207 | 0 | gdprintk(XENLOG_WARNING, "bad write size (addr=%lx, bytes=%u)\n", |
208 | 0 | offset, bytes); |
209 | 0 | return X86EMUL_UNHANDLEABLE; |
210 | 0 | } |
211 | 0 |
|
212 | 0 | memcpy(&val, p_data, bytes); |
213 | 0 |
|
214 | 0 | return ptwr_emulated_update(offset, 0, val, bytes, 0, ctxt); |
215 | 0 | } |
216 | | |
217 | | static int ptwr_emulated_cmpxchg(enum x86_segment seg, unsigned long offset, |
218 | | void *p_old, void *p_new, unsigned int bytes, |
219 | | struct x86_emulate_ctxt *ctxt) |
220 | 0 | { |
221 | 0 | paddr_t old = 0, new = 0; |
222 | 0 |
|
223 | 0 | if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) ) |
224 | 0 | { |
225 | 0 | gdprintk(XENLOG_WARNING, "bad cmpxchg size (addr=%lx, bytes=%u)\n", |
226 | 0 | offset, bytes); |
227 | 0 | return X86EMUL_UNHANDLEABLE; |
228 | 0 | } |
229 | 0 |
|
230 | 0 | memcpy(&old, p_old, bytes); |
231 | 0 | memcpy(&new, p_new, bytes); |
232 | 0 |
|
233 | 0 | return ptwr_emulated_update(offset, old, new, bytes, 1, ctxt); |
234 | 0 | } |
235 | | |
236 | | static const struct x86_emulate_ops ptwr_emulate_ops = { |
237 | | .read = ptwr_emulated_read, |
238 | | .insn_fetch = ptwr_emulated_read, |
239 | | .write = ptwr_emulated_write, |
240 | | .cmpxchg = ptwr_emulated_cmpxchg, |
241 | | .validate = pv_emul_is_mem_write, |
242 | | .cpuid = pv_emul_cpuid, |
243 | | }; |
244 | | |
245 | | /* Write page fault handler: check if guest is trying to modify a PTE. */ |
246 | | static int ptwr_do_page_fault(struct x86_emulate_ctxt *ctxt, |
247 | | unsigned long addr, l1_pgentry_t pte) |
248 | 0 | { |
249 | 0 | struct ptwr_emulate_ctxt ptwr_ctxt = { |
250 | 0 | .cr2 = addr, |
251 | 0 | .pte = pte, |
252 | 0 | }; |
253 | 0 | struct page_info *page; |
254 | 0 | int rc; |
255 | 0 |
|
256 | 0 | if ( !get_page_from_mfn(l1e_get_mfn(pte), current->domain) ) |
257 | 0 | return X86EMUL_UNHANDLEABLE; |
258 | 0 |
|
259 | 0 | page = l1e_get_page(pte); |
260 | 0 | if ( !page_lock(page) ) |
261 | 0 | { |
262 | 0 | put_page(page); |
263 | 0 | return X86EMUL_UNHANDLEABLE; |
264 | 0 | } |
265 | 0 |
|
266 | 0 | if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table ) |
267 | 0 | { |
268 | 0 | page_unlock(page); |
269 | 0 | put_page(page); |
270 | 0 | return X86EMUL_UNHANDLEABLE; |
271 | 0 | } |
272 | 0 |
|
273 | 0 | ctxt->data = &ptwr_ctxt; |
274 | 0 | rc = x86_emulate(ctxt, &ptwr_emulate_ops); |
275 | 0 |
|
276 | 0 | page_unlock(page); |
277 | 0 | put_page(page); |
278 | 0 |
|
279 | 0 | return rc; |
280 | 0 | } |
281 | | |
282 | | /***************************************** |
283 | | * fault handling for read-only MMIO pages |
284 | | */ |
285 | | |
286 | | static const struct x86_emulate_ops mmio_ro_emulate_ops = { |
287 | | .read = x86emul_unhandleable_rw, |
288 | | .insn_fetch = ptwr_emulated_read, |
289 | | .write = mmio_ro_emulated_write, |
290 | | .validate = pv_emul_is_mem_write, |
291 | | .cpuid = pv_emul_cpuid, |
292 | | }; |
293 | | |
294 | | static const struct x86_emulate_ops mmcfg_intercept_ops = { |
295 | | .read = x86emul_unhandleable_rw, |
296 | | .insn_fetch = ptwr_emulated_read, |
297 | | .write = mmcfg_intercept_write, |
298 | | .validate = pv_emul_is_mem_write, |
299 | | .cpuid = pv_emul_cpuid, |
300 | | }; |
301 | | |
302 | | /* Check if guest is trying to modify a r/o MMIO page. */ |
303 | | static int mmio_ro_do_page_fault(struct x86_emulate_ctxt *ctxt, |
304 | | unsigned long addr, l1_pgentry_t pte) |
305 | 0 | { |
306 | 0 | struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = addr }; |
307 | 0 | mfn_t mfn = l1e_get_mfn(pte); |
308 | 0 |
|
309 | 0 | if ( mfn_valid(mfn) ) |
310 | 0 | { |
311 | 0 | struct page_info *page = mfn_to_page(mfn); |
312 | 0 | const struct domain *owner = page_get_owner_and_reference(page); |
313 | 0 |
|
314 | 0 | if ( owner ) |
315 | 0 | put_page(page); |
316 | 0 | if ( owner != dom_io ) |
317 | 0 | return X86EMUL_UNHANDLEABLE; |
318 | 0 | } |
319 | 0 |
|
320 | 0 | ctxt->data = &mmio_ro_ctxt; |
321 | 0 | if ( pci_ro_mmcfg_decode(mfn_x(mfn), &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) ) |
322 | 0 | return x86_emulate(ctxt, &mmcfg_intercept_ops); |
323 | 0 | else |
324 | 0 | return x86_emulate(ctxt, &mmio_ro_emulate_ops); |
325 | 0 | } |
326 | | |
327 | | int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs) |
328 | 0 | { |
329 | 0 | l1_pgentry_t pte; |
330 | 0 | const struct domain *currd = current->domain; |
331 | 0 | unsigned int addr_size = is_pv_32bit_domain(currd) ? 32 : BITS_PER_LONG; |
332 | 0 | struct x86_emulate_ctxt ctxt = { |
333 | 0 | .regs = regs, |
334 | 0 | .vendor = currd->arch.cpuid->x86_vendor, |
335 | 0 | .addr_size = addr_size, |
336 | 0 | .sp_size = addr_size, |
337 | 0 | .lma = addr_size > 32, |
338 | 0 | }; |
339 | 0 | int rc; |
340 | 0 | bool mmio_ro; |
341 | 0 |
|
342 | 0 | /* Attempt to read the PTE that maps the VA being accessed. */ |
343 | 0 | pte = guest_get_eff_l1e(addr); |
344 | 0 |
|
345 | 0 | /* We are only looking for read-only mappings */ |
346 | 0 | if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT | _PAGE_RW)) != _PAGE_PRESENT) ) |
347 | 0 | return 0; |
348 | 0 |
|
349 | 0 | mmio_ro = is_hardware_domain(currd) && |
350 | 0 | rangeset_contains_singleton(mmio_ro_ranges, l1e_get_pfn(pte)); |
351 | 0 | if ( mmio_ro ) |
352 | 0 | rc = mmio_ro_do_page_fault(&ctxt, addr, pte); |
353 | 0 | else |
354 | 0 | rc = ptwr_do_page_fault(&ctxt, addr, pte); |
355 | 0 |
|
356 | 0 | switch ( rc ) |
357 | 0 | { |
358 | 0 | case X86EMUL_EXCEPTION: |
359 | 0 | /* |
360 | 0 | * This emulation covers writes to: |
361 | 0 | * - L1 pagetables. |
362 | 0 | * - MMCFG space or read-only MFNs. |
363 | 0 | * We tolerate #PF (from hitting an adjacent page or a successful |
364 | 0 | * concurrent pagetable update). Anything else is an emulation bug, |
365 | 0 | * or a guest playing with the instruction stream under Xen's feet. |
366 | 0 | */ |
367 | 0 | if ( ctxt.event.type == X86_EVENTTYPE_HW_EXCEPTION && |
368 | 0 | ctxt.event.vector == TRAP_page_fault ) |
369 | 0 | pv_inject_event(&ctxt.event); |
370 | 0 | else |
371 | 0 | gdprintk(XENLOG_WARNING, |
372 | 0 | "Unexpected event (type %u, vector %#x) from emulation\n", |
373 | 0 | ctxt.event.type, ctxt.event.vector); |
374 | 0 |
|
375 | 0 | /* Fallthrough */ |
376 | 0 | case X86EMUL_OKAY: |
377 | 0 | if ( ctxt.retire.singlestep ) |
378 | 0 | pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); |
379 | 0 |
|
380 | 0 | /* Fallthrough */ |
381 | 0 | case X86EMUL_RETRY: |
382 | 0 | if ( mmio_ro ) |
383 | 0 | perfc_incr(mmio_ro_emulations); |
384 | 0 | else |
385 | 0 | perfc_incr(ptwr_emulations); |
386 | 0 | return EXCRET_fault_fixed; |
387 | 0 | } |
388 | 0 |
|
389 | 0 | return 0; |
390 | 0 | } |
391 | | |
392 | | /* |
393 | | * Local variables: |
394 | | * mode: C |
395 | | * c-file-style: "BSD" |
396 | | * c-basic-offset: 4 |
397 | | * tab-width: 4 |
398 | | * indent-tabs-mode: nil |
399 | | * End: |
400 | | */ |