debuggers.hg
changeset 17602:b6aa55ca599e
shadow: track video RAM dirty bits
This adds a new HVM op that enables tracking dirty bits of a range of
video RAM. The idea is to optimize just for the most common case
(only one guest mapping, with sometimes some temporary other
mappings), which permits to keep the overhead on shadow as low as
possible.
Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Fri May 02 15:08:27 2008 +0100 (2008-05-02) |
parents | cd5fa4e7993f |
children | 6bd4625a20ee |
files | tools/ioemu/hw/cirrus_vga.c tools/ioemu/hw/vga.c tools/ioemu/hw/vga_int.h tools/libxc/xc_misc.c tools/libxc/xenctrl.h xen/arch/ia64/vmx/vmx_hypercall.c xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/include/asm-ia64/config.h xen/include/asm-powerpc/types.h xen/include/asm-x86/shadow.h xen/include/asm-x86/types.h xen/include/public/hvm/hvm_op.h xen/include/xen/sched.h |
line diff
1.1 --- a/tools/ioemu/hw/cirrus_vga.c Fri May 02 14:35:27 2008 +0100 1.2 +++ b/tools/ioemu/hw/cirrus_vga.c Fri May 02 15:08:27 2008 +0100 1.3 @@ -234,8 +234,6 @@ typedef struct CirrusVGAState { 1.4 int cirrus_linear_io_addr; 1.5 int cirrus_linear_bitblt_io_addr; 1.6 int cirrus_mmio_io_addr; 1.7 - unsigned long cirrus_lfb_addr; 1.8 - unsigned long cirrus_lfb_end; 1.9 uint32_t cirrus_addr_mask; 1.10 uint32_t linear_mmio_mask; 1.11 uint8_t cirrus_shadow_gr0; 1.12 @@ -2657,11 +2655,11 @@ static void cirrus_update_memory_access( 1.13 1.14 mode = s->gr[0x05] & 0x7; 1.15 if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) { 1.16 - if (s->cirrus_lfb_addr && s->cirrus_lfb_end && !s->map_addr) { 1.17 + if (s->lfb_addr && s->lfb_end && !s->map_addr) { 1.18 void *vram_pointer, *old_vram; 1.19 1.20 - vram_pointer = set_vram_mapping(s->cirrus_lfb_addr, 1.21 - s->cirrus_lfb_end); 1.22 + vram_pointer = set_vram_mapping(s->lfb_addr, 1.23 + s->lfb_end); 1.24 if (!vram_pointer) 1.25 fprintf(stderr, "NULL vram_pointer\n"); 1.26 else { 1.27 @@ -2669,21 +2667,21 @@ static void cirrus_update_memory_access( 1.28 VGA_RAM_SIZE); 1.29 qemu_free(old_vram); 1.30 } 1.31 - s->map_addr = s->cirrus_lfb_addr; 1.32 - s->map_end = s->cirrus_lfb_end; 1.33 + s->map_addr = s->lfb_addr; 1.34 + s->map_end = s->lfb_end; 1.35 } 1.36 s->cirrus_linear_write[0] = cirrus_linear_mem_writeb; 1.37 s->cirrus_linear_write[1] = cirrus_linear_mem_writew; 1.38 s->cirrus_linear_write[2] = cirrus_linear_mem_writel; 1.39 } else { 1.40 generic_io: 1.41 - if (s->cirrus_lfb_addr && s->cirrus_lfb_end && s->map_addr) { 1.42 + if (s->lfb_addr && s->lfb_end && s->map_addr) { 1.43 void *old_vram; 1.44 1.45 old_vram = vga_update_vram((VGAState *)s, NULL, VGA_RAM_SIZE); 1.46 1.47 - unset_vram_mapping(s->cirrus_lfb_addr, 1.48 - s->cirrus_lfb_end, 1.49 + unset_vram_mapping(s->lfb_addr, 1.50 + s->lfb_end, 1.51 old_vram); 1.52 1.53 s->map_addr = s->map_end = 0; 1.54 @@ -3049,27 +3047,27 @@ void cirrus_stop_acc(CirrusVGAState *s) 
1.55 if (s->map_addr){ 1.56 int error; 1.57 s->map_addr = 0; 1.58 - error = unset_vram_mapping(s->cirrus_lfb_addr, 1.59 - s->cirrus_lfb_end, s->vram_ptr); 1.60 + error = unset_vram_mapping(s->lfb_addr, 1.61 + s->lfb_end, s->vram_ptr); 1.62 fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n"); 1.63 } 1.64 } 1.65 1.66 void cirrus_restart_acc(CirrusVGAState *s) 1.67 { 1.68 - if (s->cirrus_lfb_addr && s->cirrus_lfb_end) { 1.69 + if (s->lfb_addr && s->lfb_end) { 1.70 void *vram_pointer, *old_vram; 1.71 fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n", 1.72 - s->cirrus_lfb_addr, s->cirrus_lfb_end); 1.73 - vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end); 1.74 + s->lfb_addr, s->lfb_end); 1.75 + vram_pointer = set_vram_mapping(s->lfb_addr ,s->lfb_end); 1.76 if (!vram_pointer){ 1.77 fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n"); 1.78 } else { 1.79 old_vram = vga_update_vram((VGAState *)s, vram_pointer, 1.80 VGA_RAM_SIZE); 1.81 qemu_free(old_vram); 1.82 - s->map_addr = s->cirrus_lfb_addr; 1.83 - s->map_end = s->cirrus_lfb_end; 1.84 + s->map_addr = s->lfb_addr; 1.85 + s->map_end = s->lfb_end; 1.86 } 1.87 } 1.88 } 1.89 @@ -3120,8 +3118,8 @@ static void cirrus_vga_save(QEMUFile *f, 1.90 1.91 vga_acc = (!!s->map_addr); 1.92 qemu_put_8s(f, &vga_acc); 1.93 - qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr); 1.94 - qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end); 1.95 + qemu_put_be64s(f, (uint64_t*)&s->lfb_addr); 1.96 + qemu_put_be64s(f, (uint64_t*)&s->lfb_end); 1.97 qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 1.98 } 1.99 1.100 @@ -3175,8 +3173,8 @@ static int cirrus_vga_load(QEMUFile *f, 1.101 qemu_get_be32s(f, &s->hw_cursor_y); 1.102 1.103 qemu_get_8s(f, &vga_acc); 1.104 - qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr); 1.105 - qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end); 1.106 + qemu_get_be64s(f, (uint64_t*)&s->lfb_addr); 1.107 + qemu_get_be64s(f, (uint64_t*)&s->lfb_end); 1.108 qemu_get_buffer(f, 
s->vram_ptr, VGA_RAM_SIZE); 1.109 if (vga_acc){ 1.110 cirrus_restart_acc(s); 1.111 @@ -3337,11 +3335,11 @@ static void cirrus_pci_lfb_map(PCIDevice 1.112 /* XXX: add byte swapping apertures */ 1.113 cpu_register_physical_memory(addr, s->vram_size, 1.114 s->cirrus_linear_io_addr); 1.115 - s->cirrus_lfb_addr = addr; 1.116 - s->cirrus_lfb_end = addr + VGA_RAM_SIZE; 1.117 - 1.118 - if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) && 1.119 - (s->cirrus_lfb_end != s->map_end)) 1.120 + s->lfb_addr = addr; 1.121 + s->lfb_end = addr + VGA_RAM_SIZE; 1.122 + 1.123 + if (s->map_addr && (s->lfb_addr != s->map_addr) && 1.124 + (s->lfb_end != s->map_end)) 1.125 fprintf(logfile, "cirrus vga map change while on lfb mode\n"); 1.126 1.127 cpu_register_physical_memory(addr + 0x1000000, 0x400000,
2.1 --- a/tools/ioemu/hw/vga.c Fri May 02 14:35:27 2008 +0100 2.2 +++ b/tools/ioemu/hw/vga.c Fri May 02 15:08:27 2008 +0100 2.3 @@ -1086,6 +1086,9 @@ static void vga_draw_text(VGAState *s, i 2.4 vga_draw_glyph8_func *vga_draw_glyph8; 2.5 vga_draw_glyph9_func *vga_draw_glyph9; 2.6 2.7 + /* Disable dirty bit tracking */ 2.8 + xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL); 2.9 + 2.10 if (s->ds->dpy_colourdepth != NULL && s->ds->depth != 0) 2.11 s->ds->dpy_colourdepth(s->ds, 0); 2.12 s->rgb_to_pixel = 2.13 @@ -1485,7 +1488,7 @@ void check_sse2(void) 2.14 static void vga_draw_graphic(VGAState *s, int full_update) 2.15 { 2.16 int y1, y, update, linesize, y_start, double_scan, mask, depth; 2.17 - int width, height, shift_control, line_offset, bwidth, ds_depth; 2.18 + int width, height, shift_control, line_offset, bwidth, ds_depth, bits; 2.19 ram_addr_t page0, page1; 2.20 int disp_width, multi_scan, multi_run; 2.21 uint8_t *d; 2.22 @@ -1533,6 +1536,7 @@ static void vga_draw_graphic(VGAState *s 2.23 } else { 2.24 v = VGA_DRAW_LINE4; 2.25 } 2.26 + bits = 4; 2.27 } else if (shift_control == 1) { 2.28 full_update |= update_palette16(s); 2.29 if (s->sr[0x01] & 8) { 2.30 @@ -1541,28 +1545,35 @@ static void vga_draw_graphic(VGAState *s 2.31 } else { 2.32 v = VGA_DRAW_LINE2; 2.33 } 2.34 + bits = 4; 2.35 } else { 2.36 switch(s->get_bpp(s)) { 2.37 default: 2.38 case 0: 2.39 full_update |= update_palette256(s); 2.40 v = VGA_DRAW_LINE8D2; 2.41 + bits = 4; 2.42 break; 2.43 case 8: 2.44 full_update |= update_palette256(s); 2.45 v = VGA_DRAW_LINE8; 2.46 + bits = 8; 2.47 break; 2.48 case 15: 2.49 v = VGA_DRAW_LINE15; 2.50 + bits = 16; 2.51 break; 2.52 case 16: 2.53 v = VGA_DRAW_LINE16; 2.54 + bits = 16; 2.55 break; 2.56 case 24: 2.57 v = VGA_DRAW_LINE24; 2.58 + bits = 24; 2.59 break; 2.60 case 32: 2.61 v = VGA_DRAW_LINE32; 2.62 + bits = 32; 2.63 break; 2.64 } 2.65 } 2.66 @@ -1590,12 +1601,72 @@ static void vga_draw_graphic(VGAState *s 2.67 width, height, v, line_offset, s->cr[9], 
s->cr[0x17], s->line_compare, s->sr[0x01]); 2.68 #endif 2.69 2.70 - for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE) 2.71 - if (vram_dirty(s, y, TARGET_PAGE_SIZE)) 2.72 + y = 0; 2.73 + 2.74 + if (height - 1 > s->line_compare || multi_run || (s->cr[0x17] & 3) != 3 2.75 + || !s->lfb_addr) { 2.76 + /* Tricky things happen, disable dirty bit tracking */ 2.77 + xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL); 2.78 + 2.79 + for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE) 2.80 + if (vram_dirty(s, y, TARGET_PAGE_SIZE)) 2.81 + cpu_physical_memory_set_dirty(s->vram_offset + y); 2.82 + } else { 2.83 + /* Tricky things won't have any effect, i.e. we are in the very simple 2.84 + * (and very usual) case of a linear buffer. */ 2.85 + unsigned long end; 2.86 + 2.87 + for ( ; y < ((s->start_addr * 4) & TARGET_PAGE_MASK); y += TARGET_PAGE_SIZE) 2.88 + /* We will not read that anyway. */ 2.89 cpu_physical_memory_set_dirty(s->vram_offset + y); 2.90 2.91 + if (y < (s->start_addr * 4)) { 2.92 + /* start address not aligned on a page, track dirtyness by hand. 
*/ 2.93 + if (vram_dirty(s, y, TARGET_PAGE_SIZE)) 2.94 + cpu_physical_memory_set_dirty(s->vram_offset + y); 2.95 + y += TARGET_PAGE_SIZE; 2.96 + } 2.97 + 2.98 + /* use page table dirty bit tracking for the inner of the LFB */ 2.99 + end = s->start_addr * 4 + height * line_offset; 2.100 + { 2.101 + unsigned long npages = ((end & TARGET_PAGE_MASK) - y) / TARGET_PAGE_SIZE; 2.102 + const int width = sizeof(unsigned long) * 8; 2.103 + unsigned long bitmap[(npages + width - 1) / width]; 2.104 + int err; 2.105 + 2.106 + if (!(err = xc_hvm_track_dirty_vram(xc_handle, domid, 2.107 + (s->lfb_addr + y) / TARGET_PAGE_SIZE, npages, bitmap))) { 2.108 + int i, j; 2.109 + for (i = 0; i < sizeof(bitmap) / sizeof(*bitmap); i++) { 2.110 + unsigned long map = bitmap[i]; 2.111 + for (j = i * width; map && j < npages; map >>= 1, j++) 2.112 + if (map & 1) 2.113 + cpu_physical_memory_set_dirty(s->vram_offset + y 2.114 + + j * TARGET_PAGE_SIZE); 2.115 + } 2.116 + y += npages * TARGET_PAGE_SIZE; 2.117 + } else { 2.118 + /* ENODATA just means we have changed mode and will succeed 2.119 + * next time */ 2.120 + if (err != -ENODATA) 2.121 + fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d)\n", s->lfb_addr + y, npages, err); 2.122 + } 2.123 + } 2.124 + 2.125 + for ( ; y < s->vram_size && y < end; y += TARGET_PAGE_SIZE) 2.126 + /* failed or end address not aligned on a page, track dirtyness by 2.127 + * hand. */ 2.128 + if (vram_dirty(s, y, TARGET_PAGE_SIZE)) 2.129 + cpu_physical_memory_set_dirty(s->vram_offset + y); 2.130 + 2.131 + for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE) 2.132 + /* We will not read that anyway. 
*/ 2.133 + cpu_physical_memory_set_dirty(s->vram_offset + y); 2.134 + } 2.135 + 2.136 addr1 = (s->start_addr * 4); 2.137 - bwidth = width * 4; 2.138 + bwidth = (width * bits + 7) / 8; 2.139 y_start = -1; 2.140 page_min = 0; 2.141 page_max = 0; 2.142 @@ -1681,6 +1752,10 @@ static void vga_draw_blank(VGAState *s, 2.143 return; 2.144 if (s->last_scr_width <= 0 || s->last_scr_height <= 0) 2.145 return; 2.146 + 2.147 + /* Disable dirty bit tracking */ 2.148 + xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL); 2.149 + 2.150 s->rgb_to_pixel = 2.151 rgb_to_pixel_dup_table[get_depth_index(s->ds)]; 2.152 if (s->ds->depth == 8)
3.1 --- a/tools/ioemu/hw/vga_int.h Fri May 02 14:35:27 2008 +0100 3.2 +++ b/tools/ioemu/hw/vga_int.h Fri May 02 15:08:27 2008 +0100 3.3 @@ -87,6 +87,8 @@ 3.4 unsigned int vram_size; \ 3.5 unsigned long bios_offset; \ 3.6 unsigned int bios_size; \ 3.7 + unsigned long lfb_addr; \ 3.8 + unsigned long lfb_end; \ 3.9 PCIDevice *pci_dev; \ 3.10 uint32_t latch; \ 3.11 uint8_t sr_index; \
4.1 --- a/tools/libxc/xc_misc.c Fri May 02 14:35:27 2008 +0100 4.2 +++ b/tools/libxc/xc_misc.c Fri May 02 15:08:27 2008 +0100 4.3 @@ -236,6 +236,37 @@ int xc_hvm_set_pci_link_route( 4.4 return rc; 4.5 } 4.6 4.7 +int xc_hvm_track_dirty_vram( 4.8 + int xc_handle, domid_t dom, 4.9 + uint64_t first_pfn, uint64_t nr, 4.10 + unsigned long *dirty_bitmap) 4.11 +{ 4.12 + DECLARE_HYPERCALL; 4.13 + struct xen_hvm_track_dirty_vram arg; 4.14 + int rc; 4.15 + 4.16 + hypercall.op = __HYPERVISOR_hvm_op; 4.17 + hypercall.arg[0] = HVMOP_track_dirty_vram; 4.18 + hypercall.arg[1] = (unsigned long)&arg; 4.19 + 4.20 + arg.domid = dom; 4.21 + arg.first_pfn = first_pfn; 4.22 + arg.nr = nr; 4.23 + set_xen_guest_handle(arg.dirty_bitmap, (uint8_t *)dirty_bitmap); 4.24 + 4.25 + if ( (rc = lock_pages(&arg, sizeof(arg))) != 0 ) 4.26 + { 4.27 + PERROR("Could not lock memory"); 4.28 + return rc; 4.29 + } 4.30 + 4.31 + rc = do_xen_hypercall(xc_handle, &hypercall); 4.32 + 4.33 + unlock_pages(&arg, sizeof(arg)); 4.34 + 4.35 + return rc; 4.36 +} 4.37 + 4.38 void *xc_map_foreign_pages(int xc_handle, uint32_t dom, int prot, 4.39 const xen_pfn_t *arr, int num) 4.40 {
5.1 --- a/tools/libxc/xenctrl.h Fri May 02 14:35:27 2008 +0100 5.2 +++ b/tools/libxc/xenctrl.h Fri May 02 15:08:27 2008 +0100 5.3 @@ -882,6 +882,22 @@ int xc_hvm_set_pci_link_route( 5.4 int xc_handle, domid_t dom, uint8_t link, uint8_t isa_irq); 5.5 5.6 5.7 +/* 5.8 + * Track dirty bit changes in the VRAM area 5.9 + * 5.10 + * All of this is done atomically: 5.11 + * - get the dirty bitmap since the last call 5.12 + * - set up dirty tracking area for period up to the next call 5.13 + * - clear the dirty tracking area. 5.14 + * 5.15 + * Returns -ENODATA and does not fill bitmap if the area has changed since the 5.16 + * last call. 5.17 + */ 5.18 +int xc_hvm_track_dirty_vram( 5.19 + int xc_handle, domid_t dom, 5.20 + uint64_t first_pfn, uint64_t nr, 5.21 + unsigned long *bitmap); 5.22 + 5.23 typedef enum { 5.24 XC_ERROR_NONE = 0, 5.25 XC_INTERNAL_ERROR = 1,
6.1 --- a/xen/arch/ia64/vmx/vmx_hypercall.c Fri May 02 14:35:27 2008 +0100 6.2 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Fri May 02 15:08:27 2008 +0100 6.3 @@ -200,6 +200,10 @@ do_hvm_op(unsigned long op, XEN_GUEST_HA 6.4 rc = 0; 6.5 break; 6.6 6.7 + case HVMOP_track_dirty_vram: 6.8 + rc = -ENOSYS; 6.9 + break; 6.10 + 6.11 default: 6.12 gdprintk(XENLOG_INFO, "Bad HVM op %ld.\n", op); 6.13 rc = -ENOSYS;
7.1 --- a/xen/arch/x86/hvm/hvm.c Fri May 02 14:35:27 2008 +0100 7.2 +++ b/xen/arch/x86/hvm/hvm.c Fri May 02 15:08:27 2008 +0100 7.3 @@ -2345,6 +2345,54 @@ long do_hvm_op(unsigned long op, XEN_GUE 7.4 rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -ENOSYS; 7.5 break; 7.6 7.7 + case HVMOP_track_dirty_vram: 7.8 + { 7.9 + struct xen_hvm_track_dirty_vram a; 7.10 + struct domain *d; 7.11 + 7.12 + if ( copy_from_guest(&a, arg, 1) ) 7.13 + return -EFAULT; 7.14 + 7.15 + if ( a.domid == DOMID_SELF ) 7.16 + { 7.17 + d = rcu_lock_current_domain(); 7.18 + } 7.19 + else 7.20 + { 7.21 + if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL ) 7.22 + return -ESRCH; 7.23 + if ( !IS_PRIV_FOR(current->domain, d) ) 7.24 + { 7.25 + rc = -EPERM; 7.26 + goto param_fail2; 7.27 + } 7.28 + } 7.29 + 7.30 + rc = -EINVAL; 7.31 + if ( !is_hvm_domain(d) ) 7.32 + goto param_fail2; 7.33 + 7.34 + rc = xsm_hvm_param(d, op); 7.35 + if ( rc ) 7.36 + goto param_fail2; 7.37 + 7.38 + rc = -ESRCH; 7.39 + if ( d->is_dying ) 7.40 + goto param_fail2; 7.41 + 7.42 + rc = -EINVAL; 7.43 + if ( !shadow_mode_enabled(d)) 7.44 + goto param_fail2; 7.45 + if ( d->vcpu[0] == NULL ) 7.46 + goto param_fail2; 7.47 + 7.48 + rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap); 7.49 + 7.50 + param_fail2: 7.51 + rcu_unlock_domain(d); 7.52 + break; 7.53 + } 7.54 + 7.55 default: 7.56 { 7.57 gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
8.1 --- a/xen/arch/x86/mm/shadow/common.c Fri May 02 14:35:27 2008 +0100 8.2 +++ b/xen/arch/x86/mm/shadow/common.c Fri May 02 15:08:27 2008 +0100 8.3 @@ -2589,6 +2589,13 @@ void shadow_teardown(struct domain *d) 8.4 * calls now that we've torn down the bitmap */ 8.5 d->arch.paging.mode &= ~PG_log_dirty; 8.6 8.7 + if (d->dirty_vram) { 8.8 + xfree(d->dirty_vram->sl1ma); 8.9 + xfree(d->dirty_vram->dirty_bitmap); 8.10 + xfree(d->dirty_vram); 8.11 + d->dirty_vram = NULL; 8.12 + } 8.13 + 8.14 shadow_unlock(d); 8.15 } 8.16 8.17 @@ -2849,6 +2856,164 @@ void shadow_clean_dirty_bitmap(struct do 8.18 shadow_blow_tables(d); 8.19 shadow_unlock(d); 8.20 } 8.21 + 8.22 + 8.23 +/**************************************************************************/ 8.24 +/* VRAM dirty tracking support */ 8.25 +int shadow_track_dirty_vram(struct domain *d, 8.26 + unsigned long begin_pfn, 8.27 + unsigned long nr, 8.28 + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap) 8.29 +{ 8.30 + int rc; 8.31 + unsigned long end_pfn = begin_pfn + nr; 8.32 + unsigned long dirty_size = (nr + 7) / 8; 8.33 + int flush_tlb = 0; 8.34 + 8.35 + if (end_pfn < begin_pfn 8.36 + || begin_pfn > d->arch.p2m->max_mapped_pfn 8.37 + || end_pfn >= d->arch.p2m->max_mapped_pfn) 8.38 + return -EINVAL; 8.39 + 8.40 + shadow_lock(d); 8.41 + 8.42 + if ( d->dirty_vram && (!nr || 8.43 + ( begin_pfn != d->dirty_vram->begin_pfn 8.44 + || end_pfn != d->dirty_vram->end_pfn )) ) { 8.45 + /* Different tracking, tear the previous down. */ 8.46 + gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", d->dirty_vram->begin_pfn, d->dirty_vram->end_pfn); 8.47 + xfree(d->dirty_vram->sl1ma); 8.48 + xfree(d->dirty_vram->dirty_bitmap); 8.49 + xfree(d->dirty_vram); 8.50 + d->dirty_vram = NULL; 8.51 + } 8.52 + 8.53 + if ( !nr ) { 8.54 + rc = 0; 8.55 + goto out; 8.56 + } 8.57 + 8.58 + /* This should happen seldomly (Video mode change), 8.59 + * no need to be careful. 
*/ 8.60 + if ( !d->dirty_vram ) { 8.61 + unsigned long i; 8.62 + p2m_type_t t; 8.63 + 8.64 + /* Just recount from start. */ 8.65 + for ( i = begin_pfn; i < end_pfn; i++ ) 8.66 + flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t)); 8.67 + 8.68 + gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn); 8.69 + 8.70 + rc = -ENOMEM; 8.71 + if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL ) 8.72 + goto out; 8.73 + d->dirty_vram->begin_pfn = begin_pfn; 8.74 + d->dirty_vram->end_pfn = end_pfn; 8.75 + 8.76 + if ( (d->dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL ) 8.77 + goto out_dirty_vram; 8.78 + memset(d->dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr); 8.79 + 8.80 + if ( (d->dirty_vram->dirty_bitmap = xmalloc_array(uint8_t, dirty_size)) == NULL ) 8.81 + goto out_sl1ma; 8.82 + memset(d->dirty_vram->dirty_bitmap, 0, dirty_size); 8.83 + 8.84 + /* Tell the caller that this time we could not track dirty bits. */ 8.85 + rc = -ENODATA; 8.86 + } else { 8.87 + int i; 8.88 +#ifdef __i386__ 8.89 + unsigned long map_mfn = INVALID_MFN; 8.90 + void *map_sl1p = NULL; 8.91 +#endif 8.92 + 8.93 + /* Iterate over VRAM to track dirty bits. */ 8.94 + for ( i = 0; i < nr; i++ ) { 8.95 + p2m_type_t t; 8.96 + mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t); 8.97 + struct page_info *page = mfn_to_page(mfn); 8.98 + u32 count_info = page->u.inuse.type_info & PGT_count_mask; 8.99 + int dirty = 0; 8.100 + paddr_t sl1ma = d->dirty_vram->sl1ma[i]; 8.101 + 8.102 + switch (count_info) { 8.103 + case 0: 8.104 + /* No guest reference, nothing to track. */ 8.105 + break; 8.106 + case 1: 8.107 + /* One guest reference. */ 8.108 + if ( sl1ma == INVALID_PADDR ) { 8.109 + /* We don't know which sl1e points to this, too bad. 
*/ 8.110 + dirty = 1; 8.111 + /* TODO: Heuristics for finding the single mapping of 8.112 + * this gmfn */ 8.113 + flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t)); 8.114 + } else { 8.115 + /* Hopefully the most common case: only one mapping, 8.116 + * whose dirty bit we can use. */ 8.117 + l1_pgentry_t *sl1e; 8.118 +#ifdef __i386__ 8.119 + void *sl1p = map_sl1p; 8.120 + unsigned long sl1mfn = paddr_to_pfn(sl1ma); 8.121 + 8.122 + if ( sl1mfn != map_mfn ) { 8.123 + if ( map_sl1p ) 8.124 + sh_unmap_domain_page(map_sl1p); 8.125 + map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn)); 8.126 + map_mfn = sl1mfn; 8.127 + } 8.128 + sl1e = sl1p + (sl1ma & ~PAGE_MASK); 8.129 +#else 8.130 + sl1e = maddr_to_virt(sl1ma); 8.131 +#endif 8.132 + 8.133 + if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY ) { 8.134 + dirty = 1; 8.135 + /* Note: this is atomic, so we may clear a 8.136 + * _PAGE_ACCESSED set by another processor. */ 8.137 + l1e_remove_flags(*sl1e, _PAGE_DIRTY); 8.138 + flush_tlb = 1; 8.139 + } 8.140 + } 8.141 + break; 8.142 + default: 8.143 + /* More than one guest reference, 8.144 + * we don't afford tracking that. 
*/ 8.145 + dirty = 1; 8.146 + break; 8.147 + } 8.148 + 8.149 + if ( dirty ) 8.150 + d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8); 8.151 + } 8.152 + 8.153 +#ifdef __i386__ 8.154 + if ( map_sl1p ) 8.155 + sh_unmap_domain_page(map_sl1p); 8.156 +#endif 8.157 + 8.158 + rc = -EFAULT; 8.159 + if ( copy_to_guest(dirty_bitmap, d->dirty_vram->dirty_bitmap, dirty_size) == 0 ) { 8.160 + memset(d->dirty_vram->dirty_bitmap, 0, dirty_size); 8.161 + rc = 0; 8.162 + } 8.163 + } 8.164 + if ( flush_tlb ) 8.165 + flush_tlb_mask(d->domain_dirty_cpumask); 8.166 + goto out; 8.167 + 8.168 +out_sl1ma: 8.169 + xfree(d->dirty_vram->sl1ma); 8.170 +out_dirty_vram: 8.171 + xfree(d->dirty_vram); 8.172 + d->dirty_vram = NULL; 8.173 + 8.174 +out: 8.175 + shadow_unlock(d); 8.176 + return rc; 8.177 +} 8.178 + 8.179 /**************************************************************************/ 8.180 /* Shadow-control XEN_DOMCTL dispatcher */ 8.181
9.1 --- a/xen/arch/x86/mm/shadow/multi.c Fri May 02 14:35:27 2008 +0100 9.2 +++ b/xen/arch/x86/mm/shadow/multi.c Fri May 02 15:08:27 2008 +0100 9.3 @@ -801,7 +801,7 @@ static always_inline void 9.4 // Since we know the guest's PRESENT bit is set, we also set the shadow's 9.5 // SHADOW_PRESENT bit. 9.6 // 9.7 - pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER | 9.8 + pass_thru_flags = (_PAGE_ACCESSED | _PAGE_USER | 9.9 _PAGE_RW | _PAGE_PRESENT); 9.10 if ( guest_supports_nx(v) ) 9.11 pass_thru_flags |= _PAGE_NX_BIT; 9.12 @@ -1251,6 +1251,80 @@ static int shadow_set_l2e(struct vcpu *v 9.13 return flags; 9.14 } 9.15 9.16 +static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e, 9.17 + shadow_l1e_t *sl1e, 9.18 + mfn_t sl1mfn, 9.19 + struct domain *d) 9.20 +{ 9.21 + mfn_t mfn; 9.22 + unsigned long gfn; 9.23 + 9.24 + if ( !d->dirty_vram ) return; 9.25 + 9.26 + mfn = shadow_l1e_get_mfn(new_sl1e); 9.27 + gfn = mfn_to_gfn(d, mfn); 9.28 + 9.29 + if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) { 9.30 + unsigned long i = gfn - d->dirty_vram->begin_pfn; 9.31 + struct page_info *page = mfn_to_page(mfn); 9.32 + u32 count_info = page->u.inuse.type_info & PGT_count_mask; 9.33 + 9.34 + if ( count_info == 1 ) 9.35 + /* Initial guest reference, record it */ 9.36 + d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn)) 9.37 + | ((paddr_t) sl1e & ~PAGE_MASK); 9.38 + } 9.39 +} 9.40 + 9.41 +static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e, 9.42 + shadow_l1e_t *sl1e, 9.43 + mfn_t sl1mfn, 9.44 + struct domain *d) 9.45 +{ 9.46 + mfn_t mfn; 9.47 + unsigned long gfn; 9.48 + 9.49 + if ( !d->dirty_vram ) return; 9.50 + 9.51 + mfn = shadow_l1e_get_mfn(old_sl1e); 9.52 + gfn = mfn_to_gfn(d, mfn); 9.53 + 9.54 + if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) { 9.55 + unsigned long i = gfn - d->dirty_vram->begin_pfn; 9.56 + struct page_info *page = mfn_to_page(mfn); 9.57 + u32 count_info = page->u.inuse.type_info & 
PGT_count_mask; 9.58 + int dirty = 0; 9.59 + paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) 9.60 + | ((paddr_t) sl1e & ~PAGE_MASK); 9.61 + 9.62 + if ( count_info == 1 ) { 9.63 + /* Last reference */ 9.64 + if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) { 9.65 + /* We didn't know it was that one, let's say it is dirty */ 9.66 + dirty = 1; 9.67 + } else { 9.68 + ASSERT(d->dirty_vram->sl1ma[i] == sl1ma); 9.69 + d->dirty_vram->sl1ma[i] = INVALID_PADDR; 9.70 + if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_DIRTY ) 9.71 + dirty = 1; 9.72 + } 9.73 + } else { 9.74 + /* We had more than one reference, just consider the page dirty. */ 9.75 + dirty = 1; 9.76 + /* Check that it's not the one we recorded. */ 9.77 + if ( d->dirty_vram->sl1ma[i] == sl1ma ) { 9.78 + /* Too bad, we remembered the wrong one... */ 9.79 + d->dirty_vram->sl1ma[i] = INVALID_PADDR; 9.80 + } else { 9.81 + /* Ok, our recorded sl1e is still pointing to this page, let's 9.82 + * just hope it will remain. */ 9.83 + } 9.84 + } 9.85 + if ( dirty ) 9.86 + d->dirty_vram->dirty_bitmap[i / 8] |= d->dirty_vram->dirty_bitmap[i % 8]; 9.87 + } 9.88 +} 9.89 + 9.90 static int shadow_set_l1e(struct vcpu *v, 9.91 shadow_l1e_t *sl1e, 9.92 shadow_l1e_t new_sl1e, 9.93 @@ -1275,6 +1349,8 @@ static int shadow_set_l1e(struct vcpu *v 9.94 /* Doesn't look like a pagetable. */ 9.95 flags |= SHADOW_SET_ERROR; 9.96 new_sl1e = shadow_l1e_empty(); 9.97 + } else { 9.98 + shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d); 9.99 } 9.100 } 9.101 } 9.102 @@ -1293,6 +1369,7 @@ static int shadow_set_l1e(struct vcpu *v 9.103 * trigger a flush later. 
*/ 9.104 if ( shadow_mode_refcounts(d) ) 9.105 { 9.106 + shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d); 9.107 shadow_put_page_from_l1e(old_sl1e, d); 9.108 } 9.109 } 9.110 @@ -2248,8 +2325,10 @@ void sh_destroy_l1_shadow(struct vcpu *v 9.111 mfn_t sl1mfn = smfn; 9.112 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, { 9.113 if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT) 9.114 - && !sh_l1e_is_magic(*sl1e) ) 9.115 + && !sh_l1e_is_magic(*sl1e) ) { 9.116 + shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d); 9.117 shadow_put_page_from_l1e(*sl1e, d); 9.118 + } 9.119 }); 9.120 } 9.121
10.1 --- a/xen/arch/x86/mm/shadow/private.h Fri May 02 14:35:27 2008 +0100 10.2 +++ b/xen/arch/x86/mm/shadow/private.h Fri May 02 15:08:27 2008 +0100 10.3 @@ -528,6 +528,15 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 10.4 return rv; 10.5 } 10.6 10.7 +/**************************************************************************/ 10.8 +/* VRAM dirty tracking support */ 10.9 + 10.10 +struct sh_dirty_vram { 10.11 + unsigned long begin_pfn; 10.12 + unsigned long end_pfn; 10.13 + paddr_t *sl1ma; 10.14 + uint8_t *dirty_bitmap; 10.15 +}; 10.16 10.17 /**************************************************************************/ 10.18 /* Shadow-page refcounting. */
11.1 --- a/xen/include/asm-ia64/config.h Fri May 02 14:35:27 2008 +0100 11.2 +++ b/xen/include/asm-ia64/config.h Fri May 02 15:08:27 2008 +0100 11.3 @@ -71,6 +71,7 @@ typedef int pid_t; 11.4 11.5 // now needed for xen/include/mm.h 11.6 typedef unsigned long paddr_t; 11.7 +#define INVALID_PADDR (~0UL) 11.8 // from include/linux/kernel.h 11.9 #define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) 11.10
12.1 --- a/xen/include/asm-powerpc/types.h Fri May 02 14:35:27 2008 +0100 12.2 +++ b/xen/include/asm-powerpc/types.h Fri May 02 15:08:27 2008 +0100 12.3 @@ -61,6 +61,7 @@ typedef unsigned long size_t; 12.4 #endif 12.5 12.6 typedef unsigned long paddr_t; 12.7 +#define INVALID_PADDR (~0UL) 12.8 #define PRIpaddr "08lx" 12.9 12.10 /* DMA addresses come in generic and 64-bit flavours. */
13.1 --- a/xen/include/asm-x86/shadow.h Fri May 02 14:35:27 2008 +0100 13.2 +++ b/xen/include/asm-x86/shadow.h Fri May 02 15:08:27 2008 +0100 13.3 @@ -62,6 +62,12 @@ void shadow_vcpu_init(struct vcpu *v); 13.4 /* Enable an arbitrary shadow mode. Call once at domain creation. */ 13.5 int shadow_enable(struct domain *d, u32 mode); 13.6 13.7 +/* Enable VRAM dirty bit tracking. */ 13.8 +int shadow_track_dirty_vram(struct domain *d, 13.9 + unsigned long first_pfn, 13.10 + unsigned long nr, 13.11 + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap); 13.12 + 13.13 /* Handler for shadow control ops: operations from user-space to enable 13.14 * and disable ephemeral shadow modes (test mode and log-dirty mode) and 13.15 * manipulate the log-dirty bitmap. */
14.1 --- a/xen/include/asm-x86/types.h Fri May 02 14:35:27 2008 +0100 14.2 +++ b/xen/include/asm-x86/types.h Fri May 02 15:08:27 2008 +0100 14.3 @@ -38,15 +38,18 @@ typedef signed long long s64; 14.4 typedef unsigned long long u64; 14.5 #if defined(CONFIG_X86_PAE) 14.6 typedef u64 paddr_t; 14.7 +#define INVALID_PADDR (~0ULL) 14.8 #define PRIpaddr "016llx" 14.9 #else 14.10 typedef unsigned long paddr_t; 14.11 +#define INVALID_PADDR (~0UL) 14.12 #define PRIpaddr "08lx" 14.13 #endif 14.14 #elif defined(__x86_64__) 14.15 typedef signed long s64; 14.16 typedef unsigned long u64; 14.17 typedef unsigned long paddr_t; 14.18 +#define INVALID_PADDR (~0UL) 14.19 #define PRIpaddr "016lx" 14.20 #endif 14.21
15.1 --- a/xen/include/public/hvm/hvm_op.h Fri May 02 14:35:27 2008 +0100 15.2 +++ b/xen/include/public/hvm/hvm_op.h Fri May 02 15:08:27 2008 +0100 15.3 @@ -73,4 +73,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_ 15.4 /* Flushes all VCPU TLBs: @arg must be NULL. */ 15.5 #define HVMOP_flush_tlbs 5 15.6 15.7 +/* Track dirty VRAM. */ 15.8 +#define HVMOP_track_dirty_vram 6 15.9 +struct xen_hvm_track_dirty_vram { 15.10 + /* Domain to be tracked. */ 15.11 + domid_t domid; 15.12 + /* First pfn to track. */ 15.13 + uint64_aligned_t first_pfn; 15.14 + /* Number of pages to track. */ 15.15 + uint64_aligned_t nr; 15.16 + /* OUT variable. */ 15.17 + /* Dirty bitmap buffer. */ 15.18 + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; 15.19 +}; 15.20 +typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; 15.21 +DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); 15.22 + 15.23 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
16.1 --- a/xen/include/xen/sched.h Fri May 02 14:35:27 2008 +0100 16.2 +++ b/xen/include/xen/sched.h Fri May 02 15:08:27 2008 +0100 16.3 @@ -236,6 +236,9 @@ struct domain 16.4 * cause a deadlock. Acquirers don't spin waiting; they preempt. 16.5 */ 16.6 spinlock_t hypercall_deadlock_mutex; 16.7 + 16.8 + /* VRAM dirty support. */ 16.9 + struct sh_dirty_vram *dirty_vram; 16.10 }; 16.11 16.12 struct domain_setup_info