debuggers.hg: tools/ioemu/patches/xen-mapcache @ 0:7d21f7218375

Exact replica of unstable on 051908 + README-this

author  Mukesh Rathor
date    Mon May 19 15:34:57 2008 -0700 (2008-05-19)
# HG changeset patch
# User kfraser@localhost.localdomain
# Node ID 67a06a9b7b1dca707e1cd3b08ae0a341d6e97b3d
# Parent 3f0ca90351e268084fbdb733d70fc596cb46537d
[HVM] qemu: Add guest address-space mapping cache.

On an IA32 or IA32 PAE host we currently cannot create an HVM guest with
more than 2GB of memory, because Qemu generally cannot find a contiguous
virtual address range large enough to map the guest's entire physical
address space. The attached patch fixes this issue by mapping guest
memory dynamically, in small fixed-size blocks.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
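
The scheme, in brief: instead of mapping all of guest RAM up front, qemu_map_cache()
resolves a guest-physical address on demand through a hash table of fixed-size
buckets (64KB on i386, 1MB on x86_64), mapping a bucket with xc_map_foreign_batch()
the first time it is touched and reusing it on later accesses. A minimal sketch of
the index/offset arithmetic follows; the helper mapcache_split() is illustrative
only and not part of the patch, but the macro names and the split mirror
qemu_map_cache() as added to vl.c below.

/* Illustrative sketch only: how qemu_map_cache() splits a guest-physical address. */
#define MCACHE_BUCKET_SHIFT 16                          /* 64KB buckets on i386 */
#define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)

static inline void mapcache_split(unsigned long phys_addr,
                                  unsigned long nr_buckets,
                                  unsigned long *bucket,  /* hash-table slot    */
                                  unsigned long *offset)  /* offset into bucket */
{
    unsigned long address_index = phys_addr >> MCACHE_BUCKET_SHIFT;

    *bucket = address_index % nr_buckets;   /* a collision forces a remap */
    *offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
}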

Index: ioemu/vl.c
===================================================================
--- ioemu.orig/vl.c 2007-05-11 10:04:51.000000000 +0100
+++ ioemu/vl.c 2007-05-11 10:04:52.000000000 +0100
@@ -275,7 +275,7 @@
     for(i = start; i < start + length; i += size) {
         ioport_read_table[bsize][i] = func;
         if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
-            hw_error("register_ioport_read: invalid opaque");
+            hw_error("register_ioport_write: invalid opaque");
         ioport_opaque[i] = opaque;
     }
     return 0;
@@ -6791,6 +6791,157 @@
     suspend_requested = 1;
 }
 
+#if defined(MAPCACHE)
+
+#if defined(__i386__)
+#define MAX_MCACHE_SIZE 0x40000000 /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE 0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long)*8)
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit,map) \
+    (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG))))
+
+struct map_cache {
+    unsigned long paddr_index;
+    uint8_t *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>PAGE_SHIFT);
+};
+
+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+
+/* For most cases (>99.9%), the page address is the same. */
+static unsigned long last_address_index = ~0UL;
+static uint8_t *last_address_vaddr;
+
+static int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    nr_buckets = (((MAX_MCACHE_SIZE >> PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - PAGE_SHIFT));
+    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = nr_buckets * sizeof(struct map_cache);
+    size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+    mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANONYMOUS, 0, 0);
+    if (mapcache_entry == MAP_FAILED) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(struct map_cache *entry,
+                              unsigned long address_index)
+{
+    uint8_t *vaddr_base;
+    unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
+    unsigned int i, j;
+
+    if (entry->vaddr_base != NULL) {
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i++)
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-PAGE_SHIFT)) + i;
+
+    vaddr_base = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE,
+                                      pfns, MCACHE_BUCKET_SIZE >> PAGE_SHIFT);
+    if (vaddr_base == NULL) {
+        fprintf(logfile, "xc_map_foreign_batch error %d\n", errno);
+        exit(-1);
+    }
+
+    entry->vaddr_base = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        j = ((i + BITS_PER_LONG) > (MCACHE_BUCKET_SIZE >> PAGE_SHIFT)) ?
+            (MCACHE_BUCKET_SIZE >> PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG;
+        while (j > 0)
+            word = (word << 1) | !(pfns[i + --j] & 0xF0000000UL);
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
+{
+    struct map_cache *entry;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    if (address_index == last_address_index)
+        return last_address_vaddr + address_offset;
+
+    entry = &mapcache_entry[address_index % nr_buckets];
+
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index ||
+        !test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        qemu_remap_bucket(entry, address_index);
+
+    if (!test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        return NULL;
+
+    last_address_index = address_index;
+    last_address_vaddr = entry->vaddr_base;
+
+    return last_address_vaddr + address_offset;
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+
+    mapcache_lock();
+
+    for (i = 0; i < nr_buckets; i++) {
+        struct map_cache *entry = &mapcache_entry[i];
+
+        if (entry->vaddr_base == NULL)
+            continue;
+
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base = NULL;
+    }
+
+    last_address_index = ~0UL;
+    last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+
+#endif /* defined(MAPCACHE) */
+
 int main(int argc, char **argv)
 {
 #ifdef CONFIG_GDBSTUB
@@ -6827,8 +6978,11 @@
     unsigned long ioreq_pfn;
     extern void *shared_page;
     extern void *buffered_io_page;
-    extern void *buffered_pio_page;
+#ifdef __ia64__
     unsigned long nr_pages;
+    xen_pfn_t *page_array;
+    extern void *buffered_pio_page;
+#endif
 
     char qemu_dm_logfilename[64];
 
@@ -7119,6 +7273,7 @@
             break;
         case QEMU_OPTION_m:
             ram_size = atol(optarg) * 1024 * 1024;
+            ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
            if (ram_size <= 0)
                 help();
 #ifndef CONFIG_DM
@@ -7472,30 +7627,15 @@
 
 #if defined(__i386__) || defined(__x86_64__)
 
-    nr_pages = ram_size/PAGE_SIZE;
-
-    page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t));
-    if (page_array == NULL) {
-        fprintf(logfile, "malloc returned error %d\n", errno);
-        exit(-1);
-    }
-
-    for ( i = 0; i < nr_pages; i++)
-        page_array[i] = i;
-
-    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                         PROT_READ|PROT_WRITE, page_array,
-                                         nr_pages);
-    if (phys_ram_base == NULL) {
-        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
+    if (qemu_map_cache_init()) {
+        fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno);
         exit(-1);
     }
 
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "shared page at pfn %lx\n", ioreq_pfn);
     shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[ioreq_pfn]);
+                                       PROT_READ|PROT_WRITE, ioreq_pfn);
     if (shared_page == NULL) {
         fprintf(logfile, "map shared IO page returned error %d\n", errno);
         exit(-1);
@@ -7504,15 +7644,12 @@
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "buffered io page at pfn %lx\n", ioreq_pfn);
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                            PROT_READ|PROT_WRITE,
-                                            page_array[ioreq_pfn]);
+                                            PROT_READ|PROT_WRITE, ioreq_pfn);
     if (buffered_io_page == NULL) {
         fprintf(logfile, "map buffered IO page returned error %d\n", errno);
         exit(-1);
     }
 
-    free(page_array);
-
 #elif defined(__ia64__)
 
     nr_pages = ram_size/PAGE_SIZE;
Index: ioemu/target-i386-dm/exec-dm.c
===================================================================
--- ioemu.orig/target-i386-dm/exec-dm.c 2007-05-11 10:04:46.000000000 +0100
+++ ioemu/target-i386-dm/exec-dm.c 2007-05-11 10:04:52.000000000 +0100
@@ -36,6 +36,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "vl.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -127,10 +128,17 @@
 FILE *logfile;
 int loglevel;
 
+#ifdef MAPCACHE
+pthread_mutex_t mapcache_mutex;
+#endif
+
 void cpu_exec_init(CPUState *env)
 {
     CPUState **penv;
     int cpu_index;
+#ifdef MAPCACHE
+    pthread_mutexattr_t mxattr;
+#endif
 
     env->next_cpu = NULL;
     penv = &first_cpu;
@@ -144,6 +152,14 @@
 
     /* alloc dirty bits array */
     phys_ram_dirty = qemu_malloc(phys_ram_size >> TARGET_PAGE_BITS);
+
+#ifdef MAPCACHE
+    /* setup memory access mutex to protect mapcache */
+    pthread_mutexattr_init(&mxattr);
+    pthread_mutexattr_settype(&mxattr, PTHREAD_MUTEX_RECURSIVE);
+    pthread_mutex_init(&mapcache_mutex, &mxattr);
+    pthread_mutexattr_destroy(&mxattr);
+#endif
 }
 
 /* enable or disable low levels log */
@@ -414,16 +430,11 @@
     return 0;
 }
 
-static inline int paddr_is_ram(target_phys_addr_t addr)
-{
-    /* Is this guest physical address RAM-backed? */
-#if defined(CONFIG_DM) && (defined(__i386__) || defined(__x86_64__))
-    return ((addr < HVM_BELOW_4G_MMIO_START) ||
-            (addr >= HVM_BELOW_4G_MMIO_START + HVM_BELOW_4G_MMIO_LENGTH));
-#else
-    return (addr < ram_size);
+#if defined(__i386__) || defined(__x86_64__)
+#define phys_ram_addr(x) (qemu_map_cache(x))
+#elif defined(__ia64__)
+#define phys_ram_addr(x) ((addr < ram_size) ? (phys_ram_base + (x)) : NULL)
 #endif
-}
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                             int len, int is_write)
@@ -431,13 +442,15 @@
     int l, io_index;
     uint8_t *ptr;
     uint32_t val;
-
+
+    mapcache_lock();
+
     while (len > 0) {
         /* How much can we copy before the next page boundary? */
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK);
         if (l > len)
             l = len;
-
+
         io_index = iomem_index(addr);
         if (is_write) {
             if (io_index) {
@@ -457,11 +470,11 @@
                     io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(phys_ram_base + addr, buf, l);
+                memcpy(ptr, buf, l);
 #ifdef __ia64__
-                sync_icache((unsigned long)(phys_ram_base + addr), l);
+                sync_icache(ptr, l);
 #endif
             }
         } else {
@@ -482,9 +495,9 @@
                     stb_raw(buf, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(buf, phys_ram_base + addr, l);
+                memcpy(buf, ptr, l);
             } else {
                 /* Neither RAM nor known MMIO space */
                 memset(buf, 0xff, len);
@@ -494,6 +507,8 @@
         buf += l;
         addr += l;
     }
+
+    mapcache_unlock();
 }
 #endif
 
Index: ioemu/vl.h
===================================================================
--- ioemu.orig/vl.h 2007-05-11 10:04:51.000000000 +0100
+++ ioemu/vl.h 2007-05-11 10:04:52.000000000 +0100
@@ -159,6 +159,28 @@
 
 extern FILE *logfile;
 
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#define MAPCACHE
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
+void qemu_invalidate_map_cache(void);
+
+#include <pthread.h>
+extern pthread_mutex_t mapcache_mutex;
+#define mapcache_lock() pthread_mutex_lock(&mapcache_mutex)
+#define mapcache_unlock() pthread_mutex_unlock(&mapcache_mutex)
+
+#else
+
+#define qemu_invalidate_map_cache() ((void)0)
+
+#define mapcache_lock() ((void)0)
+#define mapcache_unlock() ((void)0)
+
+#endif
+
 extern int xc_handle;
 extern int domid;
 
Index: ioemu/target-i386-dm/cpu.h
===================================================================
--- ioemu.orig/target-i386-dm/cpu.h 2007-05-11 10:04:47.000000000 +0100
+++ ioemu/target-i386-dm/cpu.h 2007-05-11 10:04:52.000000000 +0100
@@ -25,7 +25,8 @@
 #ifdef TARGET_X86_64
 #define TARGET_LONG_BITS 64
 #else
-#define TARGET_LONG_BITS 32
+/* #define TARGET_LONG_BITS 32 */
+#define TARGET_LONG_BITS 64 /* for Qemu map cache */
 #endif
 
 /* target supports implicit self modifying code */
Index: ioemu/target-i386-dm/helper2.c
===================================================================
--- ioemu.orig/target-i386-dm/helper2.c 2007-05-11 10:04:50.000000000 +0100
+++ ioemu/target-i386-dm/helper2.c 2007-05-11 10:04:52.000000000 +0100
@@ -526,6 +526,9 @@
     case IOREQ_TYPE_TIMEOFFSET:
         cpu_ioreq_timeoffset(env, req);
         break;
+    case IOREQ_TYPE_INVALIDATE:
+        qemu_invalidate_map_cache();
+        break;
     default:
         hw_error("Invalid ioreq type 0x%x\n", req->type);
     }
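
A rough usage sketch, not part of the patch: with these changes in place, device-model
accesses to guest RAM are expected to go through qemu_map_cache() under
mapcache_lock()/mapcache_unlock(), as cpu_physical_memory_rw() does above. The helper
below is hypothetical, assumes vl.h and <string.h> are included, and assumes the access
does not cross a bucket boundary; a NULL return from qemu_map_cache() means the address
is not RAM-backed.

/* Hypothetical example, for illustration only. */
static uint32_t dm_read_guest_u32(target_phys_addr_t gpa)
{
    uint32_t val = 0xffffffff;   /* non-RAM addresses read back as all-ones */
    uint8_t *ptr;

    mapcache_lock();             /* recursive mutex set up in cpu_exec_init() */
    ptr = qemu_map_cache(gpa);   /* maps the containing bucket on demand */
    if (ptr != NULL)
        memcpy(&val, ptr, sizeof(val));
    mapcache_unlock();

    return val;
}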