debuggers.hg
changeset 16:790c2f0eaf7c
REFRESH to unstable changeset 18414. NO debugger changes.
1.1 --- a/Config.mk Tue Sep 02 16:34:53 2008 -0700 1.2 +++ b/Config.mk Tue Sep 02 16:55:55 2008 -0700 1.3 @@ -93,11 +93,7 @@ QEMU_REMOTE=http://xenbits.xensource.com 1.4 # Mercurial in-tree version, or a local directory, or a git URL. 1.5 # CONFIG_QEMU ?= ioemu 1.6 # CONFIG_QEMU ?= ../qemu-xen.git 1.7 -ifeq ($(XEN_TARGET_ARCH),ia64) 1.8 -CONFIG_QEMU ?= ioemu 1.9 -else 1.10 CONFIG_QEMU ?= $(QEMU_REMOTE) 1.11 -endif 1.12 1.13 # Optional components 1.14 XENSTAT_XENTOP ?= y
2.1 --- a/docs/misc/vtpm.txt Tue Sep 02 16:34:53 2008 -0700 2.2 +++ b/docs/misc/vtpm.txt Tue Sep 02 16:55:55 2008 -0700 2.3 @@ -92,8 +92,8 @@ the actual instance number that is assig 2.4 can be different. This is the case if for example that particular 2.5 instance is already used by another virtual machine. The association 2.6 of which TPM instance number is used by which virtual machine is 2.7 -kept in the file /etc/xen/vtpm.db. Associations are maintained by 2.8 -domain name and instance number. 2.9 +kept in the file /var/vtpm/vtpm.db. Associations are maintained by 2.10 +a xend-internal vTPM UUID and vTPM instance number. 2.11 2.12 Note: If you do not want TPM functionality for your user domain simply 2.13 leave out the 'vtpm' line in the configuration file.
3.1 --- a/docs/src/user.tex Tue Sep 02 16:34:53 2008 -0700 3.2 +++ b/docs/src/user.tex Tue Sep 02 16:55:55 2008 -0700 3.3 @@ -22,7 +22,7 @@ 3.4 \vfill 3.5 \begin{tabular}{l} 3.6 {\Huge \bf Users' Manual} \\[4mm] 3.7 -{\huge Xen v3.0} \\[80mm] 3.8 +{\huge Xen v3.3} \\[80mm] 3.9 \end{tabular} 3.10 \end{center} 3.11 3.12 @@ -42,9 +42,7 @@ welcome.} 3.13 3.14 \vspace*{\fill} 3.15 3.16 -Xen is Copyright \copyright 2002-2005, University of Cambridge, UK, XenSource 3.17 -Inc., IBM Corp., Hewlett-Packard Co., Intel Corp., AMD Inc., and others. All 3.18 -rights reserved. 3.19 +Xen is Copyright \copyright 2002-2008, Citrix Systems, Inc., University of Cambridge, UK, XenSource Inc., IBM Corp., Hewlett-Packard Co., Intel Corp., AMD Inc., and others. All rights reserved. 3.20 3.21 Xen is an open-source project. Most portions of Xen are licensed for copying 3.22 under the terms of the GNU General Public License, version 2. Other portions 3.23 @@ -116,16 +114,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3.24 3.25 3.26 Xen is an open-source \emph{para-virtualizing} virtual machine monitor 3.27 -(VMM), or ``hypervisor'', for the x86 processor architecture. Xen can 3.28 -securely execute multiple virtual machines on a single physical system 3.29 -with close-to-native performance. Xen facilitates enterprise-grade 3.30 -functionality, including: 3.31 +(VMM), or ``hypervisor'', for a variety of processor architectures including x86. Xen can securely execute multiple virtual machines on a single physical system with near native performance. Xen facilitates enterprise-grade functionality, including: 3.32 3.33 \begin{itemize} 3.34 \item Virtual machines with performance close to native hardware. 3.35 \item Live migration of running virtual machines between physical hosts. 3.36 \item Up to 32\footnote{IA64 supports up to 64 virtual CPUs per guest virtual machine} virtual CPUs per guest virtual machine, with VCPU hotplug. 3.37 -\item x86/32, x86/32 with PAE, x86/64, IA64 and Power platform support. 3.38 +\item x86/32 with PAE, x86/64, and IA64 platform support. 3.39 \item Intel and AMD Virtualization Technology for unmodified guest operating systems (including Microsoft Windows). 3.40 \item Excellent hardware support (supports almost all Linux device 3.41 drivers). 3.42 @@ -182,22 +177,20 @@ unmodified guests running natively on th 3.43 3.44 Paravirtualized Xen support is available for increasingly many 3.45 operating systems: currently, mature Linux support is available and 3.46 -included in the standard distribution. Other OS ports---including 3.47 -NetBSD, FreeBSD and Solaris x86 v10---are nearing completion. 3.48 +included in the standard distribution. Other OS ports, including 3.49 +NetBSD, FreeBSD and Solaris are also complete. 3.50 3.51 3.52 \section{Hardware Support} 3.53 3.54 -Xen currently runs on the x86 architecture, requiring a ``P6'' or 3.55 -newer processor (e.g.\ Pentium Pro, Celeron, Pentium~II, Pentium~III, 3.56 -Pentium~IV, Xeon, AMD~Athlon, AMD~Duron). Multiprocessor machines are 3.57 -supported, and there is support for HyperThreading (SMT). In 3.58 -addition, ports to IA64 and Power architectures are supported. 3.59 - 3.60 -The default 32-bit Xen supports for Intel's Physical Addressing Extensions (PAE), which enable x86/32 machines to address up to 64 GB of physical memory. 3.61 -It also supports non-PAE 32-bit Xen up to 4GB of memory. 3.62 -Xen also supports x86/64 platforms such as Intel EM64T and AMD Opteron 3.63 -which can currently address up to 1TB of physical memory. 
3.64 +Xen currently runs on the IA64 and x86 architectures. Multiprocessor 3.65 +machines are supported, and there is support for HyperThreading (SMT). 3.66 + 3.67 +The default 32-bit Xen requires processor support for Physical 3.68 +Addressing Extensions (PAE), which enables the hypervisor to address 3.69 +up to 16GB of physical memory. Xen also supports x86/64 platforms 3.70 +such as Intel EM64T and AMD Opteron which can currently address up to 3.71 +1TB of physical memory. 3.72 3.73 Xen offloads most of the hardware support issues to the guest OS 3.74 running in the \emph{Domain~0} management virtual machine. Xen itself 3.75 @@ -253,8 +246,8 @@ with pointers to papers and technical re 3.76 Xen has grown into a fully-fledged project in its own right, enabling us 3.77 to investigate interesting research issues regarding the best techniques 3.78 for virtualizing resources such as the CPU, memory, disk and network. 3.79 -Project contributors now include XenSource, Intel, IBM, HP, AMD, Novell, 3.80 -RedHat. 3.81 +Project contributors now include Citrix, Intel, IBM, HP, AMD, Novell, 3.82 +RedHat, Sun, Fujitsu, and Samsung. 3.83 3.84 Xen was first described in a paper presented at SOSP in 3.85 2003\footnote{\tt 3.86 @@ -265,25 +258,20 @@ sites. 3.87 3.88 \section{What's New} 3.89 3.90 -Xen 3.0.0 offers: 3.91 +Xen 3.3.0 offers: 3.92 3.93 \begin{itemize} 3.94 -\item Support for up to 32-way SMP guest operating systems 3.95 -\item Intel (Physical Addressing Extensions) PAE to support 32-bit 3.96 - servers with more than 4GB physical memory 3.97 -\item x86/64 support (Intel EM64T, AMD Opteron) 3.98 -\item Intel VT-x support to enable the running of unmodified guest 3.99 -operating systems (Windows XP/2003, Legacy Linux) 3.100 -\item Enhanced control tools 3.101 -\item Improved ACPI support 3.102 -\item AGP/DRM graphics 3.103 +\item IO Emulation (stub domains) for HVM IO performance and scalability 3.104 +\item Replacement of Intel VT vmxassist by new 16b emulation code 3.105 +\item Improved VT-d device pass-through e.g. for graphics devices 3.106 +\item Enhanced C and P state power management 3.107 +\item Exploitation of multi-queue support on modern NICs 3.108 +\item Removal of domain lock for improved PV guest scalability 3.109 +\item 2MB page support for HVM and PV guests 3.110 +\item CPU Portability 3.111 \end{itemize} 3.112 3.113 - 3.114 -Xen 3.0 features greatly enhanced hardware support, configuration 3.115 -flexibility, usability and a larger complement of supported operating 3.116 -systems. This latest release takes Xen a step closer to being the 3.117 -definitive open source solution for virtualization. 3.118 +Xen 3.3 delivers the capabilities needed by enterprise customers and gives computing industry leaders a solid, secure platform to build upon for their virtualization solutions. This latest release establishes Xen as the definitive open source solution for virtualization. 3.119 3.120 3.121 3.122 @@ -295,7 +283,7 @@ definitive open source solution for virt 3.123 The Xen distribution includes three main components: Xen itself, ports 3.124 of Linux and NetBSD to run on Xen, and the userspace tools required to 3.125 manage a Xen-based system. This chapter describes how to install the 3.126 -Xen~3.0 distribution from source. Alternatively, there may be pre-built 3.127 +Xen~3.3 distribution from source. Alternatively, there may be pre-built 3.128 packages available as part of your operating system distribution.
3.129 3.130 3.131 @@ -4029,9 +4017,8 @@ files: \path{Config.mk} and \path{Makefi 3.132 3.133 The former allows the overall build target architecture to be 3.134 specified. You will typically not need to modify this unless 3.135 -you are cross-compiling or if you wish to build a non-PAE 3.136 -Xen system. Additional configuration options are documented 3.137 -in the \path{Config.mk} file. 3.138 +you are cross-compiling. Additional configuration options are 3.139 +documented in the \path{Config.mk} file. 3.140 3.141 The top-level \path{Makefile} is chiefly used to customize the set of 3.142 kernels built. Look for the line:
4.1 --- a/docs/xen-api/xenapi-datamodel-graph.dot Tue Sep 02 16:34:53 2008 -0700 4.2 +++ b/docs/xen-api/xenapi-datamodel-graph.dot Tue Sep 02 16:55:55 2008 -0700 4.3 @@ -14,7 +14,7 @@ fontname="Verdana"; 4.4 4.5 node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user XSPolicy ACMPolicy; 4.6 node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics; 4.7 -node [shape=box]; DPCI PPCI host_cpu console 4.8 +node [shape=box]; DPCI PPCI host_cpu console VTPM 4.9 session -> host [ arrowhead="none" ] 4.10 session -> user [ arrowhead="none" ] 4.11 VM -> VM_metrics [ arrowhead="none" ]
5.1 --- a/extras/mini-os/include/posix/dirent.h Tue Sep 02 16:34:53 2008 -0700 5.2 +++ b/extras/mini-os/include/posix/dirent.h Tue Sep 02 16:55:55 2008 -0700 5.3 @@ -1,7 +1,7 @@ 5.4 #ifndef _POSIX_DIRENT_H 5.5 #define _POSIX_DIRENT_H 5.6 5.7 -#include <sys/types.h> 5.8 +#include <stdint.h> 5.9 5.10 struct dirent { 5.11 char *d_name;
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/extras/mini-os/include/posix/err.h Tue Sep 02 16:55:55 2008 -0700 6.3 @@ -0,0 +1,15 @@ 6.4 +#ifndef _POSIX_ERR_H 6.5 +#define _POSIX_ERR_H 6.6 + 6.7 +#include <stdarg.h> 6.8 + 6.9 +void err(int eval, const char *fmt, ...); 6.10 +void errx(int eval, const char *fmt, ...); 6.11 +void warn(const char *fmt, ...); 6.12 +void warnx(const char *fmt, ...); 6.13 +void verr(int eval, const char *fmt, va_list args); 6.14 +void verrx(int eval, const char *fmt, va_list args); 6.15 +void vwarn(const char *fmt, va_list args); 6.16 +void vwarnx(const char *fmt, va_list args); 6.17 + 6.18 +#endif /* _POSIX_ERR_H */
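These declarations mirror the BSD <err.h> interface; the implementations arrive in the extras/mini-os/lib/sys.c hunk below. A minimal usage sketch, with a hypothetical caller that is not part of this changeset:

    #include <err.h>
    #include <fcntl.h>

    int open_or_die(const char *path)
    {
        int fd = open(path, O_RDONLY);
        if (fd < 0)
            err(1, "open %s", path);   /* prints the message plus strerror(errno), then exit(1) */
        if (fd == 0)
            warnx("stdin was closed"); /* warning only: no errno text, execution continues */
        return fd;
    }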
7.1 --- a/extras/mini-os/include/posix/pthread.h Tue Sep 02 16:34:53 2008 -0700 7.2 +++ b/extras/mini-os/include/posix/pthread.h Tue Sep 02 16:55:55 2008 -0700 7.3 @@ -31,8 +31,15 @@ static inline int pthread_key_delete(pth 7.4 7.5 7.6 7.7 +typedef struct {} pthread_mutexattr_t; 7.8 +static inline int pthread_mutexattr_init(pthread_mutexattr_t *mattr) { return 0; } 7.9 +#define PTHREAD_MUTEX_NORMAL 0 7.10 +#define PTHREAD_MUTEX_RECURSIVE 1 7.11 +static inline int pthread_mutexattr_settype(pthread_mutexattr_t *mattr, int kind) { return 0; } 7.12 +static inline int pthread_mutexattr_destroy(pthread_mutexattr_t *mattr) { return 0; } 7.13 typedef struct {} pthread_mutex_t; 7.14 #define PTHREAD_MUTEX_INITIALIZER {} 7.15 +static inline int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *mattr) { return 0; } 7.16 static inline int pthread_mutex_lock(pthread_mutex_t *mutex) { return 0; } 7.17 static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { return 0; } 7.18
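Mini-OS threads share one address space and are cooperatively scheduled, so uncontended no-op mutexes suffice; these inline stubs exist only so that portable pthread code compiles and links. A sketch of hypothetical code that now builds unchanged against them:

    #include <pthread.h>

    static pthread_mutex_t lock;

    void locking_init(void)
    {
        pthread_mutexattr_t attr;
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
        pthread_mutex_init(&lock, &attr);  /* each call is an inline no-op returning 0 */
        pthread_mutexattr_destroy(&attr);
    }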
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/extras/mini-os/include/posix/syslog.h Tue Sep 02 16:55:55 2008 -0700 8.3 @@ -0,0 +1,37 @@ 8.4 +#ifndef _POSIX_SYSLOG_H 8.5 +#define _POSIX_SYSLOG_H 8.6 + 8.7 +#include <stdarg.h> 8.8 + 8.9 +#define LOG_PID 0 8.10 +#define LOG_CONS 0 8.11 +#define LOG_NDELAY 0 8.12 +#define LOG_ODELAY 0 8.13 +#define LOG_NOWAIT 0 8.14 + 8.15 +#define LOG_KERN 0 8.16 +#define LOG_USER 0 8.17 +#define LOG_MAIL 0 8.18 +#define LOG_NEWS 0 8.19 +#define LOG_UUCP 0 8.20 +#define LOG_DAEMON 0 8.21 +#define LOG_AUTH 0 8.22 +#define LOG_CRON 0 8.23 +#define LOG_LPR 0 8.24 + 8.25 +/* TODO: support */ 8.26 +#define LOG_EMERG 0 8.27 +#define LOG_ALERT 1 8.28 +#define LOG_CRIT 2 8.29 +#define LOG_ERR 3 8.30 +#define LOG_WARNING 4 8.31 +#define LOG_NOTICE 5 8.32 +#define LOG_INFO 6 8.33 +#define LOG_DEBUG 7 8.34 + 8.35 +void openlog(const char *ident, int option, int facility); 8.36 +void syslog(int priority, const char *format, ...); 8.37 +void closelog(void); 8.38 +void vsyslog(int priority, const char *format, va_list ap); 8.39 + 8.40 +#endif /* _POSIX_SYSLOG_H */
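Every option and facility macro collapses to 0 because a stub domain has no syslog daemon; the functions declared here are implemented in the lib/sys.c hunk below and simply prefix the message with the ident and print it on the console. A hypothetical caller:

    #include <syslog.h>

    void announce(int nbackends)
    {
        openlog("qemu-stubdom", LOG_PID | LOG_CONS, LOG_DAEMON); /* accepted, ignored */
        syslog(LOG_INFO, "ready, %d backends", nbackends); /* console: "qemu-stubdom: ready, ..." */
        closelog();
    }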
9.1 --- a/extras/mini-os/include/xenbus.h Tue Sep 02 16:34:53 2008 -0700 9.2 +++ b/extras/mini-os/include/xenbus.h Tue Sep 02 16:55:55 2008 -0700 9.3 @@ -83,12 +83,13 @@ char *xenbus_transaction_end(xenbus_tran 9.4 int *retry); 9.5 9.6 /* Read path and parse it as an integer. Returns -1 on error. */ 9.7 -int xenbus_read_integer(char *path); 9.8 +int xenbus_read_integer(const char *path); 9.9 9.10 /* Contraction of snprintf and xenbus_write(path/node). */ 9.11 char* xenbus_printf(xenbus_transaction_t xbt, 9.12 - char* node, char* path, 9.13 - char* fmt, ...); 9.14 + const char* node, const char* path, 9.15 + const char* fmt, ...) 9.16 + __attribute__((__format__(printf, 4, 5))); 9.17 9.18 /* Reset the XenBus system. */ 9.19 void fini_xenbus(void);
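Beyond const-correctness, the added __attribute__((__format__(printf, 4, 5))) tells GCC that argument 4 of xenbus_printf is a printf-style format string whose variadic arguments begin at position 5, so format/argument mismatches become compile-time warnings. For example (hypothetical call, assuming the usual XBT_NIL transaction handle):

    char *state = "4";
    /* now diagnosed at compile time: '%d' expects an int, but 'state' is a char * */
    xenbus_printf(XBT_NIL, nodename, "state", "%d", state);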
10.1 --- a/extras/mini-os/lib/sys.c Tue Sep 02 16:34:53 2008 -0700 10.2 +++ b/extras/mini-os/lib/sys.c Tue Sep 02 16:55:55 2008 -0700 10.3 @@ -1007,6 +1007,96 @@ LWIP_STUB(ssize_t, sendto, (int s, void 10.4 LWIP_STUB(int, getsockname, (int s, struct sockaddr *name, socklen_t *namelen), (s, name, namelen)) 10.5 #endif 10.6 10.7 +static char *syslog_ident; 10.8 +void openlog(const char *ident, int option, int facility) 10.9 +{ 10.10 + if (syslog_ident) 10.11 + free(syslog_ident); 10.12 + syslog_ident = strdup(ident); 10.13 +} 10.14 + 10.15 +void vsyslog(int priority, const char *format, va_list ap) 10.16 +{ 10.17 + printk("%s: ", syslog_ident); 10.18 + print(0, format, ap); 10.19 +} 10.20 + 10.21 +void syslog(int priority, const char *format, ...) 10.22 +{ 10.23 + va_list ap; 10.24 + va_start(ap, format); 10.25 + vsyslog(priority, format, ap); 10.26 + va_end(ap); 10.27 +} 10.28 + 10.29 +void closelog(void) 10.30 +{ 10.31 + free(syslog_ident); 10.32 + syslog_ident = NULL; 10.33 +} 10.34 + 10.35 +void vwarn(const char *format, va_list ap) 10.36 +{ 10.37 + int the_errno = errno; 10.38 + printk("stubdom: "); 10.39 + if (format) { 10.40 + print(0, format, ap); 10.41 + printk(", "); 10.42 + } 10.43 + printk("%s", strerror(the_errno)); 10.44 +} 10.45 + 10.46 +void warn(const char *format, ...) 10.47 +{ 10.48 + va_list ap; 10.49 + va_start(ap, format); 10.50 + vwarn(format, ap); 10.51 + va_end(ap); 10.52 +} 10.53 + 10.54 +void verr(int eval, const char *format, va_list ap) 10.55 +{ 10.56 + vwarn(format, ap); 10.57 + exit(eval); 10.58 +} 10.59 + 10.60 +void err(int eval, const char *format, ...) 10.61 +{ 10.62 + va_list ap; 10.63 + va_start(ap, format); 10.64 + verr(eval, format, ap); 10.65 + va_end(ap); 10.66 +} 10.67 + 10.68 +void vwarnx(const char *format, va_list ap) 10.69 +{ 10.70 + printk("stubdom: "); 10.71 + if (format) 10.72 + print(0, format, ap); 10.73 +} 10.74 + 10.75 +void warnx(const char *format, ...) 10.76 +{ 10.77 + va_list ap; 10.78 + va_start(ap, format); 10.79 + vwarnx(format, ap); 10.80 + va_end(ap); 10.81 +} 10.82 + 10.83 +void verrx(int eval, const char *format, va_list ap) 10.84 +{ 10.85 + vwarnx(format, ap); 10.86 + exit(eval); 10.87 +} 10.88 + 10.89 +void errx(int eval, const char *format, ...) 
10.90 +{ 10.91 + va_list ap; 10.92 + va_start(ap, format); 10.93 + verrx(eval, format, ap); 10.94 + va_end(ap); 10.95 +} 10.96 + 10.97 int nanosleep(const struct timespec *req, struct timespec *rem) 10.98 { 10.99 s_time_t start = NOW(); 10.100 @@ -1115,34 +1205,47 @@ void *mmap(void *start, size_t length, i 10.101 } else ASSERT(0); 10.102 } 10.103 10.104 +#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t)) 10.105 int munmap(void *start, size_t length) 10.106 { 10.107 - int i, n = length / PAGE_SIZE; 10.108 - multicall_entry_t call[n]; 10.109 - unsigned char (*data)[PAGE_SIZE] = start; 10.110 - int ret; 10.111 + int total = length / PAGE_SIZE; 10.112 ASSERT(!((unsigned long)start & ~PAGE_MASK)); 10.113 - ASSERT(!(length & ~PAGE_MASK)); 10.114 + while (total) { 10.115 + int n = UNMAP_BATCH; 10.116 + if (n > total) 10.117 + n = total; 10.118 + { 10.119 + int i; 10.120 + multicall_entry_t call[n]; 10.121 + unsigned char (*data)[PAGE_SIZE] = start; 10.122 + int ret; 10.123 10.124 - for (i = 0; i < n; i++) { 10.125 - call[i].op = __HYPERVISOR_update_va_mapping; 10.126 - call[i].args[0] = (unsigned long) &data[i]; 10.127 - call[i].args[1] = 0; 10.128 - call[i].args[2] = 0; 10.129 - call[i].args[3] = UVMF_INVLPG; 10.130 - } 10.131 + for (i = 0; i < n; i++) { 10.132 + int arg = 0; 10.133 + call[i].op = __HYPERVISOR_update_va_mapping; 10.134 + call[i].args[arg++] = (unsigned long) &data[i]; 10.135 + call[i].args[arg++] = 0; 10.136 +#ifdef __i386__ 10.137 + call[i].args[arg++] = 0; 10.138 +#endif 10.139 + call[i].args[arg++] = UVMF_INVLPG; 10.140 + } 10.141 10.142 - ret = HYPERVISOR_multicall(call, n); 10.143 - if (ret) { 10.144 - errno = -ret; 10.145 - return -1; 10.146 - } 10.147 + ret = HYPERVISOR_multicall(call, n); 10.148 + if (ret) { 10.149 + errno = -ret; 10.150 + return -1; 10.151 + } 10.152 10.153 - for (i = 0; i < n; i++) { 10.154 - if (call[i].result) { 10.155 - errno = call[i].result; 10.156 - return -1; 10.157 - } 10.158 + for (i = 0; i < n; i++) { 10.159 + if (call[i].result) { 10.160 + errno = call[i].result; 10.161 + return -1; 10.162 + } 10.163 + } 10.164 + } 10.165 + start = (char *)start + n * PAGE_SIZE; 10.166 + total -= n; 10.167 } 10.168 return 0; 10.169 }
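The munmap rewrite fixes a stack-safety hazard: the old code declared one multicall entry per page in a single variable-length array, so unmapping a large region could overflow the small Mini-OS stack. Work is now done in batches of at most UNMAP_BATCH entries (sized to half the stack), and on x86/32 update_va_mapping takes its 64-bit PTE argument as two 32-bit words, hence the extra #ifdef __i386__ argument slot. The batching pattern in isolation (simplified sketch):

    /* bound the VLA by processing the region in fixed-size chunks */
    while (total) {
        int n = total < UNMAP_BATCH ? total : UNMAP_BATCH;
        {
            multicall_entry_t call[n];  /* never larger than UNMAP_BATCH */
            /* ... fill call[0..n-1] with update_va_mapping requests ... */
            if (HYPERVISOR_multicall(call, n))
                return -1;              /* errno taken from the failed hypercall */
        }
        start = (char *)start + n * PAGE_SIZE;
        total -= n;
    }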
11.1 --- a/extras/mini-os/main.c Tue Sep 02 16:34:53 2008 -0700 11.2 +++ b/extras/mini-os/main.c Tue Sep 02 16:55:55 2008 -0700 11.3 @@ -42,7 +42,7 @@ void _fini(void) 11.4 extern char __app_bss_start, __app_bss_end; 11.5 static void call_main(void *p) 11.6 { 11.7 - char *c; 11.8 + char *c, quote; 11.9 #ifdef CONFIG_QEMU 11.10 char *domargs, *msg; 11.11 #endif 11.12 @@ -101,32 +101,53 @@ static void call_main(void *p) 11.13 11.14 argc = 1; 11.15 11.16 -#define PARSE_ARGS(ARGS,START,END) \ 11.17 +#define PARSE_ARGS(ARGS,START,QUOTE,END) \ 11.18 c = ARGS; \ 11.19 + quote = 0; \ 11.20 while (*c) { \ 11.21 if (*c != ' ') { \ 11.22 START; \ 11.23 - while (*c && *c != ' ') \ 11.24 + while (*c) { \ 11.25 + if (quote) { \ 11.26 + if (*c == quote) { \ 11.27 + quote = 0; \ 11.28 + QUOTE; \ 11.29 + continue; \ 11.30 + } \ 11.31 + } else if (*c == ' ') \ 11.32 + break; \ 11.33 + if (*c == '"' || *c == '\'') { \ 11.34 + quote = *c; \ 11.35 + QUOTE; \ 11.36 + continue; \ 11.37 + } \ 11.38 c++; \ 11.39 + } \ 11.40 } else { \ 11.41 END; \ 11.42 while (*c == ' ') \ 11.43 c++; \ 11.44 } \ 11.45 + } \ 11.46 + if (quote) {\ 11.47 + printk("Warning: unterminated quotation %c\n", quote); \ 11.48 + quote = 0; \ 11.49 } 11.50 +#define PARSE_ARGS_COUNT(ARGS) PARSE_ARGS(ARGS, argc++, c++, ) 11.51 +#define PARSE_ARGS_STORE(ARGS) PARSE_ARGS(ARGS, argv[argc++] = c, memmove(c, c + 1, strlen(c + 1) + 1), *c++ = 0) 11.52 11.53 - PARSE_ARGS((char*)start_info.cmd_line, argc++, ); 11.54 + PARSE_ARGS_COUNT((char*)start_info.cmd_line); 11.55 #ifdef CONFIG_QEMU 11.56 - PARSE_ARGS(domargs, argc++, ); 11.57 + PARSE_ARGS_COUNT(domargs); 11.58 #endif 11.59 11.60 argv = alloca((argc + 1) * sizeof(char *)); 11.61 argv[0] = "main"; 11.62 argc = 1; 11.63 11.64 - PARSE_ARGS((char*)start_info.cmd_line, argv[argc++] = c, *c++ = 0) 11.65 + PARSE_ARGS_STORE((char*)start_info.cmd_line) 11.66 #ifdef CONFIG_QEMU 11.67 - PARSE_ARGS(domargs, argv[argc++] = c, *c++ = 0) 11.68 + PARSE_ARGS_STORE(domargs) 11.69 #endif 11.70 11.71 argv[argc] = NULL;
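PARSE_ARGS gains a QUOTE action: the counting pass (PARSE_ARGS_COUNT) merely steps over the quote character, while the storing pass (PARSE_ARGS_STORE) deletes it in place with memmove, so the stored argv strings keep the quoted spaces but not the quotes. Illustrated with a hypothetical command line:

    /* cmd_line: prog --name "my domain" 'a b'
     *
     *   argv[0] = "main"       (always hardcoded)
     *   argv[1] = "prog"
     *   argv[2] = "--name"
     *   argv[3] = "my domain"  (quotes stripped, embedded space kept)
     *   argv[4] = "a b"
     *
     * an unmatched quote is not fatal; it only triggers the
     * "unterminated quotation" warning
     */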
12.1 --- a/extras/mini-os/xenbus/xenbus.c Tue Sep 02 16:34:53 2008 -0700 12.2 +++ b/extras/mini-os/xenbus/xenbus.c Tue Sep 02 16:55:55 2008 -0700 12.3 @@ -633,7 +633,7 @@ xenbus_transaction_end(xenbus_transactio 12.4 return NULL; 12.5 } 12.6 12.7 -int xenbus_read_integer(char *path) 12.8 +int xenbus_read_integer(const char *path) 12.9 { 12.10 char *res, *buf; 12.11 int t; 12.12 @@ -650,8 +650,8 @@ int xenbus_read_integer(char *path) 12.13 } 12.14 12.15 char* xenbus_printf(xenbus_transaction_t xbt, 12.16 - char* node, char* path, 12.17 - char* fmt, ...) 12.18 + const char* node, const char* path, 12.19 + const char* fmt, ...) 12.20 { 12.21 #define BUFFER_SIZE 256 12.22 char fullpath[BUFFER_SIZE];
13.1 --- a/stubdom/Makefile Tue Sep 02 16:34:53 2008 -0700 13.2 +++ b/stubdom/Makefile Tue Sep 02 16:55:55 2008 -0700 13.3 @@ -9,7 +9,7 @@ include $(XEN_ROOT)/Config.mk 13.4 13.5 override CONFIG_QEMU=ioemu 13.6 13.7 -IOEMU_OPTIONS=--disable-sdl --disable-opengl --disable-gfx-check --disable-vnc-tls --disable-brlapi --disable-kqemu 13.8 +IOEMU_OPTIONS=--disable-sdl --disable-opengl --disable-vnc-tls --disable-brlapi --disable-kqemu 13.9 ZLIB_URL?=http://www.zlib.net 13.10 ZLIB_VERSION=1.2.3 13.11 LIBPCI_URL?=http://www.kernel.org/pub/software/utils/pciutils 13.12 @@ -53,7 +53,7 @@ TARGET_CFLAGS += $(call cc-option,$(CC), 13.13 TARGET_CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) 13.14 13.15 # Do not use host headers and libs 13.16 -GCC_INSTALL = $(shell gcc -print-search-dirs | sed -n -e 's/install: \(.*\)/\1/p') 13.17 +GCC_INSTALL = $(shell LANG=C gcc -print-search-dirs | sed -n -e 's/install: \(.*\)/\1/p') 13.18 TARGET_CPPFLAGS += -U __linux__ -U __FreeBSD__ -U __sun__ 13.19 TARGET_CPPFLAGS += -nostdinc 13.20 TARGET_CPPFLAGS += -isystem $(CURDIR)/$(MINI_OS)/include/posix
14.1 --- a/tools/Makefile Tue Sep 02 16:34:53 2008 -0700 14.2 +++ b/tools/Makefile Tue Sep 02 16:55:55 2008 -0700 14.3 @@ -59,8 +59,7 @@ clean distclean: subdirs-clean 14.4 ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) 14.5 IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \ 14.6 --cross-prefix=$(CROSS_COMPILE) \ 14.7 - --interp-prefix=$(CROSS_SYS_ROOT) \ 14.8 - --install=$(CURDIR)/cross-install 14.9 + --interp-prefix=$(CROSS_SYS_ROOT) 14.10 endif 14.11 14.12 ioemu/config-host.mak:
15.1 --- a/tools/Rules.mk Tue Sep 02 16:34:53 2008 -0700 15.2 +++ b/tools/Rules.mk Tue Sep 02 16:55:55 2008 -0700 15.3 @@ -5,6 +5,9 @@ all: 15.4 15.5 include $(XEN_ROOT)/Config.mk 15.6 15.7 +export _INSTALL := $(INSTALL) 15.8 +INSTALL = $(XEN_ROOT)/tools/cross-install 15.9 + 15.10 XEN_INCLUDE = $(XEN_ROOT)/tools/include 15.11 XEN_XC = $(XEN_ROOT)/tools/python/xen/lowlevel/xc 15.12 XEN_LIBXC = $(XEN_ROOT)/tools/libxc
16.1 --- a/tools/cross-install Tue Sep 02 16:34:53 2008 -0700 16.2 +++ b/tools/cross-install Tue Sep 02 16:55:55 2008 -0700 16.3 @@ -5,4 +5,4 @@ if [ -n "$CROSS_BIN_PATH" ]; then 16.4 PATH="$CROSS_BIN_PATH:$PATH" 16.5 fi 16.6 16.7 -exec install "$@" 16.8 +exec $_INSTALL "$@"
17.1 --- a/tools/examples/xmexample.pv-grub Tue Sep 02 16:34:53 2008 -0700 17.2 +++ b/tools/examples/xmexample.pv-grub Tue Sep 02 16:55:55 2008 -0700 17.3 @@ -25,7 +25,7 @@ extra = "(hd0,0)/boot/grub/menu.lst" 17.4 # WARNING: Creating a domain with insufficient memory may cause out of 17.5 # memory errors. The domain needs enough memory to boot kernel 17.6 # and modules. Allocating less than 32MBs is not recommended. 17.7 -memory = 64 17.8 +memory = 128 17.9 17.10 # A name for your domain. All domains must have different names. 17.11 name = "ExampleDomain" 17.12 @@ -119,32 +119,6 @@ disk = [ 'phy:hda1,hda1,w' ] 17.13 #vtpm = [ 'instance=1,backend=0' ] 17.14 17.15 #---------------------------------------------------------------------------- 17.16 -# Set the kernel command line for the new domain. 17.17 -# You only need to define the IP parameters and hostname if the domain's 17.18 -# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP. 17.19 -# You can use 'extra' to set the runlevel and custom environment 17.20 -# variables used by custom rc scripts (e.g. VMID=, usr= ). 17.21 - 17.22 -# Set if you want dhcp to allocate the IP address. 17.23 -#dhcp="dhcp" 17.24 -# Set netmask. 17.25 -#netmask= 17.26 -# Set default gateway. 17.27 -#gateway= 17.28 -# Set the hostname. 17.29 -#hostname= "vm%d" % vmid 17.30 - 17.31 -# Set root device. 17.32 -root = "/dev/hda1 ro" 17.33 - 17.34 -# Root device for nfs. 17.35 -#root = "/dev/nfs" 17.36 -# The nfs server. 17.37 -#nfs_server = '192.0.2.1' 17.38 -# Root directory on the nfs server. 17.39 -#nfs_root = '/full/path/to/root/directory' 17.40 - 17.41 -#---------------------------------------------------------------------------- 17.42 # Configure the behaviour when a domain exits. There are three 'reasons' 17.43 # for a domain to stop: poweroff, reboot, and crash. For each of these you 17.44 # may specify:
18.1 --- a/tools/firmware/Rules.mk Tue Sep 02 16:34:53 2008 -0700 18.2 +++ b/tools/firmware/Rules.mk Tue Sep 02 16:55:55 2008 -0700 18.3 @@ -6,6 +6,10 @@ CFLAGS := 18.4 18.5 include $(XEN_ROOT)/tools/Rules.mk 18.6 18.7 +ifneq ($(debug),y) 18.8 +CFLAGS += -DNDEBUG 18.9 +endif 18.10 + 18.11 CFLAGS += -Werror 18.12 18.13 # Disable PIE/SSP if GCC supports them. They can break us.
19.1 --- a/tools/firmware/hvmloader/32bitbios_support.c Tue Sep 02 16:34:53 2008 -0700 19.2 +++ b/tools/firmware/hvmloader/32bitbios_support.c Tue Sep 02 16:55:55 2008 -0700 19.3 @@ -76,7 +76,7 @@ static void relocate_32bitbios(char *elf 19.4 */ 19.5 reloc_size = reloc_off; 19.6 printf("%d bytes of ROMBIOS high-memory extensions:\n", reloc_size); 19.7 - highbiosarea = (char *)(long)e820_malloc(reloc_size); 19.8 + highbiosarea = (char *)(long)e820_malloc(reloc_size, 0); 19.9 BUG_ON(highbiosarea == NULL); 19.10 printf(" Relocating to 0x%x-0x%x ... ", 19.11 (uint32_t)&highbiosarea[0],
20.1 --- a/tools/firmware/hvmloader/Makefile Tue Sep 02 16:34:53 2008 -0700 20.2 +++ b/tools/firmware/hvmloader/Makefile Tue Sep 02 16:55:55 2008 -0700 20.3 @@ -30,6 +30,9 @@ CFLAGS += $(CFLAGS_include) -I. 20.4 20.5 SRCS = hvmloader.c mp_tables.c util.c smbios.c 20.6 SRCS += 32bitbios_support.c smp.c cacheattr.c 20.7 +ifeq ($(debug),y) 20.8 +SRCS += tests.c 20.9 +endif 20.10 OBJS = $(patsubst %.c,%.o,$(SRCS)) 20.11 20.12 .PHONY: all
21.1 --- a/tools/firmware/hvmloader/acpi/Makefile Tue Sep 02 16:34:53 2008 -0700 21.2 +++ b/tools/firmware/hvmloader/acpi/Makefile Tue Sep 02 16:55:55 2008 -0700 21.3 @@ -22,8 +22,8 @@ C_SRC = build.c dsdt.c static_tables.c 21.4 H_SRC = $(wildcard *.h) 21.5 OBJS = $(patsubst %.c,%.o,$(C_SRC)) 21.6 21.7 -IASL_VER = acpica-unix-20060707 21.8 -IASL_URL = http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz 21.9 +IASL_VER = acpica-unix-20080729 21.10 +IASL_URL = http://acpica.org/download/$(IASL_VER).tar.gz 21.11 21.12 CFLAGS += -I. -I.. $(CFLAGS_include) 21.13 21.14 @@ -48,7 +48,7 @@ iasl: 21.15 @echo "ACPI ASL compiler(iasl) is needed" 21.16 @echo "Download Intel ACPI CA" 21.17 @echo "If wget failed, please download and compile manually from" 21.18 - @echo "http://developer.intel.com/technology/iapc/acpi/downloads.htm" 21.19 + @echo "http://acpica.org/downloads/" 21.20 @echo 21.21 wget $(IASL_URL) 21.22 tar xzf $(IASL_VER).tar.gz
22.1 --- a/tools/firmware/hvmloader/acpi/build.c Tue Sep 02 16:34:53 2008 -0700 22.2 +++ b/tools/firmware/hvmloader/acpi/build.c Tue Sep 02 16:55:55 2008 -0700 22.3 @@ -233,7 +233,7 @@ static int construct_secondary_tables(ui 22.4 tcpa->header.oem_revision = ACPI_OEM_REVISION; 22.5 tcpa->header.creator_id = ACPI_CREATOR_ID; 22.6 tcpa->header.creator_revision = ACPI_CREATOR_REVISION; 22.7 - tcpa->lasa = e820_malloc(ACPI_2_0_TCPA_LAML_SIZE); 22.8 + tcpa->lasa = e820_malloc(ACPI_2_0_TCPA_LAML_SIZE, 0); 22.9 if ( tcpa->lasa ) 22.10 { 22.11 tcpa->laml = ACPI_2_0_TCPA_LAML_SIZE; 22.12 @@ -363,7 +363,7 @@ void acpi_build_tables(void) 22.13 memset(buf, 0, high_sz); 22.14 22.15 /* Allocate data area and set up ACPI tables there. */ 22.16 - buf = (uint8_t *)e820_malloc(high_sz); 22.17 + buf = (uint8_t *)e820_malloc(high_sz, 0); 22.18 __acpi_build_tables(buf, &low_sz, &high_sz); 22.19 22.20 printf(" - Lo data: %08lx-%08lx\n"
23.1 --- a/tools/firmware/hvmloader/hvmloader.c Tue Sep 02 16:34:53 2008 -0700 23.2 +++ b/tools/firmware/hvmloader/hvmloader.c Tue Sep 02 16:55:55 2008 -0700 23.3 @@ -243,6 +243,13 @@ static void pci_setup(void) 23.4 bars[i].bar_sz = bar_sz; 23.5 23.6 nr_bars++; 23.7 + 23.8 + /* Skip the upper-half of the address for a 64-bit BAR. */ 23.9 + if ( (bar_data & (PCI_BASE_ADDRESS_SPACE | 23.10 + PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == 23.11 + (PCI_BASE_ADDRESS_SPACE_MEMORY | 23.12 + PCI_BASE_ADDRESS_MEM_TYPE_64) ) 23.13 + bar++; 23.14 } 23.15 23.16 /* Map the interrupt. */ 23.17 @@ -430,12 +437,14 @@ static void cmos_write_memory_size(void) 23.18 cmos_outb(0x35, (uint8_t)( alt_mem >> 8)); 23.19 } 23.20 23.21 -static void init_xen_platform_io_base(void) 23.22 +static uint16_t init_xen_platform_io_base(void) 23.23 { 23.24 struct bios_info *bios_info = (struct bios_info *)ACPI_PHYSICAL_ADDRESS; 23.25 uint32_t devfn, bar_data; 23.26 uint16_t vendor_id, device_id; 23.27 23.28 + bios_info->xen_pfiob = 0; 23.29 + 23.30 for ( devfn = 0; devfn < 128; devfn++ ) 23.31 { 23.32 vendor_id = pci_readw(devfn, PCI_VENDOR_ID); 23.33 @@ -445,12 +454,16 @@ static void init_xen_platform_io_base(vo 23.34 bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0); 23.35 bios_info->xen_pfiob = bar_data & PCI_BASE_ADDRESS_IO_MASK; 23.36 } 23.37 + 23.38 + return bios_info->xen_pfiob; 23.39 } 23.40 23.41 int main(void) 23.42 { 23.43 int vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz; 23.44 int extboot_sz = 0; 23.45 + uint32_t vga_ram = 0; 23.46 + uint16_t xen_pfiob; 23.47 23.48 printf("HVM Loader\n"); 23.49 23.50 @@ -460,6 +473,8 @@ int main(void) 23.51 23.52 smp_initialise(); 23.53 23.54 + perform_tests(); 23.55 + 23.56 printf("Writing SMBIOS tables ...\n"); 23.57 smbios_sz = hvm_write_smbios_tables(); 23.58 23.59 @@ -495,6 +510,12 @@ int main(void) 23.60 break; 23.61 } 23.62 23.63 + if ( virtual_vga != VGA_none ) 23.64 + { 23.65 + vga_ram = e820_malloc(8 << 20, 4096); 23.66 + printf("VGA RAM at %08x\n", vga_ram); 23.67 + } 23.68 + 23.69 etherboot_sz = scan_etherboot_nic((void*)ETHERBOOT_PHYSICAL_ADDRESS); 23.70 23.71 if ( must_load_extboot() ) 23.72 @@ -535,7 +556,9 @@ int main(void) 23.73 ROMBIOS_PHYSICAL_ADDRESS, 23.74 ROMBIOS_PHYSICAL_ADDRESS + rombios_sz - 1); 23.75 23.76 - init_xen_platform_io_base(); 23.77 + xen_pfiob = init_xen_platform_io_base(); 23.78 + if ( xen_pfiob && vga_ram ) 23.79 + outl(xen_pfiob + 4, vga_ram); 23.80 23.81 printf("Invoking ROMBIOS ...\n"); 23.82 return 0;
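Two independent changes meet here. First, a 64-bit memory BAR occupies two consecutive 32-bit BAR registers, so after sizing the lower half the loop must step past the upper half or it would be misread as a separate BAR; the test masks the low dword down to the space bit and the memory-type field. Second, when an emulated VGA is present, main() now reserves 8MB of page-aligned RAM with the extended e820_malloc and hands its address to the device model through offset 4 of the platform device's I/O BAR (the receiving side is in the xen_platform.c hunk below). The BAR type test in isolation, as a sketch using the standard pci_regs.h values:

    /* low dword of a BAR: bit 0 selects I/O vs memory, bits 2:1 give the memory type */
    static int bar_is_64bit_mem(uint32_t bar_data)
    {
        return (bar_data & (PCI_BASE_ADDRESS_SPACE |          /* 0x01 */
                            PCI_BASE_ADDRESS_MEM_TYPE_MASK))  /* 0x06 */
            == (PCI_BASE_ADDRESS_SPACE_MEMORY |               /* 0x00 */
                PCI_BASE_ADDRESS_MEM_TYPE_64);                /* 0x04 */
    }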
24.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 24.2 +++ b/tools/firmware/hvmloader/tests.c Tue Sep 02 16:55:55 2008 -0700 24.3 @@ -0,0 +1,164 @@ 24.4 +/* 24.5 + * tests.c: HVM environment tests. 24.6 + * 24.7 + * Copyright (c) 2008, Citrix Systems, Inc. 24.8 + * 24.9 + * Authors: 24.10 + * Keir Fraser <keir.fraser@citrix.com> 24.11 + * 24.12 + * This program is free software; you can redistribute it and/or modify it 24.13 + * under the terms and conditions of the GNU General Public License, 24.14 + * version 2, as published by the Free Software Foundation. 24.15 + * 24.16 + * This program is distributed in the hope it will be useful, but WITHOUT 24.17 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 24.18 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 24.19 + * more details. 24.20 + * 24.21 + * You should have received a copy of the GNU General Public License along with 24.22 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 24.23 + * Place - Suite 330, Boston, MA 02111-1307 USA. 24.24 + */ 24.25 + 24.26 +#include "util.h" 24.27 + 24.28 +/* 24.29 + * Memory layout during tests: 24.30 + * 4MB to 8MB is cleared. 24.31 + * Page directory resides at 8MB. 24.32 + * 4 page table pages reside at 8MB+4kB to 8MB+20kB. 24.33 + * Pagetables identity-map 0-16MB, except 4kB at va 6MB maps to pa 5MB. 24.34 + */ 24.35 +#define PD_START (8ul << 20) 24.36 +#define PT_START (PD_START + 4096) 24.37 + 24.38 +static void setup_paging(void) 24.39 +{ 24.40 + uint32_t *pd = (uint32_t *)PD_START; 24.41 + uint32_t *pt = (uint32_t *)PT_START; 24.42 + uint32_t i; 24.43 + 24.44 + /* Identity map 0-16MB. */ 24.45 + for ( i = 0; i < 4; i++ ) 24.46 + pd[i] = (unsigned long)pt + (i<<12) + 3; 24.47 + for ( i = 0; i < (4*1024); i++ ) 24.48 + pt[i] = (i << 12) + 3; 24.49 + 24.50 + /* Page at virtual 6MB maps to physical 5MB. */ 24.51 + pt[6u<<8] -= 0x100000u; 24.52 +} 24.53 + 24.54 +static void start_paging(void) 24.55 +{ 24.56 + asm volatile ( 24.57 + "mov %%eax,%%cr3; mov %%cr0,%%eax; " 24.58 + "orl $0x80000000,%%eax; mov %%eax,%%cr0; " 24.59 + "jmp 1f; 1:" 24.60 + : : "a" (PD_START) : "memory" ); 24.61 +} 24.62 + 24.63 +static void stop_paging(void) 24.64 +{ 24.65 + asm volatile ( 24.66 + "mov %%cr0,%%eax; andl $0x7fffffff,%%eax; mov %%eax,%%cr0; " 24.67 + "jmp 1f; 1:" 24.68 + : : : "eax", "memory" ); 24.69 +} 24.70 + 24.71 +/* 24.72 + * rep_io_test: Tests REP INSB both forwards and backwards (EF.DF={0,1}) across 24.73 + * a discontiguous page boundary. 
24.74 + */ 24.75 +static int rep_io_test(void) 24.76 +{ 24.77 + uint32_t *p; 24.78 + uint32_t i, p0, p1, p2; 24.79 + int okay = 1; 24.80 + 24.81 + static const struct { 24.82 + unsigned long addr; 24.83 + uint32_t expected; 24.84 + } check[] = { 24.85 + { 0x00500000, 0x987654ff }, 24.86 + { 0x00500ffc, 0xff000000 }, 24.87 + { 0x005ffffc, 0xff000000 }, 24.88 + { 0x00601000, 0x000000ff }, 24.89 + { 0, 0 } 24.90 + }; 24.91 + 24.92 + start_paging(); 24.93 + 24.94 + /* Phys 5MB = 0xdeadbeef */ 24.95 + *(uint32_t *)0x500000ul = 0xdeadbeef; 24.96 + 24.97 + /* Phys 5MB = 0x98765432 */ 24.98 + *(uint32_t *)0x600000ul = 0x98765432; 24.99 + 24.100 + /* Phys 0x5fffff = Phys 0x500000 = 0xff (byte) */ 24.101 + asm volatile ( 24.102 + "rep insb" 24.103 + : "=d" (p0), "=c" (p1), "=D" (p2) 24.104 + : "0" (0x5f), "1" (2), "2" (0x5ffffful) : "memory" ); 24.105 + 24.106 + /* Phys 0x500fff = Phys 0x601000 = 0xff (byte) */ 24.107 + asm volatile ( 24.108 + "std ; rep insb ; cld" 24.109 + : "=d" (p0), "=c" (p1), "=D" (p2) 24.110 + : "0" (0x5f), "1" (2), "2" (0x601000ul) : "memory" ); 24.111 + 24.112 + stop_paging(); 24.113 + 24.114 + i = 0; 24.115 + for ( p = (uint32_t *)0x400000ul; p < (uint32_t *)0x700000ul; p++ ) 24.116 + { 24.117 + uint32_t expected = 0; 24.118 + if ( check[i].addr == (unsigned long)p ) 24.119 + { 24.120 + expected = check[i].expected; 24.121 + i++; 24.122 + } 24.123 + if ( *p != expected ) 24.124 + { 24.125 + printf("Bad value at 0x%08lx: saw %08x expected %08x\n", 24.126 + (unsigned long)p, *p, expected); 24.127 + okay = 0; 24.128 + } 24.129 + } 24.130 + 24.131 + return okay; 24.132 +} 24.133 + 24.134 +void perform_tests(void) 24.135 +{ 24.136 + int i, passed; 24.137 + 24.138 + static struct { 24.139 + int (* const test)(void); 24.140 + const char *description; 24.141 + } tests[] = { 24.142 + { rep_io_test, "REP INSB across page boundaries" }, 24.143 + { NULL, NULL } 24.144 + }; 24.145 + 24.146 + printf("Testing HVM environment:\n"); 24.147 + 24.148 + passed = 0; 24.149 + for ( i = 0; tests[i].test; i++ ) 24.150 + { 24.151 + printf(" - %s ... ", tests[i].description); 24.152 + memset((char *)(4ul << 20), 0, 4ul << 20); 24.153 + setup_paging(); 24.154 + if ( (*tests[i].test)() ) 24.155 + { 24.156 + printf("passed\n"); 24.157 + passed++; 24.158 + } 24.159 + else 24.160 + { 24.161 + printf("failed\n"); 24.162 + } 24.163 + } 24.164 + 24.165 + printf("Passed %d/%d tests\n", passed, i); 24.166 + BUG_ON(passed != i); 24.167 +}
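The expected values in check[] follow from the page tables built by setup_paging(): virtual 6MB aliases physical 5MB, so the write through virtual 0x600000 overwrites the 0xdeadbeef written through virtual 0x500000, and each INSB stores a byte read from port 0x5f, which nothing claims and which the table therefore assumes reads as 0xff. Worked through:

    /* after the two writes, phys 0x500000 = 0x98765432 (the alias wins)
     *
     * forward rep insb, 2 bytes, starting at va 0x5fffff:
     *   va 0x5fffff -> pa 0x5fffff = 0xff   => dword 0x5ffffc = 0xff000000
     *   va 0x600000 -> pa 0x500000 = 0xff   => dword 0x500000 = 0x987654ff
     *
     * backward rep insb (DF=1), 2 bytes, starting at va 0x601000:
     *   va 0x601000 -> pa 0x601000 = 0xff   => dword 0x601000 = 0x000000ff
     *   va 0x600fff -> pa 0x500fff = 0xff   => dword 0x500ffc = 0xff000000
     *
     * every other dword in the 4MB-7MB scan window must still be zero
     */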
25.1 --- a/tools/firmware/hvmloader/util.c Tue Sep 02 16:34:53 2008 -0700 25.2 +++ b/tools/firmware/hvmloader/util.c Tue Sep 02 16:55:55 2008 -0700 25.3 @@ -325,35 +325,34 @@ static void e820_collapse(void) 25.4 } 25.5 } 25.6 25.7 -uint32_t e820_malloc(uint32_t size) 25.8 +uint32_t e820_malloc(uint32_t size, uint32_t align) 25.9 { 25.10 uint32_t addr; 25.11 int i; 25.12 struct e820entry *ent = (struct e820entry *)HVM_E820; 25.13 25.14 - /* Align allocation request to a reasonable boundary (1kB). */ 25.15 - size = (size + 1023) & ~1023; 25.16 + /* Align to at least one kilobyte. */ 25.17 + if ( align < 1024 ) 25.18 + align = 1024; 25.19 25.20 for ( i = *HVM_E820_NR - 1; i >= 0; i-- ) 25.21 { 25.22 - addr = ent[i].addr; 25.23 + addr = (ent[i].addr + ent[i].size - size) & ~(align-1); 25.24 if ( (ent[i].type != E820_RAM) || /* not ram? */ 25.25 - (ent[i].size < size) || /* too small? */ 25.26 - (addr != ent[i].addr) || /* starts above 4gb? */ 25.27 + (addr < ent[i].addr) || /* too small or starts above 4gb? */ 25.28 ((addr + size) < addr) ) /* ends above 4gb? */ 25.29 continue; 25.30 25.31 - if ( ent[i].size != size ) 25.32 + if ( addr != ent[i].addr ) 25.33 { 25.34 memmove(&ent[i+1], &ent[i], (*HVM_E820_NR-i) * sizeof(*ent)); 25.35 (*HVM_E820_NR)++; 25.36 - ent[i].size -= size; 25.37 - addr += ent[i].size; 25.38 + ent[i].size = addr - ent[i].addr; 25.39 + ent[i+1].addr = addr; 25.40 + ent[i+1].size -= ent[i].size; 25.41 i++; 25.42 } 25.43 25.44 - ent[i].addr = addr; 25.45 - ent[i].size = size; 25.46 ent[i].type = E820_RESERVED; 25.47 25.48 e820_collapse();
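e820_malloc still carves the reserved block out of the top of the highest suitable E820 RAM entry, but the block's start address is now rounded down to the caller's alignment (minimum 1kB) instead of the old scheme of rounding the size up. A worked example with hypothetical numbers:

    /* RAM entry:  addr 0x00100000, size 0x7ff00000 (ends at 0x80000000)
     * request:    e820_malloc(8 << 20, 4096)
     *
     * addr = (0x80000000 - 0x00800000) & ~(4096 - 1) = 0x7f800000
     *
     * the entry is split in two:
     *   ent[i]    E820_RAM       0x00100000 .. 0x7f800000
     *   ent[i+1]  E820_RESERVED  0x7f800000 .. 0x80000000
     */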
26.1 --- a/tools/firmware/hvmloader/util.h Tue Sep 02 16:34:53 2008 -0700 26.2 +++ b/tools/firmware/hvmloader/util.h Tue Sep 02 16:55:55 2008 -0700 26.3 @@ -132,7 +132,7 @@ int printf(const char *fmt, ...) __attri 26.4 int vprintf(const char *fmt, va_list ap); 26.5 26.6 /* Reserve a RAM region in the e820 table. */ 26.7 -uint32_t e820_malloc(uint32_t size); 26.8 +uint32_t e820_malloc(uint32_t size, uint32_t align); 26.9 26.10 /* Prepare the 32bit BIOS */ 26.11 void highbios_setup(void); 26.12 @@ -143,6 +143,12 @@ void create_mp_tables(void); 26.13 int hvm_write_smbios_tables(void); 26.14 void smp_initialise(void); 26.15 26.16 +#ifndef NDEBUG 26.17 +void perform_tests(void); 26.18 +#else 26.19 +#define perform_tests() ((void)0) 26.20 +#endif 26.21 + 26.22 #define isdigit(c) ((c) >= '0' && (c) <= '9') 26.23 26.24 extern char _start[], _end[];
27.1 --- a/tools/ioemu/hw/cirrus_vga.c Tue Sep 02 16:34:53 2008 -0700 27.2 +++ b/tools/ioemu/hw/cirrus_vga.c Tue Sep 02 16:55:55 2008 -0700 27.3 @@ -2543,34 +2543,28 @@ static CPUWriteMemoryFunc *cirrus_linear 27.4 cirrus_linear_bitblt_writel, 27.5 }; 27.6 27.7 -static void *set_vram_mapping(unsigned long begin, unsigned long end) 27.8 +static void set_vram_mapping(CirrusVGAState *s, unsigned long begin, unsigned long end) 27.9 { 27.10 - xen_pfn_t *extent_start = NULL; 27.11 - unsigned long nr_extents; 27.12 - void *vram_pointer = NULL; 27.13 - int i; 27.14 - 27.15 - /* align begin and end address */ 27.16 - begin = begin & TARGET_PAGE_MASK; 27.17 - end = begin + VGA_RAM_SIZE; 27.18 - end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; 27.19 - nr_extents = (end - begin) >> TARGET_PAGE_BITS; 27.20 - 27.21 - extent_start = malloc(sizeof(xen_pfn_t) * nr_extents); 27.22 - if (extent_start == NULL) { 27.23 - fprintf(stderr, "Failed malloc on set_vram_mapping\n"); 27.24 - return NULL; 27.25 - } 27.26 - 27.27 - memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents); 27.28 - 27.29 - for (i = 0; i < nr_extents; i++) 27.30 - extent_start[i] = (begin + i * TARGET_PAGE_SIZE) >> TARGET_PAGE_BITS; 27.31 - 27.32 - if (set_mm_mapping(xc_handle, domid, nr_extents, 0, extent_start) < 0) { 27.33 - fprintf(logfile, "Failed set_mm_mapping\n"); 27.34 - free(extent_start); 27.35 - return NULL; 27.36 + unsigned long i; 27.37 + struct xen_add_to_physmap xatp; 27.38 + int rc; 27.39 + 27.40 + if (end > begin + VGA_RAM_SIZE) 27.41 + end = begin + VGA_RAM_SIZE; 27.42 + 27.43 + fprintf(logfile,"mapping vram to %lx - %lx\n", begin, end); 27.44 + 27.45 + xatp.domid = domid; 27.46 + xatp.space = XENMAPSPACE_mfn; 27.47 + 27.48 + for (i = 0; i < (end - begin) >> TARGET_PAGE_BITS; i++) { 27.49 + xatp.idx = s->vram_mfns[i]; 27.50 + xatp.gpfn = (begin >> TARGET_PAGE_BITS) + i; 27.51 + rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp); 27.52 + if (rc) { 27.53 + fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, rc); 27.54 + return; 27.55 + } 27.56 } 27.57 27.58 (void)xc_domain_pin_memory_cacheattr( 27.59 @@ -2578,61 +2572,42 @@ static void *set_vram_mapping(unsigned l 27.60 begin >> TARGET_PAGE_BITS, 27.61 end >> TARGET_PAGE_BITS, 27.62 XEN_DOMCTL_MEM_CACHEATTR_WB); 27.63 - 27.64 - vram_pointer = xc_map_foreign_pages(xc_handle, domid, 27.65 - PROT_READ|PROT_WRITE, 27.66 - extent_start, nr_extents); 27.67 - if (vram_pointer == NULL) { 27.68 - fprintf(logfile, "xc_map_foreign_batch vgaram returned error %d\n", 27.69 - errno); 27.70 - free(extent_start); 27.71 - return NULL; 27.72 - } 27.73 - 27.74 - memset(vram_pointer, 0, nr_extents * TARGET_PAGE_SIZE); 27.75 - 27.76 -#ifdef CONFIG_STUBDOM 27.77 - xenfb_pv_display_start(vram_pointer); 27.78 -#endif 27.79 - 27.80 - free(extent_start); 27.81 - 27.82 - return vram_pointer; 27.83 } 27.84 27.85 -static int unset_vram_mapping(unsigned long begin, unsigned long end, 27.86 - void *mapping) 27.87 +static void unset_vram_mapping(CirrusVGAState *s, unsigned long begin, unsigned long end) 27.88 { 27.89 - xen_pfn_t *extent_start = NULL; 27.90 - unsigned long nr_extents; 27.91 - int i; 27.92 - 27.93 - /* align begin and end address */ 27.94 - 27.95 - end = begin + VGA_RAM_SIZE; 27.96 - begin = begin & TARGET_PAGE_MASK; 27.97 - end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; 27.98 - nr_extents = (end - begin) >> TARGET_PAGE_BITS; 27.99 - 27.100 - extent_start = malloc(sizeof(xen_pfn_t) * nr_extents); 27.101 - 27.102 
- if (extent_start == NULL) { 27.103 - fprintf(stderr, "Failed malloc on set_mm_mapping\n"); 27.104 - return -1; 27.105 + if (s->stolen_vram_addr) { 27.106 + /* We can put it there for xend to save it efficiently */ 27.107 + set_vram_mapping(s, s->stolen_vram_addr, s->stolen_vram_addr + VGA_RAM_SIZE); 27.108 + } else { 27.109 + /* Old image, we have to unmap them completely */ 27.110 + struct xen_remove_from_physmap xrfp; 27.111 + unsigned long i; 27.112 + int rc; 27.113 + 27.114 + if (end > begin + VGA_RAM_SIZE) 27.115 + end = begin + VGA_RAM_SIZE; 27.116 + 27.117 + fprintf(logfile,"unmapping vram from %lx - %lx\n", begin, end); 27.118 + 27.119 + xrfp.domid = domid; 27.120 + 27.121 + for (i = 0; i < (end - begin) >> TARGET_PAGE_BITS; i++) { 27.122 + xrfp.gpfn = (begin >> TARGET_PAGE_BITS) + i; 27.123 + rc = xc_memory_op(xc_handle, XENMEM_remove_from_physmap, &xrfp); 27.124 + if (rc) { 27.125 + fprintf(stderr, "remove_from_physmap PFN %"PRI_xen_pfn" failed: %d\n", xrfp.gpfn, rc); 27.126 + return; 27.127 + } 27.128 + } 27.129 } 27.130 - 27.131 - /* Drop our own references to the vram pages */ 27.132 - munmap(mapping, nr_extents * TARGET_PAGE_SIZE); 27.133 - 27.134 - /* Now drop the guest's mappings */ 27.135 - memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents); 27.136 - for (i = 0; i < nr_extents; i++) 27.137 - extent_start[i] = (begin + (i * TARGET_PAGE_SIZE)) >> TARGET_PAGE_BITS; 27.138 - unset_mm_mapping(xc_handle, domid, nr_extents, 0, extent_start); 27.139 - 27.140 - free(extent_start); 27.141 - 27.142 - return 0; 27.143 +} 27.144 + 27.145 +void cirrus_restart_acc(CirrusVGAState *s) 27.146 +{ 27.147 + set_vram_mapping(s, s->lfb_addr, s->lfb_end); 27.148 + s->map_addr = s->lfb_addr; 27.149 + s->map_end = s->lfb_end; 27.150 } 27.151 27.152 /* Compute the memory access functions */ 27.153 @@ -2654,17 +2629,7 @@ static void cirrus_update_memory_access( 27.154 mode = s->gr[0x05] & 0x7; 27.155 if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) { 27.156 if (s->lfb_addr && s->lfb_end && !s->map_addr) { 27.157 - void *vram_pointer, *old_vram; 27.158 - 27.159 - vram_pointer = set_vram_mapping(s->lfb_addr, 27.160 - s->lfb_end); 27.161 - if (!vram_pointer) 27.162 - fprintf(stderr, "NULL vram_pointer\n"); 27.163 - else { 27.164 - old_vram = vga_update_vram((VGAState *)s, vram_pointer, 27.165 - VGA_RAM_SIZE); 27.166 - qemu_free(old_vram); 27.167 - } 27.168 + set_vram_mapping(s, s->lfb_addr, s->lfb_end); 27.169 s->map_addr = s->lfb_addr; 27.170 s->map_end = s->lfb_end; 27.171 } 27.172 @@ -2674,14 +2639,7 @@ static void cirrus_update_memory_access( 27.173 } else { 27.174 generic_io: 27.175 if (s->lfb_addr && s->lfb_end && s->map_addr) { 27.176 - void *old_vram; 27.177 - 27.178 - old_vram = vga_update_vram((VGAState *)s, NULL, VGA_RAM_SIZE); 27.179 - 27.180 - unset_vram_mapping(s->lfb_addr, 27.181 - s->lfb_end, 27.182 - old_vram); 27.183 - 27.184 + unset_vram_mapping(s, s->map_addr, s->map_end); 27.185 s->map_addr = s->map_end = 0; 27.186 } 27.187 s->cirrus_linear_write[0] = cirrus_linear_writeb; 27.188 @@ -3040,36 +2998,6 @@ static CPUWriteMemoryFunc *cirrus_mmio_w 27.189 cirrus_mmio_writel, 27.190 }; 27.191 27.192 -void cirrus_stop_acc(CirrusVGAState *s) 27.193 -{ 27.194 - if (s->map_addr){ 27.195 - int error; 27.196 - s->map_addr = 0; 27.197 - error = unset_vram_mapping(s->lfb_addr, 27.198 - s->lfb_end, s->vram_ptr); 27.199 - fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n"); 27.200 - } 27.201 -} 27.202 - 27.203 -void cirrus_restart_acc(CirrusVGAState *s) 27.204 -{ 27.205 - if 
(s->lfb_addr && s->lfb_end) { 27.206 - void *vram_pointer, *old_vram; 27.207 - fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n", 27.208 - s->lfb_addr, s->lfb_end); 27.209 - vram_pointer = set_vram_mapping(s->lfb_addr ,s->lfb_end); 27.210 - if (!vram_pointer){ 27.211 - fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n"); 27.212 - } else { 27.213 - old_vram = vga_update_vram((VGAState *)s, vram_pointer, 27.214 - VGA_RAM_SIZE); 27.215 - qemu_free(old_vram); 27.216 - s->map_addr = s->lfb_addr; 27.217 - s->map_end = s->lfb_end; 27.218 - } 27.219 - } 27.220 -} 27.221 - 27.222 /* load/save state */ 27.223 27.224 static void cirrus_vga_save(QEMUFile *f, void *opaque) 27.225 @@ -3118,7 +3046,10 @@ static void cirrus_vga_save(QEMUFile *f, 27.226 qemu_put_8s(f, &vga_acc); 27.227 qemu_put_be64s(f, (uint64_t*)&s->lfb_addr); 27.228 qemu_put_be64s(f, (uint64_t*)&s->lfb_end); 27.229 - qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 27.230 + qemu_put_be64s(f, &s->stolen_vram_addr); 27.231 + if (!s->stolen_vram_addr && !vga_acc) 27.232 + /* Old guest: VRAM is not mapped, we have to save it ourselves */ 27.233 + qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 27.234 } 27.235 27.236 static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id) 27.237 @@ -3127,7 +3058,7 @@ static int cirrus_vga_load(QEMUFile *f, 27.238 uint8_t vga_acc = 0; 27.239 int ret; 27.240 27.241 - if (version_id > 2) 27.242 + if (version_id > 3) 27.243 return -EINVAL; 27.244 27.245 if (s->pci_dev && version_id >= 2) { 27.246 @@ -3173,9 +3104,20 @@ static int cirrus_vga_load(QEMUFile *f, 27.247 qemu_get_8s(f, &vga_acc); 27.248 qemu_get_be64s(f, (uint64_t*)&s->lfb_addr); 27.249 qemu_get_be64s(f, (uint64_t*)&s->lfb_end); 27.250 - qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 27.251 - if (vga_acc){ 27.252 - cirrus_restart_acc(s); 27.253 + if (version_id >= 3) { 27.254 + qemu_get_be64s(f, &s->stolen_vram_addr); 27.255 + if (!s->stolen_vram_addr && !vga_acc) { 27.256 + /* Old guest, VRAM is not mapped, we have to restore it ourselves */ 27.257 + qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 27.258 + xen_vga_populate_vram(s->lfb_addr); 27.259 + } else 27.260 + xen_vga_vram_map(vga_acc ? s->lfb_addr : s->stolen_vram_addr, 0); 27.261 + } else { 27.262 + /* Old image, we have to populate and restore VRAM ourselves */ 27.263 + xen_vga_populate_vram(s->lfb_addr); 27.264 + qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 27.265 + if (vga_acc) 27.266 + cirrus_restart_acc(s); 27.267 } 27.268 27.269 /* force refresh */ 27.270 @@ -3297,7 +3239,7 @@ static void cirrus_init_common(CirrusVGA 27.271 s->cursor_invalidate = cirrus_cursor_invalidate; 27.272 s->cursor_draw_line = cirrus_cursor_draw_line; 27.273 27.274 - register_savevm("cirrus_vga", 0, 2, cirrus_vga_save, cirrus_vga_load, s); 27.275 + register_savevm("cirrus_vga", 0, 3, cirrus_vga_save, cirrus_vga_load, s); 27.276 } 27.277 27.278 /***************************************
28.1 --- a/tools/ioemu/hw/ide.c Tue Sep 02 16:34:53 2008 -0700 28.2 +++ b/tools/ioemu/hw/ide.c Tue Sep 02 16:55:55 2008 -0700 28.3 @@ -1108,14 +1108,14 @@ static void ide_flush_cb(void *opaque, i 28.4 return; 28.5 } 28.6 else 28.7 - s->status = READY_STAT; 28.8 + s->status = READY_STAT | SEEK_STAT; 28.9 ide_set_irq(s); 28.10 } 28.11 28.12 static void ide_atapi_cmd_ok(IDEState *s) 28.13 { 28.14 s->error = 0; 28.15 - s->status = READY_STAT; 28.16 + s->status = READY_STAT | SEEK_STAT; 28.17 s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD; 28.18 ide_set_irq(s); 28.19 } 28.20 @@ -1229,7 +1229,7 @@ static void ide_atapi_cmd_reply_end(IDES 28.21 if (s->packet_transfer_size <= 0) { 28.22 /* end of transfer */ 28.23 ide_transfer_stop(s); 28.24 - s->status = READY_STAT; 28.25 + s->status = READY_STAT | SEEK_STAT; 28.26 s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD; 28.27 ide_set_irq(s); 28.28 #ifdef DEBUG_IDE_ATAPI 28.29 @@ -1307,10 +1307,10 @@ static void ide_atapi_cmd_reply(IDEState 28.30 s->io_buffer_index = 0; 28.31 28.32 if (s->atapi_dma) { 28.33 - s->status = READY_STAT | DRQ_STAT; 28.34 + s->status = READY_STAT | SEEK_STAT | DRQ_STAT; 28.35 ide_dma_start(s, ide_atapi_cmd_read_dma_cb); 28.36 } else { 28.37 - s->status = READY_STAT; 28.38 + s->status = READY_STAT | SEEK_STAT; 28.39 ide_atapi_cmd_reply_end(s); 28.40 } 28.41 } 28.42 @@ -1325,7 +1325,7 @@ static void ide_atapi_cmd_read_pio(IDESt 28.43 s->io_buffer_index = sector_size; 28.44 s->cd_sector_size = sector_size; 28.45 28.46 - s->status = READY_STAT; 28.47 + s->status = READY_STAT | SEEK_STAT; 28.48 ide_atapi_cmd_reply_end(s); 28.49 } 28.50 28.51 @@ -1368,7 +1368,7 @@ static void ide_atapi_cmd_read_dma_cb(vo 28.52 } 28.53 28.54 if (s->packet_transfer_size <= 0) { 28.55 - s->status = READY_STAT; 28.56 + s->status = READY_STAT | SEEK_STAT; 28.57 s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD; 28.58 ide_set_irq(s); 28.59 eot: 28.60 @@ -1418,7 +1418,7 @@ static void ide_atapi_cmd_read_dma(IDESt 28.61 s->cd_sector_size = sector_size; 28.62 28.63 /* XXX: check if BUSY_STAT should be set */ 28.64 - s->status = READY_STAT | DRQ_STAT | BUSY_STAT; 28.65 + s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT; 28.66 ide_dma_start(s, ide_atapi_cmd_read_dma_cb); 28.67 } 28.68 28.69 @@ -1886,7 +1886,7 @@ static void ide_ioport_write(void *opaqu 28.70 ide_abort_command(s); 28.71 } else { 28.72 s->mult_sectors = s->nsector; 28.73 - s->status = READY_STAT; 28.74 + s->status = READY_STAT | SEEK_STAT; 28.75 } 28.76 ide_set_irq(s); 28.77 break; 28.78 @@ -1896,7 +1896,7 @@ static void ide_ioport_write(void *opaqu 28.79 case WIN_VERIFY_ONCE: 28.80 /* do sector number check ? 
*/ 28.81 ide_cmd_lba48_transform(s, lba48); 28.82 - s->status = READY_STAT; 28.83 + s->status = READY_STAT | SEEK_STAT; 28.84 ide_set_irq(s); 28.85 break; 28.86 case WIN_READ_EXT: 28.87 @@ -1965,12 +1965,12 @@ static void ide_ioport_write(void *opaqu 28.88 case WIN_READ_NATIVE_MAX: 28.89 ide_cmd_lba48_transform(s, lba48); 28.90 ide_set_sector(s, s->nb_sectors - 1); 28.91 - s->status = READY_STAT; 28.92 + s->status = READY_STAT | SEEK_STAT; 28.93 ide_set_irq(s); 28.94 break; 28.95 case WIN_CHECKPOWERMODE1: 28.96 s->nsector = 0xff; /* device active or idle */ 28.97 - s->status = READY_STAT; 28.98 + s->status = READY_STAT | SEEK_STAT; 28.99 ide_set_irq(s); 28.100 break; 28.101 case WIN_SETFEATURES: 28.102 @@ -2070,7 +2070,7 @@ static void ide_ioport_write(void *opaqu 28.103 /* overlapping commands not supported */ 28.104 if (s->feature & 0x02) 28.105 goto abort_cmd; 28.106 - s->status = READY_STAT; 28.107 + s->status = READY_STAT | SEEK_STAT; 28.108 s->atapi_dma = s->feature & 1; 28.109 s->nsector = 1; 28.110 ide_transfer_start(s, s->io_buffer, ATAPI_PACKET_SIZE, 28.111 @@ -2289,7 +2289,7 @@ static void ide_reset(IDEState *s) 28.112 s->mult_sectors = MAX_MULT_SECTORS; 28.113 s->cur_drive = s; 28.114 s->select = 0xa0; 28.115 - s->status = READY_STAT; 28.116 + s->status = READY_STAT | SEEK_STAT; 28.117 ide_set_signature(s); 28.118 /* init the transfer handler so that 0xffff is returned on data 28.119 accesses */
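Asserting SEEK_STAT (DSC, device seek complete) alongside READY_STAT everywhere the emulated drive reports ready matters to guest drivers that poll DSC before issuing commands. For reference, these are the conventional ATA status bits (the defines themselves sit earlier in ide.c, outside this hunk; the values below follow the ATA convention also used by Linux's <linux/hdreg.h>):

    #define ERR_STAT   0x01  /* command completed with error */
    #define DRQ_STAT   0x08  /* data request: ready for PIO transfer */
    #define SEEK_STAT  0x10  /* DSC: device seek complete */
    #define READY_STAT 0x40  /* DRDY: device ready */
    #define BUSY_STAT  0x80  /* BSY: command in progress */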
29.1 --- a/tools/ioemu/hw/pass-through.c Tue Sep 02 16:34:53 2008 -0700 29.2 +++ b/tools/ioemu/hw/pass-through.c Tue Sep 02 16:55:55 2008 -0700 29.3 @@ -2340,11 +2340,6 @@ static int pt_bar_reg_write(struct pt_de 29.4 return -1; 29.5 } 29.6 29.7 - /* always keep the emulate register value to 0, 29.8 - * because hvmloader does not support high MMIO for now. 29.9 - */ 29.10 - cfg_entry->data = 0; 29.11 - 29.12 /* never mapping the 'empty' upper region, 29.13 * because we'll do it enough for the lower region. 29.14 */
30.1 --- a/tools/ioemu/hw/pci.c Tue Sep 02 16:34:53 2008 -0700 30.2 +++ b/tools/ioemu/hw/pci.c Tue Sep 02 16:55:55 2008 -0700 30.3 @@ -45,7 +45,6 @@ struct PCIBus { 30.4 static void pci_update_mappings(PCIDevice *d); 30.5 30.6 target_phys_addr_t pci_mem_base; 30.7 -static int pci_irq_index; 30.8 static PCIBus *first_bus; 30.9 30.10 PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, 30.11 @@ -114,9 +113,6 @@ PCIDevice *pci_register_device(PCIBus *b 30.12 { 30.13 PCIDevice *pci_dev; 30.14 30.15 - if (pci_irq_index >= PCI_DEVICES_MAX) 30.16 - return NULL; 30.17 - 30.18 if (devfn < 0) { 30.19 for(devfn = bus->devfn_min ; devfn < 256; devfn += 8) { 30.20 if ( !bus->devices[devfn] && 30.21 @@ -140,7 +136,6 @@ PCIDevice *pci_register_device(PCIBus *b 30.22 config_write = pci_default_write_config; 30.23 pci_dev->config_read = config_read; 30.24 pci_dev->config_write = config_write; 30.25 - pci_dev->irq_index = pci_irq_index++; 30.26 bus->devices[devfn] = pci_dev; 30.27 return pci_dev; 30.28 }
31.1 --- a/tools/ioemu/hw/vga.c Tue Sep 02 16:34:53 2008 -0700 31.2 +++ b/tools/ioemu/hw/vga.c Tue Sep 02 16:55:55 2008 -0700 31.3 @@ -23,6 +23,7 @@ 31.4 */ 31.5 #include "vl.h" 31.6 #include "vga_int.h" 31.7 +#include <sys/mman.h> 31.8 31.9 //#define DEBUG_VGA 31.10 //#define DEBUG_VGA_MEM 31.11 @@ -1776,7 +1777,10 @@ static void vga_save(QEMUFile *f, void * 31.12 #endif 31.13 vram_size = s->vram_size; 31.14 qemu_put_be32s(f, &vram_size); 31.15 - qemu_put_buffer(f, s->vram_ptr, s->vram_size); 31.16 + qemu_put_be64s(f, &s->stolen_vram_addr); 31.17 + if (!s->stolen_vram_addr) 31.18 + /* Old guest: VRAM is not mapped, we have to save it ourselves */ 31.19 + qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 31.20 } 31.21 31.22 static int vga_load(QEMUFile *f, void *opaque, int version_id) 31.23 @@ -1788,7 +1792,7 @@ static int vga_load(QEMUFile *f, void *o 31.24 int i; 31.25 #endif 31.26 31.27 - if (version_id > 3) 31.28 + if (version_id > 4) 31.29 return -EINVAL; 31.30 31.31 if (s->pci_dev && version_id >= 2) { 31.32 @@ -1839,7 +1843,14 @@ static int vga_load(QEMUFile *f, void *o 31.33 qemu_get_be32s(f, &vram_size); 31.34 if (vram_size != s->vram_size) 31.35 return -EINVAL; 31.36 - qemu_get_buffer(f, s->vram_ptr, s->vram_size); 31.37 + if (version_id >= 4) { 31.38 + qemu_get_be64s(f, &s->stolen_vram_addr); 31.39 + if (s->stolen_vram_addr) 31.40 + xen_vga_vram_map(s->stolen_vram_addr, 0); 31.41 + } 31.42 + /* Old guest, VRAM is not mapped, we have to restore it ourselves */ 31.43 + if (!s->stolen_vram_addr) 31.44 + qemu_get_buffer(f, s->vram_ptr, s->vram_size); 31.45 } 31.46 31.47 /* force refresh */ 31.48 @@ -1994,6 +2005,100 @@ void vga_bios_init(VGAState *s) 31.49 /* TODO: add vbe support if enabled */ 31.50 } 31.51 31.52 + 31.53 +static VGAState *xen_vga_state; 31.54 + 31.55 +/* When loading old images we have to populate the video ram ourselves */ 31.56 +void xen_vga_populate_vram(uint64_t vram_addr) 31.57 +{ 31.58 + unsigned long nr_pfn; 31.59 + struct xen_remove_from_physmap xrfp; 31.60 + xen_pfn_t *pfn_list; 31.61 + int i; 31.62 + int rc; 31.63 + 31.64 + fprintf(logfile, "populating video RAM at %lx\n", vram_addr); 31.65 + 31.66 + nr_pfn = VGA_RAM_SIZE >> TARGET_PAGE_BITS; 31.67 + 31.68 + pfn_list = malloc(sizeof(*pfn_list) * nr_pfn); 31.69 + 31.70 + for (i = 0; i < nr_pfn; i++) 31.71 + pfn_list[i] = (vram_addr >> TARGET_PAGE_BITS) + i; 31.72 + 31.73 + if (xc_domain_memory_populate_physmap(xc_handle, domid, nr_pfn, 0, 0, pfn_list)) { 31.74 + fprintf(stderr, "Failed to populate video ram\n"); 31.75 + exit(1); 31.76 + } 31.77 + free(pfn_list); 31.78 + 31.79 + xen_vga_vram_map(vram_addr, 0); 31.80 + 31.81 + /* Unmap them from the guest for now. 
*/ 31.82 + xrfp.domid = domid; 31.83 + for (i = 0; i < nr_pfn; i++) { 31.84 + xrfp.gpfn = (vram_addr >> TARGET_PAGE_BITS) + i; 31.85 + rc = xc_memory_op(xc_handle, XENMEM_remove_from_physmap, &xrfp); 31.86 + if (rc) { 31.87 + fprintf(stderr, "remove_from_physmap PFN %"PRI_xen_pfn" failed: %d\n", xrfp.gpfn, rc); 31.88 + break; 31.89 + } 31.90 + } 31.91 +} 31.92 + 31.93 +/* Called once video memory has been allocated in the GPFN space */ 31.94 +void xen_vga_vram_map(uint64_t vram_addr, int copy) 31.95 +{ 31.96 + unsigned long nr_pfn; 31.97 + xen_pfn_t *pfn_list; 31.98 + int i; 31.99 + void *vram; 31.100 + 31.101 + fprintf(logfile, "mapping video RAM from %lx\n", vram_addr); 31.102 + 31.103 + nr_pfn = VGA_RAM_SIZE >> TARGET_PAGE_BITS; 31.104 + 31.105 + pfn_list = malloc(sizeof(*pfn_list) * nr_pfn); 31.106 + 31.107 + for (i = 0; i < nr_pfn; i++) 31.108 + pfn_list[i] = (vram_addr >> TARGET_PAGE_BITS) + i; 31.109 + 31.110 + vram = xc_map_foreign_pages(xc_handle, domid, 31.111 + PROT_READ|PROT_WRITE, 31.112 + pfn_list, nr_pfn); 31.113 + 31.114 + if (!vram) { 31.115 + fprintf(stderr, "Failed to map vram\n"); 31.116 + exit(1); 31.117 + } 31.118 + 31.119 + if (xc_domain_memory_translate_gpfn_list(xc_handle, domid, nr_pfn, 31.120 + pfn_list, pfn_list)) { 31.121 + fprintf(stderr, "Failed translation in xen_vga_vram_addr\n"); 31.122 + exit(1); 31.123 + } 31.124 + 31.125 + if (copy) 31.126 + memcpy(vram, xen_vga_state->vram_ptr, VGA_RAM_SIZE); 31.127 + qemu_free(xen_vga_state->vram_ptr); 31.128 + xen_vga_state->vram_ptr = vram; 31.129 + xen_vga_state->vram_mfns = pfn_list; 31.130 +#ifdef CONFIG_STUBDOM 31.131 + xenfb_pv_display_start(vram); 31.132 +#endif 31.133 +} 31.134 + 31.135 +/* Called at boot time when the BIOS has allocated video RAM */ 31.136 +void xen_vga_stolen_vram_addr(uint64_t stolen_vram_addr) 31.137 +{ 31.138 + fprintf(logfile, "stolen video RAM at %lx\n", stolen_vram_addr); 31.139 + 31.140 + xen_vga_state->stolen_vram_addr = stolen_vram_addr; 31.141 + 31.142 + /* And copy from the initialization value */ 31.143 + xen_vga_vram_map(stolen_vram_addr, 1); 31.144 +} 31.145 + 31.146 /* when used on xen environment, the vga_ram_base is not used */ 31.147 void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base, 31.148 unsigned long vga_ram_offset, int vga_ram_size) 31.149 @@ -2025,13 +2130,9 @@ void vga_common_init(VGAState *s, Displa 31.150 31.151 vga_reset(s); 31.152 31.153 - /* Video RAM must be page-aligned for PVFB memory sharing */ 31.154 - s->vram_ptr = s->vram_alloc = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size); 31.155 - 31.156 -#ifdef CONFIG_STUBDOM 31.157 - if (!cirrus_vga_enabled) 31.158 - xenfb_pv_display_start(s->vram_ptr); 31.159 -#endif 31.160 + s->vram_ptr = qemu_malloc(vga_ram_size); 31.161 + s->vram_mfns = NULL; 31.162 + xen_vga_state = s; 31.163 31.164 s->vram_offset = vga_ram_offset; 31.165 s->vram_size = vga_ram_size; 31.166 @@ -2051,7 +2152,7 @@ static void vga_init(VGAState *s) 31.167 { 31.168 int vga_io_memory; 31.169 31.170 - register_savevm("vga", 0, 3, vga_save, vga_load, s); 31.171 + register_savevm("vga", 0, 4, vga_save, vga_load, s); 31.172 31.173 register_ioport_write(0x3c0, 16, 1, vga_ioport_write, s); 31.174 31.175 @@ -2163,33 +2264,6 @@ int pci_vga_init(PCIBus *bus, DisplaySta 31.176 return 0; 31.177 } 31.178 31.179 -void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size) 31.180 -{ 31.181 - uint8_t *old_pointer; 31.182 - 31.183 - if (s->vram_size != vga_ram_size) { 31.184 - fprintf(stderr, "No support to change 
vga_ram_size\n"); 31.185 - return NULL; 31.186 - } 31.187 - 31.188 - if (!vga_ram_base) { 31.189 - vga_ram_base = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size + TARGET_PAGE_SIZE + 1); 31.190 - if (!vga_ram_base) { 31.191 - fprintf(stderr, "reallocate error\n"); 31.192 - return NULL; 31.193 - } 31.194 - } 31.195 - 31.196 - /* XXX lock needed? */ 31.197 - old_pointer = s->vram_alloc; 31.198 - s->vram_alloc = vga_ram_base; 31.199 - vga_ram_base = (uint8_t *)((long)(vga_ram_base + 15) & ~15L); 31.200 - memcpy(vga_ram_base, s->vram_ptr, vga_ram_size); 31.201 - s->vram_ptr = vga_ram_base; 31.202 - 31.203 - return old_pointer; 31.204 -} 31.205 - 31.206 /********************************************************/ 31.207 /* vga screen dump */ 31.208
32.1 --- a/tools/ioemu/hw/vga_int.h Tue Sep 02 16:34:53 2008 -0700 32.2 +++ b/tools/ioemu/hw/vga_int.h Tue Sep 02 16:55:55 2008 -0700 32.3 @@ -80,9 +80,9 @@ 32.4 #define VGA_MAX_HEIGHT 2048 32.5 32.6 #define VGA_STATE_COMMON \ 32.7 - uint8_t *vram_alloc; \ 32.8 uint8_t *vram_ptr; \ 32.9 - uint8_t *vram_shadow; \ 32.10 + xen_pfn_t *vram_mfns; \ 32.11 + uint64_t stolen_vram_addr; /* Address of stolen RAM */ \ 32.12 unsigned long vram_offset; \ 32.13 unsigned int vram_size; \ 32.14 unsigned long bios_offset; \
33.1 --- a/tools/ioemu/hw/xen_platform.c Tue Sep 02 16:34:53 2008 -0700 33.2 +++ b/tools/ioemu/hw/xen_platform.c Tue Sep 02 16:55:55 2008 -0700 33.3 @@ -34,6 +34,7 @@ typedef struct PCIXenPlatformState 33.4 { 33.5 PCIDevice pci_dev; 33.6 uint8_t platform_flags; 33.7 + uint64_t vga_stolen_ram; 33.8 } PCIXenPlatformState; 33.9 33.10 static uint32_t xen_platform_ioport_readb(void *opaque, uint32_t addr) 33.11 @@ -69,11 +70,46 @@ static void xen_platform_ioport_writeb(v 33.12 } 33.13 33.14 33.15 +static uint32_t xen_platform_ioport_readl(void *opaque, uint32_t addr) 33.16 +{ 33.17 + PCIXenPlatformState *d = opaque; 33.18 + 33.19 + addr &= 0xff; 33.20 + 33.21 + switch (addr) { 33.22 + case 4: /* VGA stolen memory address */ 33.23 + return d->vga_stolen_ram; 33.24 + default: 33.25 + return ~0u; 33.26 + } 33.27 +} 33.28 + 33.29 +static void xen_platform_ioport_writel(void *opaque, uint32_t addr, uint32_t val) 33.30 +{ 33.31 + PCIXenPlatformState *d = opaque; 33.32 + 33.33 + addr &= 0xff; 33.34 + val &= 0xffffffff; 33.35 + 33.36 + switch (addr) { 33.37 + case 4: /* VGA stolen memory address */ 33.38 + d->vga_stolen_ram = val; 33.39 + xen_vga_stolen_vram_addr(val); 33.40 + break; 33.41 + default: 33.42 + break; 33.43 + } 33.44 +} 33.45 + 33.46 + 33.47 + 33.48 static void platform_ioport_map(PCIDevice *pci_dev, int region_num, uint32_t addr, uint32_t size, int type) 33.49 { 33.50 PCIXenPlatformState *d = (PCIXenPlatformState *)pci_dev; 33.51 register_ioport_write(addr, size, 1, xen_platform_ioport_writeb, d); 33.52 + register_ioport_write(addr, size, 4, xen_platform_ioport_writel, d); 33.53 register_ioport_read(addr, size, 1, xen_platform_ioport_readb, d); 33.54 + register_ioport_read(addr, size, 4, xen_platform_ioport_readl, d); 33.55 } 33.56 33.57 static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr) 33.58 @@ -155,6 +191,7 @@ void xen_pci_save(QEMUFile *f, void *opa 33.59 33.60 pci_device_save(&d->pci_dev, f); 33.61 qemu_put_8s(f, &d->platform_flags); 33.62 + qemu_put_be64s(f, &d->vga_stolen_ram); 33.63 } 33.64 33.65 int xen_pci_load(QEMUFile *f, void *opaque, int version_id) 33.66 @@ -173,6 +210,7 @@ int xen_pci_load(QEMUFile *f, void *opaq 33.67 uint8_t flags; 33.68 qemu_get_8s(f, &flags); 33.69 xen_platform_ioport_writeb(d, 0, flags); 33.70 + qemu_get_be64s(f, &d->vga_stolen_ram); 33.71 } 33.72 33.73 return 0;
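The vga.c and xen_platform.c hunks together define a small guest-visible protocol: the guest's video BIOS allocates its own VRAM pages, then announces their base address with a 32-bit write to offset 4 of the Xen platform device's I/O BAR; that write lands in xen_platform_ioport_writel() above and reaches xen_vga_vram_map() via xen_vga_stolen_vram_addr(). A minimal guest-side sketch, assuming the I/O BAR base has already been found by PCI probing (iobase and the helper names are illustrative, not part of the patch):

    #include <stdint.h>

    /* Port-write helper: standard x86 inline asm, not from the patch. */
    static inline void outl(uint16_t port, uint32_t val)
    {
        asm volatile ( "outl %0, %1" : : "a" (val), "Nd" (port) );
    }

    /* Announce the stolen-VRAM base; offset 4 is the register added above. */
    void announce_stolen_vram(uint16_t iobase, uint32_t vram_addr)
    {
        outl(iobase + 4, vram_addr); /* handled by xen_platform_ioport_writel() */
    }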
34.1 --- a/tools/ioemu/vl.c Tue Sep 02 16:34:53 2008 -0700 34.2 +++ b/tools/ioemu/vl.c Tue Sep 02 16:55:55 2008 -0700 34.3 @@ -7023,38 +7023,6 @@ static BOOL WINAPI qemu_ctrl_handler(DWO 34.4 34.5 #include <xg_private.h> 34.6 34.7 -/* FIXME Flush the shadow page */ 34.8 -int unset_mm_mapping(int xc_handle, uint32_t domid, 34.9 - unsigned long nr_pages, unsigned int address_bits, 34.10 - xen_pfn_t *extent_start) 34.11 -{ 34.12 - int err = 0; 34.13 - 34.14 - err = xc_domain_memory_decrease_reservation(xc_handle, domid, 34.15 - nr_pages, 0, extent_start); 34.16 - if (err) 34.17 - fprintf(stderr, "Failed to decrease physmap\n"); 34.18 - 34.19 - return err; 34.20 -} 34.21 - 34.22 -int set_mm_mapping(int xc_handle, uint32_t domid, 34.23 - unsigned long nr_pages, unsigned int address_bits, 34.24 - xen_pfn_t *extent_start) 34.25 -{ 34.26 - int err = 0; 34.27 - 34.28 - err = xc_domain_memory_populate_physmap( 34.29 - xc_handle, domid, nr_pages, 0, 34.30 - XENMEMF_address_bits(address_bits), extent_start); 34.31 - if (err) { 34.32 - fprintf(stderr, "Failed to populate physmap\n"); 34.33 - return -1; 34.34 - } 34.35 - 34.36 - return 0; 34.37 -} 34.38 - 34.39 34.40 int main(int argc, char **argv) 34.41 {
35.1 --- a/tools/ioemu/vl.h Tue Sep 02 16:34:53 2008 -0700 35.2 +++ b/tools/ioemu/vl.h Tue Sep 02 16:55:55 2008 -0700 35.3 @@ -812,8 +812,6 @@ struct PCIDevice { 35.4 /* do not access the following fields */ 35.5 PCIConfigReadFunc *config_read; 35.6 PCIConfigWriteFunc *config_write; 35.7 - /* ??? This is a PC-specific hack, and should be removed. */ 35.8 - int irq_index; 35.9 35.10 /* Current IRQ levels. Used internally by the generic PCI code. */ 35.11 int irq_state[4]; 35.12 @@ -1560,6 +1558,9 @@ void timeoffset_get(void); 35.13 /* xen_platform.c */ 35.14 #ifndef QEMU_TOOL 35.15 void pci_xen_platform_init(PCIBus *bus); 35.16 +void xen_vga_stolen_vram_addr(uint64_t vram_addr); 35.17 +void xen_vga_populate_vram(uint64_t vram_addr); 35.18 +void xen_vga_vram_map(uint64_t vram_addr, int copy); 35.19 #endif 35.20 35.21 /* pci_emulation.c */
36.1 --- a/tools/libxc/xc_dom_boot.c Tue Sep 02 16:34:53 2008 -0700 36.2 +++ b/tools/libxc/xc_dom_boot.c Tue Sep 02 16:55:55 2008 -0700 36.3 @@ -187,7 +187,7 @@ void *xc_dom_boot_domU_map(struct xc_dom 36.4 int xc_dom_boot_image(struct xc_dom_image *dom) 36.5 { 36.6 DECLARE_DOMCTL; 36.7 - void *ctxt; 36.8 + vcpu_guest_context_any_t ctxt; 36.9 int rc; 36.10 36.11 xc_dom_printf("%s: called\n", __FUNCTION__); 36.12 @@ -245,12 +245,11 @@ int xc_dom_boot_image(struct xc_dom_imag 36.13 return rc; 36.14 36.15 /* let the vm run */ 36.16 - ctxt = xc_dom_malloc(dom, PAGE_SIZE * 2 /* FIXME */ ); 36.17 - memset(ctxt, 0, PAGE_SIZE * 2); 36.18 - if ( (rc = dom->arch_hooks->vcpu(dom, ctxt)) != 0 ) 36.19 + memset(&ctxt, 0, sizeof(ctxt)); 36.20 + if ( (rc = dom->arch_hooks->vcpu(dom, &ctxt)) != 0 ) 36.21 return rc; 36.22 xc_dom_unmap_all(dom); 36.23 - rc = launch_vm(dom->guest_xc, dom->guest_domid, ctxt); 36.24 + rc = launch_vm(dom->guest_xc, dom->guest_domid, &ctxt); 36.25 36.26 return rc; 36.27 }
37.1 --- a/tools/libxc/xc_domain.c Tue Sep 02 16:34:53 2008 -0700 37.2 +++ b/tools/libxc/xc_domain.c Tue Sep 02 16:55:55 2008 -0700 37.3 @@ -537,6 +537,33 @@ int xc_domain_memory_populate_physmap(in 37.4 return err; 37.5 } 37.6 37.7 +int xc_domain_memory_translate_gpfn_list(int xc_handle, 37.8 + uint32_t domid, 37.9 + unsigned long nr_gpfns, 37.10 + xen_pfn_t *gpfn_list, 37.11 + xen_pfn_t *mfn_list) 37.12 +{ 37.13 + int err; 37.14 + struct xen_translate_gpfn_list translate_gpfn_list = { 37.15 + .domid = domid, 37.16 + .nr_gpfns = nr_gpfns, 37.17 + }; 37.18 + set_xen_guest_handle(translate_gpfn_list.gpfn_list, gpfn_list); 37.19 + set_xen_guest_handle(translate_gpfn_list.mfn_list, mfn_list); 37.20 + 37.21 + err = xc_memory_op(xc_handle, XENMEM_translate_gpfn_list, &translate_gpfn_list); 37.22 + 37.23 + if ( err != 0 ) 37.24 + { 37.25 + DPRINTF("Failed translation for dom %d (%ld PFNs)\n", 37.26 + domid, nr_gpfns); 37.27 + errno = -err; 37.28 + err = -1; 37.29 + } 37.30 + 37.31 + return err; 37.32 +} 37.33 + 37.34 int xc_domain_max_vcpus(int xc_handle, uint32_t domid, unsigned int max) 37.35 { 37.36 DECLARE_DOMCTL;
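The new libxc call is a thin wrapper around XENMEM_translate_gpfn_list; note that vga.c above passes the same array as both the GPFN input and the MFN output, which the interface allows. A hedged usage sketch for a contiguous range (the wrapper function is illustrative):

    #include <xenctrl.h>

    /* Translate a contiguous run of guest PFNs to MFNs, in place. */
    int translate_range(int xc_handle, uint32_t domid, xen_pfn_t base,
                        unsigned long count, xen_pfn_t *pfns)
    {
        unsigned long i;

        for ( i = 0; i < count; i++ )
            pfns[i] = base + i;    /* GPFNs in; MFNs come back in the same buffer */

        return xc_domain_memory_translate_gpfn_list(xc_handle, domid,
                                                    count, pfns, pfns);
    }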
38.1 --- a/tools/libxc/xc_domain_save.c Tue Sep 02 16:34:53 2008 -0700 38.2 +++ b/tools/libxc/xc_domain_save.c Tue Sep 02 16:55:55 2008 -0700 38.3 @@ -1111,12 +1111,6 @@ int xc_domain_save(int xc_handle, int io 38.4 (test_bit(n, to_fix) && last_iter)) ) 38.5 continue; 38.6 38.7 - /* Skip PFNs that aren't really there */ 38.8 - if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */ 38.9 - || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) 38.10 - && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ ) 38.11 - continue; 38.12 - 38.13 /* 38.14 ** we get here if: 38.15 ** 1. page is marked to_send & hasn't already been re-dirtied
39.1 --- a/tools/libxc/xc_minios.c Tue Sep 02 16:34:53 2008 -0700 39.2 +++ b/tools/libxc/xc_minios.c Tue Sep 02 16:55:55 2008 -0700 39.3 @@ -64,7 +64,6 @@ void *xc_map_foreign_range(int xc_handle 39.4 unsigned long mfn) 39.5 { 39.6 unsigned long pt_prot = 0; 39.7 - printf("xc_map_foreign_range(%lx, %d)\n", mfn, size); 39.8 #ifdef __ia64__ 39.9 /* TODO */ 39.10 #else 39.11 @@ -81,9 +80,10 @@ void *xc_map_foreign_ranges(int xc_handl 39.12 size_t size, int prot, size_t chunksize, 39.13 privcmd_mmap_entry_t entries[], int nentries) 39.14 { 39.15 - unsigned long mfns[size / PAGE_SIZE]; 39.16 + unsigned long *mfns; 39.17 int i, j, n; 39.18 unsigned long pt_prot = 0; 39.19 + void *ret; 39.20 #ifdef __ia64__ 39.21 /* TODO */ 39.22 #else 39.23 @@ -93,12 +93,16 @@ void *xc_map_foreign_ranges(int xc_handl 39.24 pt_prot = L1_PROT; 39.25 #endif 39.26 39.27 + mfns = malloc((size / PAGE_SIZE) * sizeof(*mfns)); 39.28 + 39.29 n = 0; 39.30 for (i = 0; i < nentries; i++) 39.31 for (j = 0; j < chunksize / PAGE_SIZE; j++) 39.32 mfns[n++] = entries[i].mfn + j; 39.33 39.34 - return map_frames_ex(mfns, n, 1, 0, 1, dom, 0, pt_prot); 39.35 + ret = map_frames_ex(mfns, n, 1, 0, 1, dom, 0, pt_prot); 39.36 + free(mfns); 39.37 + return ret; 39.38 } 39.39 39.40
40.1 --- a/tools/libxc/xenctrl.h Tue Sep 02 16:34:53 2008 -0700 40.2 +++ b/tools/libxc/xenctrl.h Tue Sep 02 16:55:55 2008 -0700 40.3 @@ -628,6 +628,12 @@ int xc_domain_memory_populate_physmap(in 40.4 unsigned int mem_flags, 40.5 xen_pfn_t *extent_start); 40.6 40.7 +int xc_domain_memory_translate_gpfn_list(int xc_handle, 40.8 + uint32_t domid, 40.9 + unsigned long nr_gpfns, 40.10 + xen_pfn_t *gpfn_list, 40.11 + xen_pfn_t *mfn_list); 40.12 + 40.13 int xc_domain_ioport_permission(int xc_handle, 40.14 uint32_t domid, 40.15 uint32_t first_port,
41.1 --- a/tools/pygrub/src/pygrub Tue Sep 02 16:34:53 2008 -0700 41.2 +++ b/tools/pygrub/src/pygrub Tue Sep 02 16:55:55 2008 -0700 41.3 @@ -124,7 +124,7 @@ def get_fs_offset(file): 41.4 class GrubLineEditor(curses.textpad.Textbox): 41.5 def __init__(self, screen, startx, starty, line = ""): 41.6 screen.addstr(startx, starty, "> ") 41.7 - screen.refresh() 41.8 + screen.noutrefresh() 41.9 win = curses.newwin(1, 74, startx, starty + 2) 41.10 curses.textpad.Textbox.__init__(self, win) 41.11 41.12 @@ -137,7 +137,7 @@ class GrubLineEditor(curses.textpad.Text 41.13 """Show the text. One of our advantages over standard textboxes 41.14 is that we can handle lines longer than the window.""" 41.15 41.16 - self.win.clear() 41.17 + self.win.erase() 41.18 p = self.pos 41.19 off = 0 41.20 while p > 70: 41.21 @@ -188,6 +188,7 @@ class GrubLineEditor(curses.textpad.Text 41.22 return 1 41.23 41.24 def edit(self): 41.25 + curses.doupdate() 41.26 r = curses.textpad.Textbox.edit(self) 41.27 if self.cancelled: 41.28 return None 41.29 @@ -217,16 +218,15 @@ class Grub: 41.30 curses.def_prog_mode() 41.31 41.32 curses.reset_prog_mode() 41.33 - self.screen.clear() 41.34 - self.screen.refresh() 41.35 + self.screen.erase() 41.36 41.37 # create basic grub screen with a box of entries and a textbox 41.38 self.screen.addstr(1, 4, "pyGRUB version %s" %(PYGRUB_VER,)) 41.39 self.entry_win.box() 41.40 - self.screen.refresh() 41.41 + self.screen.noutrefresh() 41.42 41.43 def fill_entry_list(self): 41.44 - self.entry_win.clear() 41.45 + self.entry_win.erase() 41.46 self.entry_win.box() 41.47 41.48 maxy = self.entry_win.getmaxyx()[0]-3 # maxy - 2 for the frame + index 41.49 @@ -244,7 +244,7 @@ class Grub: 41.50 self.entry_win.addstr(y + 1 - self.start_image, 2, i.title.ljust(70)) 41.51 if y == self.selected_image: 41.52 self.entry_win.attroff(curses.A_REVERSE) 41.53 - self.entry_win.refresh() 41.54 + self.entry_win.noutrefresh() 41.55 41.56 def edit_entry(self, origimg): 41.57 def draw(): 41.58 @@ -259,13 +259,13 @@ class Grub: 41.59 self.text_win.addch(0, 14, curses.ACS_DARROW) 41.60 (y, x) = self.text_win.getmaxyx() 41.61 self.text_win.move(y - 1, x - 1) 41.62 - self.text_win.refresh() 41.63 + self.text_win.noutrefresh() 41.64 41.65 curline = 1 41.66 img = copy.deepcopy(origimg) 41.67 while 1: 41.68 draw() 41.69 - self.entry_win.clear() 41.70 + self.entry_win.erase() 41.71 self.entry_win.box() 41.72 for idx in range(1, len(img.lines)): 41.73 # current line should be highlighted 41.74 @@ -280,7 +280,8 @@ class Grub: 41.75 self.entry_win.addstr(idx, 2, l) 41.76 if idx == curline: 41.77 self.entry_win.attroff(curses.A_REVERSE) 41.78 - self.entry_win.refresh() 41.79 + self.entry_win.noutrefresh() 41.80 + curses.doupdate() 41.81 41.82 c = self.screen.getch() 41.83 if c in (ord('q'), 27): # 27 == esc 41.84 @@ -318,10 +319,10 @@ class Grub: 41.85 origimg.reset(img.lines) 41.86 41.87 def edit_line(self, line): 41.88 - self.screen.clear() 41.89 + self.screen.erase() 41.90 self.screen.addstr(1, 2, "[ Minimal BASH-like line editing is supported. ") 41.91 self.screen.addstr(2, 2, " ESC at any time cancels. ENTER at any time accepts your changes. ]") 41.92 - self.screen.refresh() 41.93 + self.screen.noutrefresh() 41.94 41.95 t = GrubLineEditor(self.screen, 5, 2, line) 41.96 enable_cursor(True) 41.97 @@ -331,10 +332,10 @@ class Grub: 41.98 return None 41.99 41.100 def command_line_mode(self): 41.101 - self.screen.clear() 41.102 + self.screen.erase() 41.103 self.screen.addstr(1, 2, "[ Minimal BASH-like line editing is supported. 
ESC at any time ") 41.104 self.screen.addstr(2, 2, " exits. Typing 'boot' will boot with your entered commands. ] ") 41.105 - self.screen.refresh() 41.106 + self.screen.noutrefresh() 41.107 41.108 y = 5 41.109 lines = [] 41.110 @@ -420,7 +421,7 @@ class Grub: 41.111 self.text_win.addch(0, 14, curses.ACS_DARROW) 41.112 (y, x) = self.text_win.getmaxyx() 41.113 self.text_win.move(y - 1, x - 1) 41.114 - self.text_win.refresh() 41.115 + self.text_win.noutrefresh() 41.116 41.117 # now loop until we hit the timeout or get a go from the user 41.118 mytime = 0 41.119 @@ -433,6 +434,7 @@ class Grub: 41.120 else: 41.121 self.screen.addstr(20, 5, " " * 80) 41.122 self.fill_entry_list() 41.123 + curses.doupdate() 41.124 41.125 c = self.screen.getch() 41.126 if c == -1:
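The pygrub hunks are a standard curses batching conversion: erase() replaces clear() so the next refresh is not forced to repaint the whole screen, each window stages its damage with noutrefresh(), and a single doupdate() per frame pushes everything to the terminal at once, which removes the flicker. The same idiom in C ncurses, the library Python's curses module wraps (window names are illustrative):

    #include <curses.h>

    /* Redraw two windows with one physical-screen update. */
    void redraw(WINDOW *entries, WINDOW *text)
    {
        werase(entries);          /* blank without forcing a full repaint */
        box(entries, 0, 0);
        wnoutrefresh(entries);    /* stage into the virtual screen */
        wnoutrefresh(text);
        doupdate();               /* one write to the real terminal */
    }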
42.1 --- a/tools/python/xen/util/pci.py Tue Sep 02 16:34:53 2008 -0700 42.2 +++ b/tools/python/xen/util/pci.py Tue Sep 02 16:55:55 2008 -0700 42.3 @@ -40,6 +40,7 @@ DEV_TYPE_PCIe_BRIDGE = 1 42.4 DEV_TYPE_PCI_BRIDGE = 2 42.5 DEV_TYPE_PCI = 3 42.6 42.7 +PCI_VENDOR_ID = 0x0 42.8 PCI_STATUS = 0x6 42.9 PCI_CLASS_DEVICE = 0x0a 42.10 PCI_CLASS_BRIDGE_PCI = 0x0604 42.11 @@ -69,6 +70,11 @@ PCI_PM_CTRL_NO_SOFT_RESET = 0x0004 42.12 PCI_PM_CTRL_STATE_MASK = 0x0003 42.13 PCI_D3hot = 3 42.14 42.15 +VENDOR_INTEL = 0x8086 42.16 +PCI_CAP_ID_VENDOR_SPECIFIC_CAP = 0x09 42.17 +PCI_CLASS_ID_USB = 0x0c03 42.18 +PCI_USB_FLRCTRL = 0x4 42.19 + 42.20 PCI_CAP_ID_AF = 0x13 42.21 PCI_AF_CAPs = 0x3 42.22 PCI_AF_CAPs_TP_FLR = 0x3 42.23 @@ -487,7 +493,7 @@ class PciDevice: 42.24 def do_Dstate_transition(self): 42.25 pos = self.find_cap_offset(PCI_CAP_ID_PM) 42.26 if pos == 0: 42.27 - return 42.28 + return False 42.29 42.30 (pci_list, cfg_list) = save_pci_conf_space([self.name]) 42.31 42.32 @@ -504,6 +510,31 @@ class PciDevice: 42.33 time.sleep(0.010) 42.34 42.35 restore_pci_conf_space((pci_list, cfg_list)) 42.36 + return True 42.37 + 42.38 + def do_vendor_specific_FLR_method(self): 42.39 + pos = self.find_cap_offset(PCI_CAP_ID_VENDOR_SPECIFIC_CAP) 42.40 + if pos == 0: 42.41 + return 42.42 + 42.43 + vendor_id = self.pci_conf_read16(PCI_VENDOR_ID) 42.44 + if vendor_id != VENDOR_INTEL: 42.45 + return 42.46 + 42.47 + class_id = self.pci_conf_read16(PCI_CLASS_DEVICE) 42.48 + if class_id != PCI_CLASS_ID_USB: 42.49 + return 42.50 + 42.51 + (pci_list, cfg_list) = save_pci_conf_space([self.name]) 42.52 + 42.53 + self.pci_conf_write8(pos + PCI_USB_FLRCTRL, 1) 42.54 + time.sleep(0.010) 42.55 + 42.56 + restore_pci_conf_space((pci_list, cfg_list)) 42.57 + 42.58 + def do_FLR_for_integrated_device(self): 42.59 + if not self.do_Dstate_transition(): 42.60 + self.do_vendor_specific_FLR_method() 42.61 42.62 def find_all_the_multi_functions(self): 42.63 sysfs_mnt = find_sysfs_mnt() 42.64 @@ -676,7 +707,7 @@ class PciDevice: 42.65 restore_pci_conf_space((pci_list, cfg_list)) 42.66 else: 42.67 if self.bus == 0: 42.68 - self.do_Dstate_transition() 42.69 + self.do_FLR_for_integrated_device() 42.70 else: 42.71 funcs = self.find_all_the_multi_functions() 42.72 self.devs_check_driver(funcs) 42.73 @@ -697,7 +728,7 @@ class PciDevice: 42.74 restore_pci_conf_space((pci_list, cfg_list)) 42.75 else: 42.76 if self.bus == 0: 42.77 - self.do_Dstate_transition() 42.78 + self.do_FLR_for_integrated_device() 42.79 else: 42.80 devs = self.find_coassigned_devices(False) 42.81 # Remove the element 0 which is a bridge
43.1 --- a/tools/python/xen/xend/XendConfig.py Tue Sep 02 16:34:53 2008 -0700 43.2 +++ b/tools/python/xen/xend/XendConfig.py Tue Sep 02 16:55:55 2008 -0700 43.3 @@ -1538,9 +1538,9 @@ class XendConfig(dict): 43.4 pci_dev_info[opt] = val 43.5 except TypeError: 43.6 pass 43.7 - # append uuid for each pci device. 43.8 - dpci_uuid = pci_dev_info.get('uuid', uuid.createString()) 43.9 - pci_dev_info['uuid'] = dpci_uuid 43.10 + # append uuid for each pci device. 43.11 + dpci_uuid = pci_dev_info.get('uuid', uuid.createString()) 43.12 + pci_dev_info['uuid'] = dpci_uuid 43.13 pci_devs.append(pci_dev_info) 43.14 dev_config['devs'] = pci_devs 43.15
44.1 --- a/tools/python/xen/xend/XendDomain.py Tue Sep 02 16:34:53 2008 -0700 44.2 +++ b/tools/python/xen/xend/XendDomain.py Tue Sep 02 16:55:55 2008 -0700 44.3 @@ -419,6 +419,8 @@ class XendDomain: 44.4 except VmError: 44.5 log.exception("Unable to recreate domain") 44.6 try: 44.7 + xc.domain_pause(domid) 44.8 + do_FLR(domid) 44.9 xc.domain_destroy(domid) 44.10 except: 44.11 log.exception("Hard destruction of domain failed: %d" % 44.12 @@ -1255,6 +1257,8 @@ class XendDomain: 44.13 val = dominfo.destroy() 44.14 else: 44.15 try: 44.16 + xc.domain_pause(int(domid)) 44.17 + do_FLR(int(domid)) 44.18 val = xc.domain_destroy(int(domid)) 44.19 except ValueError: 44.20 raise XendInvalidDomain(domid)
45.1 --- a/tools/python/xen/xend/XendDomainInfo.py Tue Sep 02 16:34:53 2008 -0700 45.2 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Sep 02 16:55:55 2008 -0700 45.3 @@ -287,6 +287,28 @@ def dom_get(dom): 45.4 log.trace("domain_getinfo(%d) failed, ignoring: %s", dom, str(err)) 45.5 return None 45.6 45.7 +def do_FLR(domid): 45.8 + from xen.xend.server.pciif import parse_pci_name, PciDevice 45.9 + path = '/local/domain/0/backend/pci/%u/0/' % domid 45.10 + num_devs = xstransact.Read(path + 'num_devs'); 45.11 + if num_devs is None or num_devs == "": 45.12 + return; 45.13 + 45.14 + num_devs = int(xstransact.Read(path + 'num_devs')); 45.15 + 45.16 + dev_str_list = [] 45.17 + for i in range(num_devs): 45.18 + dev_str = xstransact.Read(path + 'dev-%i' % i) 45.19 + dev_str_list = dev_str_list + [dev_str] 45.20 + 45.21 + for dev_str in dev_str_list: 45.22 + (dom, b, d, f) = parse_pci_name(dev_str) 45.23 + try: 45.24 + dev = PciDevice(dom, b, d, f) 45.25 + except Exception, e: 45.26 + raise VmError("pci: failed to locate device and "+ 45.27 + "parse its resources - "+str(e)) 45.28 + dev.do_FLR() 45.29 45.30 class XendDomainInfo: 45.31 """An object represents a domain. 45.32 @@ -2386,44 +2408,34 @@ class XendDomainInfo: 45.33 if self.domid is None: 45.34 return 45.35 45.36 + from xen.xend import XendDomain 45.37 log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid)) 45.38 45.39 paths = self._prepare_phantom_paths() 45.40 45.41 self._cleanupVm() 45.42 if self.dompath is not None: 45.43 - self.destroyDomain() 45.44 + try: 45.45 + xc.domain_destroy_hook(self.domid) 45.46 + xc.domain_pause(self.domid) 45.47 + do_FLR(self.domid) 45.48 + xc.domain_destroy(self.domid) 45.49 + for state in DOM_STATES_OLD: 45.50 + self.info[state] = 0 45.51 + self._stateSet(DOM_STATE_HALTED) 45.52 + except: 45.53 + log.exception("XendDomainInfo.destroy: domain destruction failed.") 45.54 + 45.55 + XendDomain.instance().remove_domain(self) 45.56 + self.cleanupDomain() 45.57 45.58 self._cleanup_phantom_devs(paths) 45.59 45.60 if "transient" in self.info["other_config"] \ 45.61 and bool(self.info["other_config"]["transient"]): 45.62 - from xen.xend import XendDomain 45.63 XendDomain.instance().domain_delete_by_dominfo(self) 45.64 45.65 45.66 - def destroyDomain(self): 45.67 - log.debug("XendDomainInfo.destroyDomain(%s)", str(self.domid)) 45.68 - 45.69 - paths = self._prepare_phantom_paths() 45.70 - 45.71 - try: 45.72 - if self.domid is not None: 45.73 - xc.domain_destroy_hook(self.domid) 45.74 - xc.domain_destroy(self.domid) 45.75 - for state in DOM_STATES_OLD: 45.76 - self.info[state] = 0 45.77 - self._stateSet(DOM_STATE_HALTED) 45.78 - except: 45.79 - log.exception("XendDomainInfo.destroy: xc.domain_destroy failed.") 45.80 - 45.81 - from xen.xend import XendDomain 45.82 - XendDomain.instance().remove_domain(self) 45.83 - 45.84 - self.cleanupDomain() 45.85 - self._cleanup_phantom_devs(paths) 45.86 - 45.87 - 45.88 def resetDomain(self): 45.89 log.debug("XendDomainInfo.resetDomain(%s)", str(self.domid)) 45.90
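do_FLR() walks the PCI backend's xenstore subtree for the dying domain and issues a function-level reset on every pass-through device; the callers added in XendDomain.py above pause the domain first so the device is quiescent when the reset hits. For reference, the same enumeration from C via libxenstore, as a sketch (paths exactly as in the patch; the function itself is illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <xs.h>

    /* Print the pass-through PCI devices recorded for a domain. */
    void list_passthrough_devs(struct xs_handle *xsh, unsigned int domid)
    {
        char path[80], *val;
        unsigned int len, i, num;

        snprintf(path, sizeof(path),
                 "/local/domain/0/backend/pci/%u/0/num_devs", domid);
        val = xs_read(xsh, XBT_NULL, path, &len);
        if ( val == NULL )
            return;                  /* no PCI backend for this domain */
        num = atoi(val);
        free(val);

        for ( i = 0; i < num; i++ )
        {
            snprintf(path, sizeof(path),
                     "/local/domain/0/backend/pci/%u/0/dev-%u", domid, i);
            val = xs_read(xsh, XBT_NULL, path, &len);
            if ( val != NULL )
            {
                printf("%s\n", val); /* e.g. "0000:00:1d.0" */
                free(val);
            }
        }
    }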
46.1 --- a/tools/python/xen/xend/image.py Tue Sep 02 16:34:53 2008 -0700 46.2 +++ b/tools/python/xen/xend/image.py Tue Sep 02 16:55:55 2008 -0700 46.3 @@ -637,8 +637,9 @@ class LinuxImageHandler(ImageHandler): 46.4 log.debug("ramdisk = %s", self.ramdisk) 46.5 log.debug("vcpus = %d", self.vm.getVCpuCount()) 46.6 log.debug("features = %s", self.vm.getFeatures()) 46.7 + log.debug("flags = %d", self.flags) 46.8 if arch.type == "ia64": 46.9 - log.debug("vhpt = %d", self.flags) 46.10 + log.debug("vhpt = %d", self.vhpt) 46.11 46.12 return xc.linux_build(domid = self.vm.getDomid(), 46.13 memsize = mem_mb,
47.1 --- a/tools/python/xen/xend/server/DevController.py Tue Sep 02 16:34:53 2008 -0700 47.2 +++ b/tools/python/xen/xend/server/DevController.py Tue Sep 02 16:55:55 2008 -0700 47.3 @@ -223,12 +223,6 @@ class DevController: 47.4 raise VmError('%s devices may not be reconfigured' % self.deviceClass) 47.5 47.6 47.7 - def cleanupDeviceOnDomainDestroy(self, devid): 47.8 - """ Some devices may need special cleanup when the guest domain 47.9 - is destroyed. 47.10 - """ 47.11 - return 47.12 - 47.13 def destroyDevice(self, devid, force): 47.14 """Destroy the specified device. 47.15 47.16 @@ -245,8 +239,6 @@ class DevController: 47.17 47.18 dev = self.convertToDeviceNumber(devid) 47.19 47.20 - self.cleanupDeviceOnDomainDestroy(dev) 47.21 - 47.22 # Modify online status /before/ updating state (latter is watched by 47.23 # drivers, so this ordering avoids a race). 47.24 self.writeBackend(dev, 'online', "0")
48.1 --- a/tools/python/xen/xend/server/pciif.py Tue Sep 02 16:34:53 2008 -0700 48.2 +++ b/tools/python/xen/xend/server/pciif.py Tue Sep 02 16:55:55 2008 -0700 48.3 @@ -286,7 +286,7 @@ class PciController(DevController): 48.4 )%(dev.name)) 48.5 48.6 if dev.has_non_page_aligned_bar and arch.type != "ia64": 48.7 - raise VmError("pci: %: non-page-aligned MMIO BAR found." % dev.name) 48.8 + raise VmError("pci: %s: non-page-aligned MMIO BAR found." % dev.name) 48.9 48.10 self.CheckSiblingDevices(fe_domid, dev) 48.11 48.12 @@ -383,10 +383,10 @@ class PciController(DevController): 48.13 if (dev.dev_type == DEV_TYPE_PCIe_ENDPOINT) and not dev.pcie_flr: 48.14 if dev.bus == 0: 48.15 # We cope with this case by using the Dstate transition 48.16 - # method for now. 48.17 + # method or some vendor specific methods for now. 48.18 err_msg = 'pci: %s: it is on bus 0, but has no PCIe' +\ 48.19 ' FLR Capability. Will try the Dstate transition'+\ 48.20 - ' method if available.' 48.21 + ' method or some vendor specific methods if available.' 48.22 log.warn(err_msg % dev.name) 48.23 else: 48.24 funcs = dev.find_all_the_multi_functions() 48.25 @@ -404,10 +404,11 @@ class PciController(DevController): 48.26 if dev.bus == 0 or arch.type == "ia64": 48.27 if not dev.pci_af_flr: 48.28 # We cope with this case by using the Dstate transition 48.29 - # method for now. 48.30 + # method or some vendor specific methods for now. 48.31 err_msg = 'pci: %s: it is on bus 0, but has no PCI' +\ 48.32 ' Advanced Capabilities for FLR. Will try the'+\ 48.33 - ' Dstate transition method if available.' 48.34 + ' Dstate transition method or some vendor' +\ 48.35 + ' specific methods if available.' 48.36 log.warn(err_msg % dev.name) 48.37 else: 48.38 # All devices behind the uppermost PCI/PCI-X bridge must be\ 48.39 @@ -543,22 +544,6 @@ class PciController(DevController): 48.40 48.41 return new_num_devs 48.42 48.43 - def cleanupDeviceOnDomainDestroy(self, devid): 48.44 - num_devs = int(self.readBackend(devid, 'num_devs')) 48.45 - dev_str_list = [] 48.46 - for i in range(num_devs): 48.47 - dev_str = self.readBackend(devid, 'dev-%i' % i) 48.48 - dev_str_list = dev_str_list + [dev_str] 48.49 - 48.50 - for dev_str in dev_str_list: 48.51 - (dom, b, d, f) = parse_pci_name(dev_str) 48.52 - try: 48.53 - dev = PciDevice(dom, b, d, f) 48.54 - except Exception, e: 48.55 - raise VmError("pci: failed to locate device and "+ 48.56 - "parse it's resources - "+str(e)) 48.57 - dev.do_FLR() 48.58 - 48.59 def waitForBackend(self,devid): 48.60 return (0, "ok - no hotplug") 48.61
49.1 --- a/tools/xenmon/Makefile Tue Sep 02 16:34:53 2008 -0700 49.2 +++ b/tools/xenmon/Makefile Tue Sep 02 16:55:55 2008 -0700 49.3 @@ -42,6 +42,6 @@ clean: 49.4 49.5 49.6 %: %.c Makefile 49.7 - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< 49.8 + $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ 49.9 xentrace_%: %.c Makefile 49.10 - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< 49.11 + $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
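The xenmon Makefile change is purely about link order: linkers resolve symbols left to right, and with flags such as --as-needed a library named in $(LDFLAGS) before the object that uses it can be dropped, leaving undefined references. Moving the source between $(CFLAGS) and $(LDFLAGS) keeps the library references live. Illustrative expansion (xenbaked is one of the tools built by this rule):

    $(CC) $(CFLAGS) xenbaked.c $(LDFLAGS) -o xenbaked   # objects first: links
    $(CC) $(CFLAGS) $(LDFLAGS) -o xenbaked xenbaked.c   # may drop needed libs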
50.1 --- a/tools/xentrace/formats Tue Sep 02 16:34:53 2008 -0700 50.2 +++ b/tools/xentrace/formats Tue Sep 02 16:55:55 2008 -0700 50.3 @@ -23,8 +23,8 @@ 0x0002f00f CPU%(cpu)d %(tsc)d (+%(relt 50.4 0x00081001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMENTRY [ dom:vcpu = 0x%(1)08x ] 50.5 0x00081002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMEXIT [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP = 0x%(3)08x ] 50.6 0x00081102 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) VMEXIT [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP = 0x%(3)016x ] 50.7 -0x00082001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(3)02x, virt = 0x%(2)08x ] 50.8 -0x00082101 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(3)02x, virt = 0x%(2)016x ] 50.9 +0x00082001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ] 50.10 +0x00082101 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_XEN [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)016x ] 50.11 0x00082002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_INJECT [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ] 50.12 0x00082102 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PF_INJECT [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)016x ] 50.13 0x00082003 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INJ_EXC [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x, errorcode = 0x%(3)04x ]
51.1 --- a/xen/Makefile Tue Sep 02 16:34:53 2008 -0700 51.2 +++ b/xen/Makefile Tue Sep 02 16:55:55 2008 -0700 51.3 @@ -1,8 +1,8 @@ 51.4 # This is the correct place to edit the build version. 51.5 # All other places this is stored (eg. compile.h) should be autogenerated. 51.6 export XEN_VERSION = 3 51.7 -export XEN_SUBVERSION = 3 51.8 -export XEN_EXTRAVERSION ?= .0-rc8-pre$(XEN_VENDORVERSION) 51.9 +export XEN_SUBVERSION = 4 51.10 +export XEN_EXTRAVERSION ?= -unstable$(XEN_VENDORVERSION) 51.11 export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) 51.12 -include xen-version 51.13 51.14 @@ -88,7 +88,7 @@ include/xen/compile.h: include/xen/compi 51.15 -e 's/@@whoami@@/$(XEN_WHOAMI)/g' \ 51.16 -e 's/@@domain@@/$(XEN_DOMAIN)/g' \ 51.17 -e 's/@@hostname@@/$(shell hostname)/g' \ 51.18 - -e 's!@@compiler@@!$(shell $(CC) $(CFLAGS) -v 2>&1 | grep -i "gcc.*version")!g' \ 51.19 + -e 's!@@compiler@@!$(shell $(CC) $(CFLAGS) -v 2>&1 | tail -1)!g' \ 51.20 -e 's/@@version@@/$(XEN_VERSION)/g' \ 51.21 -e 's/@@subversion@@/$(XEN_SUBVERSION)/g' \ 51.22 -e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \
52.1 --- a/xen/arch/ia64/xen/mm.c Tue Sep 02 16:34:53 2008 -0700 52.2 +++ b/xen/arch/ia64/xen/mm.c Tue Sep 02 16:55:55 2008 -0700 52.3 @@ -2698,6 +2698,20 @@ void put_page_type(struct page_info *pag 52.4 } 52.5 52.6 52.7 +static int get_page_from_pagenr(unsigned long page_nr, struct domain *d) 52.8 +{ 52.9 + struct page_info *page = mfn_to_page(page_nr); 52.10 + 52.11 + if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) ) 52.12 + { 52.13 + MEM_LOG("Could not get page ref for pfn %lx", page_nr); 52.14 + return 0; 52.15 + } 52.16 + 52.17 + return 1; 52.18 +} 52.19 + 52.20 + 52.21 int get_page_type(struct page_info *page, u32 type) 52.22 { 52.23 u64 nx, x, y = page->u.inuse.type_info; 52.24 @@ -2792,6 +2806,8 @@ int memory_is_conventional_ram(paddr_t p 52.25 long 52.26 arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) 52.27 { 52.28 + struct page_info *page = NULL; 52.29 + 52.30 switch (op) { 52.31 case XENMEM_add_to_physmap: 52.32 { 52.33 @@ -2836,11 +2852,21 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( 52.34 52.35 spin_unlock(&d->grant_table->lock); 52.36 break; 52.37 + case XENMAPSPACE_mfn: 52.38 + { 52.39 + if ( get_page_from_pagenr(xatp.idx, d) ) { 52.40 + mfn = xatp.idx; 52.41 + page = mfn_to_page(mfn); 52.42 + } 52.43 + break; 52.44 + } 52.45 default: 52.46 break; 52.47 } 52.48 52.49 if (mfn == 0) { 52.50 + if ( page ) 52.51 + put_page(page); 52.52 rcu_unlock_domain(d); 52.53 return -EINVAL; 52.54 } 52.55 @@ -2872,12 +2898,54 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( 52.56 52.57 out: 52.58 domain_unlock(d); 52.59 - 52.60 + 52.61 + if ( page ) 52.62 + put_page(page); 52.63 + 52.64 rcu_unlock_domain(d); 52.65 52.66 break; 52.67 } 52.68 52.69 + case XENMEM_remove_from_physmap: 52.70 + { 52.71 + struct xen_remove_from_physmap xrfp; 52.72 + unsigned long mfn; 52.73 + struct domain *d; 52.74 + 52.75 + if ( copy_from_guest(&xrfp, arg, 1) ) 52.76 + return -EFAULT; 52.77 + 52.78 + if ( xrfp.domid == DOMID_SELF ) 52.79 + { 52.80 + d = rcu_lock_current_domain(); 52.81 + } 52.82 + else 52.83 + { 52.84 + if ( (d = rcu_lock_domain_by_id(xrfp.domid)) == NULL ) 52.85 + return -ESRCH; 52.86 + if ( !IS_PRIV_FOR(current->domain, d) ) 52.87 + { 52.88 + rcu_unlock_domain(d); 52.89 + return -EPERM; 52.90 + } 52.91 + } 52.92 + 52.93 + domain_lock(d); 52.94 + 52.95 + mfn = gmfn_to_mfn(d, xrfp.gpfn); 52.96 + 52.97 + if ( mfn_valid(mfn) ) 52.98 + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); 52.99 + 52.100 + domain_unlock(d); 52.101 + 52.102 + rcu_unlock_domain(d); 52.103 + 52.104 + break; 52.105 + } 52.106 + 52.107 + 52.108 case XENMEM_machine_memory_map: 52.109 { 52.110 struct xen_memory_map memmap;
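XENMEM_remove_from_physmap, added here for ia64, is the operation the vga.c hunk uses to pull the now qemu-owned VRAM pages back out of a guest's physmap after populating them for an old saved image. From the tools side it reduces to one xc_memory_op() per GPFN; a sketch mirroring that loop (the wrapper function is illustrative):

    #include <xenctrl.h>
    #include <xen/memory.h>

    /* Drop a single GPFN from a domain's physmap. */
    int unmap_gpfn(int xc_handle, uint32_t domid, xen_pfn_t gpfn)
    {
        struct xen_remove_from_physmap xrfp = {
            .domid = domid,
            .gpfn  = gpfn,
        };

        return xc_memory_op(xc_handle, XENMEM_remove_from_physmap, &xrfp);
    }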
53.1 --- a/xen/arch/x86/acpi/power.c Tue Sep 02 16:34:53 2008 -0700 53.2 +++ b/xen/arch/x86/acpi/power.c Tue Sep 02 16:55:55 2008 -0700 53.3 @@ -24,6 +24,7 @@ 53.4 #include <xen/sched.h> 53.5 #include <xen/domain.h> 53.6 #include <xen/console.h> 53.7 +#include <xen/iommu.h> 53.8 #include <public/platform.h> 53.9 #include <asm/tboot.h> 53.10 53.11 @@ -41,6 +42,8 @@ void do_suspend_lowlevel(void); 53.12 53.13 static int device_power_down(void) 53.14 { 53.15 + iommu_suspend(); 53.16 + 53.17 console_suspend(); 53.18 53.19 time_suspend(); 53.20 @@ -65,6 +68,8 @@ static void device_power_up(void) 53.21 time_resume(); 53.22 53.23 console_resume(); 53.24 + 53.25 + iommu_resume(); 53.26 } 53.27 53.28 static void freeze_domains(void)
54.1 --- a/xen/arch/x86/cpu/amd.c Tue Sep 02 16:34:53 2008 -0700 54.2 +++ b/xen/arch/x86/cpu/amd.c Tue Sep 02 16:55:55 2008 -0700 54.3 @@ -10,10 +10,144 @@ 54.4 #include <asm/hvm/support.h> 54.5 54.6 #include "cpu.h" 54.7 +#include "amd.h" 54.8 54.9 int start_svm(struct cpuinfo_x86 *c); 54.10 54.11 /* 54.12 + * Pre-canned values for overriding the CPUID features 54.13 + * and extended features masks. 54.14 + * 54.15 + * Currently supported processors: 54.16 + * 54.17 + * "fam_0f_rev_c" 54.18 + * "fam_0f_rev_d" 54.19 + * "fam_0f_rev_e" 54.20 + * "fam_0f_rev_f" 54.21 + * "fam_0f_rev_g" 54.22 + * "fam_10_rev_b" 54.23 + * "fam_10_rev_c" 54.24 + * "fam_11_rev_b" 54.25 + */ 54.26 +static char opt_famrev[14]; 54.27 +string_param("cpuid_mask_cpu", opt_famrev); 54.28 + 54.29 +/* Finer-grained CPUID feature control. */ 54.30 +static unsigned int opt_cpuid_mask_ecx, opt_cpuid_mask_edx; 54.31 +integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx); 54.32 +integer_param("cpuid_mask_edx", opt_cpuid_mask_edx); 54.33 +static unsigned int opt_cpuid_mask_ext_ecx, opt_cpuid_mask_ext_edx; 54.34 +integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx); 54.35 +integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx); 54.36 + 54.37 +static inline void wrmsr_amd(unsigned int index, unsigned int lo, 54.38 + unsigned int hi) 54.39 +{ 54.40 + asm volatile ( 54.41 + "wrmsr" 54.42 + : /* No outputs */ 54.43 + : "c" (index), "a" (lo), 54.44 + "d" (hi), "D" (0x9c5a203a) 54.45 + ); 54.46 +} 54.47 + 54.48 +/* 54.49 + * Mask the features and extended features returned by CPUID. Parameters are 54.50 + * set from the boot line via two methods: 54.51 + * 54.52 + * 1) Specific processor revision string 54.53 + * 2) User-defined masks 54.54 + * 54.55 + * The processor revision string parameter has precedence. 54.56 + */ 54.57 +static void __devinit set_cpuidmask(struct cpuinfo_x86 *c) 54.58 +{ 54.59 + static unsigned int feat_ecx, feat_edx; 54.60 + static unsigned int extfeat_ecx, extfeat_edx; 54.61 + static enum { not_parsed, no_mask, set_mask } status; 54.62 + 54.63 + if (status == no_mask) 54.64 + return; 54.65 + 54.66 + if (status == set_mask) 54.67 + goto setmask; 54.68 + 54.69 + ASSERT((status == not_parsed) && (smp_processor_id() == 0)); 54.70 + status = no_mask; 54.71 + 54.72 + if (opt_cpuid_mask_ecx | opt_cpuid_mask_edx | 54.73 + opt_cpuid_mask_ext_ecx | opt_cpuid_mask_ext_edx) { 54.74 + feat_ecx = opt_cpuid_mask_ecx ? : ~0U; 54.75 + feat_edx = opt_cpuid_mask_edx ? : ~0U; 54.76 + extfeat_ecx = opt_cpuid_mask_ext_ecx ? : ~0U; 54.77 + extfeat_edx = opt_cpuid_mask_ext_edx ? 
: ~0U; 54.78 + } else if (*opt_famrev == '\0') { 54.79 + return; 54.80 + } else if (!strcmp(opt_famrev, "fam_0f_rev_c")) { 54.81 + feat_ecx = AMD_FEATURES_K8_REV_C_ECX; 54.82 + feat_edx = AMD_FEATURES_K8_REV_C_EDX; 54.83 + extfeat_ecx = AMD_EXTFEATURES_K8_REV_C_ECX; 54.84 + extfeat_edx = AMD_EXTFEATURES_K8_REV_C_EDX; 54.85 + } else if (!strcmp(opt_famrev, "fam_0f_rev_d")) { 54.86 + feat_ecx = AMD_FEATURES_K8_REV_D_ECX; 54.87 + feat_edx = AMD_FEATURES_K8_REV_D_EDX; 54.88 + extfeat_ecx = AMD_EXTFEATURES_K8_REV_D_ECX; 54.89 + extfeat_edx = AMD_EXTFEATURES_K8_REV_D_EDX; 54.90 + } else if (!strcmp(opt_famrev, "fam_0f_rev_e")) { 54.91 + feat_ecx = AMD_FEATURES_K8_REV_E_ECX; 54.92 + feat_edx = AMD_FEATURES_K8_REV_E_EDX; 54.93 + extfeat_ecx = AMD_EXTFEATURES_K8_REV_E_ECX; 54.94 + extfeat_edx = AMD_EXTFEATURES_K8_REV_E_EDX; 54.95 + } else if (!strcmp(opt_famrev, "fam_0f_rev_f")) { 54.96 + feat_ecx = AMD_FEATURES_K8_REV_F_ECX; 54.97 + feat_edx = AMD_FEATURES_K8_REV_F_EDX; 54.98 + extfeat_ecx = AMD_EXTFEATURES_K8_REV_F_ECX; 54.99 + extfeat_edx = AMD_EXTFEATURES_K8_REV_F_EDX; 54.100 + } else if (!strcmp(opt_famrev, "fam_0f_rev_g")) { 54.101 + feat_ecx = AMD_FEATURES_K8_REV_G_ECX; 54.102 + feat_edx = AMD_FEATURES_K8_REV_G_EDX; 54.103 + extfeat_ecx = AMD_EXTFEATURES_K8_REV_G_ECX; 54.104 + extfeat_edx = AMD_EXTFEATURES_K8_REV_G_EDX; 54.105 + } else if (!strcmp(opt_famrev, "fam_10_rev_b")) { 54.106 + feat_ecx = AMD_FEATURES_FAM10h_REV_B_ECX; 54.107 + feat_edx = AMD_FEATURES_FAM10h_REV_B_EDX; 54.108 + extfeat_ecx = AMD_EXTFEATURES_FAM10h_REV_B_ECX; 54.109 + extfeat_edx = AMD_EXTFEATURES_FAM10h_REV_B_EDX; 54.110 + } else if (!strcmp(opt_famrev, "fam_10_rev_c")) { 54.111 + feat_ecx = AMD_FEATURES_FAM10h_REV_C_ECX; 54.112 + feat_edx = AMD_FEATURES_FAM10h_REV_C_EDX; 54.113 + extfeat_ecx = AMD_EXTFEATURES_FAM10h_REV_C_ECX; 54.114 + extfeat_edx = AMD_EXTFEATURES_FAM10h_REV_C_EDX; 54.115 + } else if (!strcmp(opt_famrev, "fam_11_rev_b")) { 54.116 + feat_ecx = AMD_FEATURES_FAM11h_REV_B_ECX; 54.117 + feat_edx = AMD_FEATURES_FAM11h_REV_B_EDX; 54.118 + extfeat_ecx = AMD_EXTFEATURES_FAM11h_REV_B_ECX; 54.119 + extfeat_edx = AMD_EXTFEATURES_FAM11h_REV_B_EDX; 54.120 + } else { 54.121 + printk("Invalid processor string: %s\n", opt_famrev); 54.122 + printk("CPUID will not be masked\n"); 54.123 + return; 54.124 + } 54.125 + 54.126 + status = set_mask; 54.127 + printk("Writing CPUID feature mask ECX:EDX -> %08Xh:%08Xh\n", 54.128 + feat_ecx, feat_edx); 54.129 + printk("Writing CPUID extended feature mask ECX:EDX -> %08Xh:%08Xh\n", 54.130 + extfeat_ecx, extfeat_edx); 54.131 + 54.132 + setmask: 54.133 + /* FIXME check if processor supports CPUID masking */ 54.134 + /* AMD processors prior to family 10h required a 32-bit password */ 54.135 + if (c->x86 >= 0x10) { 54.136 + wrmsr(MSR_K8_FEATURE_MASK, feat_edx, feat_ecx); 54.137 + wrmsr(MSR_K8_EXT_FEATURE_MASK, extfeat_edx, extfeat_ecx); 54.138 + } else if (c->x86 == 0x0f) { 54.139 + wrmsr_amd(MSR_K8_FEATURE_MASK, feat_edx, feat_ecx); 54.140 + wrmsr_amd(MSR_K8_EXT_FEATURE_MASK, extfeat_edx, extfeat_ecx); 54.141 + } 54.142 +} 54.143 + 54.144 +/* 54.145 * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush 54.146 * filter on AMD 64-bit processors. 
54.147 */ 54.148 @@ -115,7 +249,7 @@ static void check_disable_c1e(unsigned i 54.149 on_each_cpu(disable_c1e, NULL, 1, 1); 54.150 } 54.151 54.152 -static void __init init_amd(struct cpuinfo_x86 *c) 54.153 +static void __devinit init_amd(struct cpuinfo_x86 *c) 54.154 { 54.155 u32 l, h; 54.156 int mbytes = num_physpages >> (20-PAGE_SHIFT); 54.157 @@ -368,6 +502,8 @@ static void __init init_amd(struct cpuin 54.158 if ((smp_processor_id() == 1) && c1_ramping_may_cause_clock_drift(c)) 54.159 disable_c1_ramping(); 54.160 54.161 + set_cpuidmask(c); 54.162 + 54.163 start_svm(c); 54.164 } 54.165
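The canned masks (defined in the new amd.h below) let an administrator level a heterogeneous pool down to its oldest processor, so guests never see, and then lose, a CPUID feature across live migration. Both forms are Xen boot-line options; illustrative examples only, with placeholder mask values rather than recommendations:

    kernel /boot/xen.gz cpuid_mask_cpu=fam_0f_rev_f
    kernel /boot/xen.gz cpuid_mask_ecx=0x2001 cpuid_mask_edx=0xffffffff \
                        cpuid_mask_ext_ecx=0x1 cpuid_mask_ext_edx=0xffffffff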
55.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 55.2 +++ b/xen/arch/x86/cpu/amd.h Tue Sep 02 16:55:55 2008 -0700 55.3 @@ -0,0 +1,103 @@ 55.4 +/* 55.5 + * amd.h - AMD processor specific definitions 55.6 + */ 55.7 + 55.8 +#ifndef __AMD_H__ 55.9 +#define __AMD_H__ 55.10 + 55.11 +#include <asm/cpufeature.h> 55.12 + 55.13 +/* CPUID masked for use by AMD-V Extended Migration */ 55.14 + 55.15 +#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32) 55.16 +#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_)) 55.17 + 55.18 +/* Family 0Fh, Revision C */ 55.19 +#define AMD_FEATURES_K8_REV_C_ECX 0 55.20 +#define AMD_FEATURES_K8_REV_C_EDX ( \ 55.21 + __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ 55.22 + __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ 55.23 + __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ 55.24 + __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ 55.25 + __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ 55.26 + __bit(X86_FEATURE_SEP) | __bit(X86_FEATURE_MTRR) | \ 55.27 + __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ 55.28 + __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ 55.29 + __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_CLFLSH)| \ 55.30 + __bit(X86_FEATURE_MMX) | __bit(X86_FEATURE_FXSR) | \ 55.31 + __bit(X86_FEATURE_XMM) | __bit(X86_FEATURE_XMM2)) 55.32 +#define AMD_EXTFEATURES_K8_REV_C_ECX 0 55.33 +#define AMD_EXTFEATURES_K8_REV_C_EDX ( \ 55.34 + __bit(X86_FEATURE_FPU) | __bit(X86_FEATURE_VME) | \ 55.35 + __bit(X86_FEATURE_DE) | __bit(X86_FEATURE_PSE) | \ 55.36 + __bit(X86_FEATURE_TSC) | __bit(X86_FEATURE_MSR) | \ 55.37 + __bit(X86_FEATURE_PAE) | __bit(X86_FEATURE_MCE) | \ 55.38 + __bit(X86_FEATURE_CX8) | __bit(X86_FEATURE_APIC) | \ 55.39 + __bit(X86_FEATURE_SYSCALL) | __bit(X86_FEATURE_MTRR) | \ 55.40 + __bit(X86_FEATURE_PGE) | __bit(X86_FEATURE_MCA) | \ 55.41 + __bit(X86_FEATURE_CMOV) | __bit(X86_FEATURE_PAT) | \ 55.42 + __bit(X86_FEATURE_PSE36) | __bit(X86_FEATURE_NX) | \ 55.43 + __bit(X86_FEATURE_MMXEXT) | __bit(X86_FEATURE_MMX) | \ 55.44 + __bit(X86_FEATURE_FXSR) | __bit(X86_FEATURE_LM) | \ 55.45 + __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW)) 55.46 + 55.47 +/* Family 0Fh, Revision D */ 55.48 +#define AMD_FEATURES_K8_REV_D_ECX AMD_FEATURES_K8_REV_C_ECX 55.49 +#define AMD_FEATURES_K8_REV_D_EDX AMD_FEATURES_K8_REV_C_EDX 55.50 +#define AMD_EXTFEATURES_K8_REV_D_ECX (AMD_EXTFEATURES_K8_REV_C_ECX |\ 55.51 + __bit(X86_FEATURE_LAHF_LM)) 55.52 +#define AMD_EXTFEATURES_K8_REV_D_EDX (AMD_EXTFEATURES_K8_REV_C_EDX |\ 55.53 + __bit(X86_FEATURE_FFXSR)) 55.54 + 55.55 +/* Family 0Fh, Revision E */ 55.56 +#define AMD_FEATURES_K8_REV_E_ECX (AMD_FEATURES_K8_REV_D_ECX | \ 55.57 + __bit(X86_FEATURE_XMM3)) 55.58 +#define AMD_FEATURES_K8_REV_E_EDX (AMD_FEATURES_K8_REV_D_EDX | \ 55.59 + __bit(X86_FEATURE_HT)) 55.60 +#define AMD_EXTFEATURES_K8_REV_E_ECX (AMD_EXTFEATURES_K8_REV_D_ECX |\ 55.61 + __bit(X86_FEATURE_CMP_LEGACY)) 55.62 +#define AMD_EXTFEATURES_K8_REV_E_EDX AMD_EXTFEATURES_K8_REV_D_EDX 55.63 + 55.64 +/* Family 0Fh, Revision F */ 55.65 +#define AMD_FEATURES_K8_REV_F_ECX (AMD_FEATURES_K8_REV_E_ECX | \ 55.66 + __bit(X86_FEATURE_CX16)) 55.67 +#define AMD_FEATURES_K8_REV_F_EDX AMD_FEATURES_K8_REV_E_EDX 55.68 +#define AMD_EXTFEATURES_K8_REV_F_ECX (AMD_EXTFEATURES_K8_REV_E_ECX |\ 55.69 + __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) | \ 55.70 + __bit(X86_FEATURE_ALTMOVCR)) 55.71 +#define AMD_EXTFEATURES_K8_REV_F_EDX (AMD_EXTFEATURES_K8_REV_E_EDX |\ 55.72 + __bit(X86_FEATURE_RDTSCP)) 55.73 + 55.74 +/* Family 0Fh, Revision G */ 55.75 +#define 
AMD_FEATURES_K8_REV_G_ECX AMD_FEATURES_K8_REV_F_ECX 55.76 +#define AMD_FEATURES_K8_REV_G_EDX AMD_FEATURES_K8_REV_F_EDX 55.77 +#define AMD_EXTFEATURES_K8_REV_G_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ 55.78 + __bit(X86_FEATURE_3DNOWPF)) 55.79 +#define AMD_EXTFEATURES_K8_REV_G_EDX AMD_EXTFEATURES_K8_REV_F_EDX 55.80 + 55.81 +/* Family 10h, Revision B */ 55.82 +#define AMD_FEATURES_FAM10h_REV_B_ECX (AMD_FEATURES_K8_REV_F_ECX | \ 55.83 + __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT)) 55.84 +#define AMD_FEATURES_FAM10h_REV_B_EDX AMD_FEATURES_K8_REV_F_EDX 55.85 +#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\ 55.86 + __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) | \ 55.87 + __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) | \ 55.88 + __bit(X86_FEATURE_IBS)) 55.89 +#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\ 55.90 + __bit(X86_FEATURE_PAGE1GB)) 55.91 + 55.92 +/* Family 10h, Revision C */ 55.93 +#define AMD_FEATURES_FAM10h_REV_C_ECX AMD_FEATURES_FAM10h_REV_B_ECX 55.94 +#define AMD_FEATURES_FAM10h_REV_C_EDX AMD_FEATURES_FAM10h_REV_B_EDX 55.95 +#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\ 55.96 + __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT)) 55.97 +#define AMD_EXTFEATURES_FAM10h_REV_C_EDX AMD_EXTFEATURES_FAM10h_REV_B_EDX 55.98 + 55.99 +/* Family 11h, Revision B */ 55.100 +#define AMD_FEATURES_FAM11h_REV_B_ECX AMD_FEATURES_K8_REV_G_ECX 55.101 +#define AMD_FEATURES_FAM11h_REV_B_EDX AMD_FEATURES_K8_REV_G_EDX 55.102 +#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\ 55.103 + __bit(X86_FEATURE_SKINIT)) 55.104 +#define AMD_EXTFEATURES_FAM11h_REV_B_EDX AMD_EXTFEATURES_K8_REV_G_EDX 55.105 + 55.106 +#endif /* __AMD_H__ */
56.1 --- a/xen/arch/x86/domain.c Tue Sep 02 16:34:53 2008 -0700 56.2 +++ b/xen/arch/x86/domain.c Tue Sep 02 16:55:55 2008 -0700 56.3 @@ -1645,23 +1645,26 @@ static int relinquish_memory( 56.4 56.5 /* 56.6 * Forcibly invalidate top-most, still valid page tables at this point 56.7 - * to break circular 'linear page table' references. This is okay 56.8 - * because MMU structures are not shared across domains and this domain 56.9 - * is now dead. Thus top-most valid tables are not in use so a non-zero 56.10 - * count means circular reference. 56.11 + * to break circular 'linear page table' references as well as clean up 56.12 + * partially validated pages. This is okay because MMU structures are 56.13 + * not shared across domains and this domain is now dead. Thus top-most 56.14 + * valid tables are not in use so a non-zero count means circular 56.15 + * reference or partially validated. 56.16 */ 56.17 y = page->u.inuse.type_info; 56.18 for ( ; ; ) 56.19 { 56.20 x = y; 56.21 - if ( likely((x & (PGT_type_mask|PGT_validated)) != 56.22 - (type|PGT_validated)) ) 56.23 + if ( likely((x & PGT_type_mask) != type) || 56.24 + likely(!(x & (PGT_validated|PGT_partial))) ) 56.25 break; 56.26 56.27 - y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated); 56.28 + y = cmpxchg(&page->u.inuse.type_info, x, 56.29 + x & ~(PGT_validated|PGT_partial)); 56.30 if ( likely(y == x) ) 56.31 { 56.32 - free_page_type(page, type); 56.33 + if ( free_page_type(page, x, 0) != 0 ) 56.34 + BUG(); 56.35 break; 56.36 } 56.37 }
57.1 --- a/xen/arch/x86/hvm/emulate.c Tue Sep 02 16:34:53 2008 -0700 57.2 +++ b/xen/arch/x86/hvm/emulate.c Tue Sep 02 16:55:55 2008 -0700 57.3 @@ -571,11 +571,12 @@ static int hvmemul_rep_movs( 57.4 { 57.5 struct hvm_emulate_ctxt *hvmemul_ctxt = 57.6 container_of(ctxt, struct hvm_emulate_ctxt, ctxt); 57.7 - unsigned long saddr, daddr; 57.8 + unsigned long saddr, daddr, bytes; 57.9 paddr_t sgpa, dgpa; 57.10 uint32_t pfec = PFEC_page_present; 57.11 p2m_type_t p2mt; 57.12 - int rc; 57.13 + int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF); 57.14 + char *buf; 57.15 57.16 rc = hvmemul_virtual_to_linear( 57.17 src_seg, src_offset, bytes_per_rep, reps, hvm_access_read, 57.18 @@ -606,15 +607,56 @@ static int hvmemul_rep_movs( 57.19 (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt); 57.20 if ( !p2m_is_ram(p2mt) ) 57.21 return hvmemul_do_mmio( 57.22 - sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, 57.23 - !!(ctxt->regs->eflags & X86_EFLAGS_DF), NULL); 57.24 + sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL); 57.25 57.26 (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt); 57.27 - if ( p2m_is_ram(p2mt) ) 57.28 + if ( !p2m_is_ram(p2mt) ) 57.29 + return hvmemul_do_mmio( 57.30 + dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL); 57.31 + 57.32 + /* RAM-to-RAM copy: emulate as equivalent of memmove(dgpa, sgpa, bytes). */ 57.33 + bytes = *reps * bytes_per_rep; 57.34 + 57.35 + /* Adjust source address for reverse copy. */ 57.36 + if ( df ) 57.37 + sgpa -= bytes - bytes_per_rep; 57.38 + 57.39 + /* 57.40 + * Will first iteration copy fall within source range? If not then entire 57.41 + * copy does not corrupt itself. If so, then this is more complex than 57.42 + * can be emulated by a source-to-buffer-to-destination block copy. 57.43 + */ 57.44 + if ( ((dgpa + bytes_per_rep) > sgpa) && (dgpa < (sgpa + bytes)) ) 57.45 return X86EMUL_UNHANDLEABLE; 57.46 - return hvmemul_do_mmio( 57.47 - dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, 57.48 - !!(ctxt->regs->eflags & X86_EFLAGS_DF), NULL); 57.49 + 57.50 + /* Adjust destination address for reverse copy. */ 57.51 + if ( df ) 57.52 + dgpa -= bytes - bytes_per_rep; 57.53 + 57.54 + /* Allocate temporary buffer. Fall back to slow emulation if this fails. */ 57.55 + buf = xmalloc_bytes(bytes); 57.56 + if ( buf == NULL ) 57.57 + return X86EMUL_UNHANDLEABLE; 57.58 + 57.59 + /* 57.60 + * We do a modicum of checking here, just for paranoia's sake and to 57.61 + * definitely avoid copying an uninitialised buffer into guest address space. 57.62 + */ 57.63 + rc = hvm_copy_from_guest_phys(buf, sgpa, bytes); 57.64 + if ( rc == HVMCOPY_okay ) 57.65 + rc = hvm_copy_to_guest_phys(dgpa, buf, bytes); 57.66 + 57.67 + xfree(buf); 57.68 + 57.69 + if ( rc != HVMCOPY_okay ) 57.70 + { 57.71 + gdprintk(XENLOG_WARNING, "Failed memory-to-memory REP MOVS: sgpa=%" 57.72 + PRIpaddr" dgpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n", 57.73 + sgpa, dgpa, *reps, bytes_per_rep); 57.74 + return X86EMUL_UNHANDLEABLE; 57.75 + } 57.76 + 57.77 + return X86EMUL_OKAY; 57.78 } 57.79 57.80 static int hvmemul_read_segment(
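The new RAM-to-RAM path emulates REP MOVS as a memmove(): addresses are adjusted for DF-reversed copies, and the only case punted back as unhandleable is the one a single block copy cannot reproduce: when the first chunk written overlaps the source, a real REP MOVS would re-read its own output on later iterations. The test in isolation, as a sketch (the function name is illustrative):

    #include <stdint.h>

    /* True when a buffered block copy cannot mimic the guest-visible
     * overlap semantics of a chunk-by-chunk REP MOVS. */
    int rep_movs_needs_slow_path(uint64_t sgpa, uint64_t dgpa,
                                 uint64_t bytes, unsigned int bytes_per_rep)
    {
        return (dgpa + bytes_per_rep > sgpa) && (dgpa < sgpa + bytes);
    }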
58.1 --- a/xen/arch/x86/irq.c Tue Sep 02 16:34:53 2008 -0700 58.2 +++ b/xen/arch/x86/irq.c Tue Sep 02 16:55:55 2008 -0700 58.3 @@ -63,7 +63,9 @@ asmlinkage void do_IRQ(struct cpu_user_r 58.4 58.5 if ( likely(desc->status & IRQ_GUEST) ) 58.6 { 58.7 + irq_enter(); 58.8 __do_IRQ_guest(vector); 58.9 + irq_exit(); 58.10 spin_unlock(&desc->lock); 58.11 return; 58.12 }
59.1 --- a/xen/arch/x86/microcode.c Tue Sep 02 16:34:53 2008 -0700 59.2 +++ b/xen/arch/x86/microcode.c Tue Sep 02 16:55:55 2008 -0700 59.3 @@ -124,7 +124,7 @@ static DEFINE_SPINLOCK(microcode_update_ 59.4 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ 59.5 static DEFINE_MUTEX(microcode_mutex); 59.6 59.7 -static void __user *user_buffer; /* user area microcode data buffer */ 59.8 +static const void __user *user_buffer; /* user area microcode data buffer */ 59.9 static unsigned int user_buffer_size; /* it's size */ 59.10 59.11 typedef enum mc_error_code { 59.12 @@ -455,7 +455,7 @@ out: 59.13 return error; 59.14 } 59.15 59.16 -int microcode_update(XEN_GUEST_HANDLE(void) buf, unsigned long len) 59.17 +int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len) 59.18 { 59.19 int ret; 59.20
60.1 --- a/xen/arch/x86/mm.c Tue Sep 02 16:34:53 2008 -0700 60.2 +++ b/xen/arch/x86/mm.c Tue Sep 02 16:55:55 2008 -0700 60.3 @@ -507,11 +507,11 @@ static int alloc_segdesc_page(struct pag 60.4 goto fail; 60.5 60.6 unmap_domain_page(descs); 60.7 - return 1; 60.8 + return 0; 60.9 60.10 fail: 60.11 unmap_domain_page(descs); 60.12 - return 0; 60.13 + return -EINVAL; 60.14 } 60.15 60.16 60.17 @@ -565,20 +565,23 @@ static int get_page_from_pagenr(unsigned 60.18 60.19 static int get_page_and_type_from_pagenr(unsigned long page_nr, 60.20 unsigned long type, 60.21 - struct domain *d) 60.22 + struct domain *d, 60.23 + int preemptible) 60.24 { 60.25 struct page_info *page = mfn_to_page(page_nr); 60.26 + int rc; 60.27 60.28 if ( unlikely(!get_page_from_pagenr(page_nr, d)) ) 60.29 - return 0; 60.30 - 60.31 - if ( unlikely(!get_page_type(page, type)) ) 60.32 - { 60.33 + return -EINVAL; 60.34 + 60.35 + rc = (preemptible ? 60.36 + get_page_type_preemptible(page, type) : 60.37 + (get_page_type(page, type) ? 0 : -EINVAL)); 60.38 + 60.39 + if ( rc ) 60.40 put_page(page); 60.41 - return 0; 60.42 - } 60.43 - 60.44 - return 1; 60.45 + 60.46 + return rc; 60.47 } 60.48 60.49 /* 60.50 @@ -754,22 +757,23 @@ get_page_from_l2e( 60.51 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) 60.52 { 60.53 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK); 60.54 - return 0; 60.55 + return -EINVAL; 60.56 } 60.57 60.58 - rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d); 60.59 - if ( unlikely(!rc) ) 60.60 - rc = get_l2_linear_pagetable(l2e, pfn, d); 60.61 + rc = get_page_and_type_from_pagenr( 60.62 + l2e_get_pfn(l2e), PGT_l1_page_table, d, 0); 60.63 + if ( unlikely(rc) && rc != -EAGAIN && 60.64 + get_l2_linear_pagetable(l2e, pfn, d) ) 60.65 + rc = -EINVAL; 60.66 60.67 return rc; 60.68 } 60.69 60.70 60.71 -#if CONFIG_PAGING_LEVELS >= 3 60.72 define_get_linear_pagetable(l3); 60.73 static int 60.74 get_page_from_l3e( 60.75 - l3_pgentry_t l3e, unsigned long pfn, struct domain *d) 60.76 + l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible) 60.77 { 60.78 int rc; 60.79 60.80 @@ -779,22 +783,23 @@ get_page_from_l3e( 60.81 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) ) 60.82 { 60.83 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d)); 60.84 - return 0; 60.85 + return -EINVAL; 60.86 } 60.87 60.88 - rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d); 60.89 - if ( unlikely(!rc) ) 60.90 - rc = get_l3_linear_pagetable(l3e, pfn, d); 60.91 + rc = get_page_and_type_from_pagenr( 60.92 + l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible); 60.93 + if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR && 60.94 + get_l3_linear_pagetable(l3e, pfn, d) ) 60.95 + rc = -EINVAL; 60.96 60.97 return rc; 60.98 } 60.99 -#endif /* 3 level */ 60.100 60.101 #if CONFIG_PAGING_LEVELS >= 4 60.102 define_get_linear_pagetable(l4); 60.103 static int 60.104 get_page_from_l4e( 60.105 - l4_pgentry_t l4e, unsigned long pfn, struct domain *d) 60.106 + l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible) 60.107 { 60.108 int rc; 60.109 60.110 @@ -804,12 +809,14 @@ get_page_from_l4e( 60.111 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) 60.112 { 60.113 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK); 60.114 - return 0; 60.115 + return -EINVAL; 60.116 } 60.117 60.118 - rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d); 60.119 - if ( unlikely(!rc) ) 60.120 - rc = 
get_l4_linear_pagetable(l4e, pfn, d); 60.121 + rc = get_page_and_type_from_pagenr( 60.122 + l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible); 60.123 + if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR && 60.124 + get_l4_linear_pagetable(l4e, pfn, d) ) 60.125 + rc = -EINVAL; 60.126 60.127 return rc; 60.128 } 60.129 @@ -946,29 +953,35 @@ void put_page_from_l1e(l1_pgentry_t l1e, 60.130 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. 60.131 * Note also that this automatically deals correctly with linear p.t.'s. 60.132 */ 60.133 -static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) 60.134 +static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) 60.135 { 60.136 if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 60.137 (l2e_get_pfn(l2e) != pfn) ) 60.138 + { 60.139 put_page_and_type(l2e_get_page(l2e)); 60.140 + return 0; 60.141 + } 60.142 + return 1; 60.143 } 60.144 60.145 60.146 -#if CONFIG_PAGING_LEVELS >= 3 60.147 -static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn) 60.148 +static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, 60.149 + int preemptible) 60.150 { 60.151 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 60.152 (l3e_get_pfn(l3e) != pfn) ) 60.153 - put_page_and_type(l3e_get_page(l3e)); 60.154 + return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); 60.155 + return 1; 60.156 } 60.157 -#endif 60.158 60.159 #if CONFIG_PAGING_LEVELS >= 4 60.160 -static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn) 60.161 +static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, 60.162 + int preemptible) 60.163 { 60.164 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 60.165 (l4e_get_pfn(l4e) != pfn) ) 60.166 - put_page_and_type(l4e_get_page(l4e)); 60.167 + return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); 60.168 + return 1; 60.169 } 60.170 #endif 60.171 60.172 @@ -977,7 +990,7 @@ static int alloc_l1_table(struct page_in 60.173 struct domain *d = page_get_owner(page); 60.174 unsigned long pfn = page_to_mfn(page); 60.175 l1_pgentry_t *pl1e; 60.176 - int i; 60.177 + unsigned int i; 60.178 60.179 pl1e = map_domain_page(pfn); 60.180 60.181 @@ -991,7 +1004,7 @@ static int alloc_l1_table(struct page_in 60.182 } 60.183 60.184 unmap_domain_page(pl1e); 60.185 - return 1; 60.186 + return 0; 60.187 60.188 fail: 60.189 MEM_LOG("Failure in alloc_l1_table: entry %d", i); 60.190 @@ -1000,7 +1013,7 @@ static int alloc_l1_table(struct page_in 60.191 put_page_from_l1e(pl1e[i], d); 60.192 60.193 unmap_domain_page(pl1e); 60.194 - return 0; 60.195 + return -EINVAL; 60.196 } 60.197 60.198 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e) 60.199 @@ -1128,47 +1141,53 @@ static void pae_flush_pgd( 60.200 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) 60.201 #endif 60.202 60.203 -static int alloc_l2_table(struct page_info *page, unsigned long type) 60.204 +static int alloc_l2_table(struct page_info *page, unsigned long type, 60.205 + int preemptible) 60.206 { 60.207 struct domain *d = page_get_owner(page); 60.208 unsigned long pfn = page_to_mfn(page); 60.209 l2_pgentry_t *pl2e; 60.210 - int i; 60.211 + unsigned int i; 60.212 + int rc = 0; 60.213 60.214 pl2e = map_domain_page(pfn); 60.215 60.216 - for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) 60.217 + for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ ) 60.218 { 60.219 - if ( !is_guest_l2_slot(d, type, i) ) 60.220 + if ( preemptible && i && hypercall_preempt_check() ) 60.221 + { 60.222 + page->nr_validated_ptes = i; 
60.223 + rc = -EAGAIN; 60.224 + break; 60.225 + } 60.226 + 60.227 + if ( !is_guest_l2_slot(d, type, i) || 60.228 + (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 ) 60.229 continue; 60.230 60.231 - if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) ) 60.232 - goto fail; 60.233 - 60.234 + if ( rc < 0 ) 60.235 + { 60.236 + MEM_LOG("Failure in alloc_l2_table: entry %d", i); 60.237 + while ( i-- > 0 ) 60.238 + if ( is_guest_l2_slot(d, type, i) ) 60.239 + put_page_from_l2e(pl2e[i], pfn); 60.240 + break; 60.241 + } 60.242 + 60.243 adjust_guest_l2e(pl2e[i], d); 60.244 } 60.245 60.246 unmap_domain_page(pl2e); 60.247 - return 1; 60.248 - 60.249 - fail: 60.250 - MEM_LOG("Failure in alloc_l2_table: entry %d", i); 60.251 - while ( i-- > 0 ) 60.252 - if ( is_guest_l2_slot(d, type, i) ) 60.253 - put_page_from_l2e(pl2e[i], pfn); 60.254 - 60.255 - unmap_domain_page(pl2e); 60.256 - return 0; 60.257 + return rc > 0 ? 0 : rc; 60.258 } 60.259 60.260 - 60.261 -#if CONFIG_PAGING_LEVELS >= 3 60.262 -static int alloc_l3_table(struct page_info *page) 60.263 +static int alloc_l3_table(struct page_info *page, int preemptible) 60.264 { 60.265 struct domain *d = page_get_owner(page); 60.266 unsigned long pfn = page_to_mfn(page); 60.267 l3_pgentry_t *pl3e; 60.268 - int i; 60.269 + unsigned int i; 60.270 + int rc = 0; 60.271 60.272 #if CONFIG_PAGING_LEVELS == 3 60.273 /* 60.274 @@ -1181,7 +1200,7 @@ static int alloc_l3_table(struct page_in 60.275 d->vcpu[0] && d->vcpu[0]->is_initialised ) 60.276 { 60.277 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); 60.278 - return 0; 60.279 + return -EINVAL; 60.280 } 60.281 #endif 60.282 60.283 @@ -1197,64 +1216,96 @@ static int alloc_l3_table(struct page_in 60.284 if ( is_pv_32on64_domain(d) ) 60.285 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); 60.286 60.287 - for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) 60.288 + for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ ) 60.289 { 60.290 if ( is_pv_32bit_domain(d) && (i == 3) ) 60.291 { 60.292 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) || 60.293 - (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) || 60.294 - !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), 60.295 - PGT_l2_page_table | 60.296 - PGT_pae_xen_l2, 60.297 - d) ) 60.298 - goto fail; 60.299 + (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ) 60.300 + rc = -EINVAL; 60.301 + else 60.302 + rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), 60.303 + PGT_l2_page_table | 60.304 + PGT_pae_xen_l2, 60.305 + d, preemptible); 60.306 } 60.307 - else if ( !is_guest_l3_slot(i) ) 60.308 + else if ( !is_guest_l3_slot(i) || 60.309 + (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 ) 60.310 continue; 60.311 - else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) 60.312 - goto fail; 60.313 + 60.314 + if ( rc == -EAGAIN ) 60.315 + { 60.316 + page->nr_validated_ptes = i; 60.317 + page->partial_pte = 1; 60.318 + } 60.319 + else if ( rc == -EINTR && i ) 60.320 + { 60.321 + page->nr_validated_ptes = i; 60.322 + page->partial_pte = 0; 60.323 + rc = -EAGAIN; 60.324 + } 60.325 + if ( rc < 0 ) 60.326 + break; 60.327 60.328 adjust_guest_l3e(pl3e[i], d); 60.329 } 60.330 60.331 - if ( !create_pae_xen_mappings(d, pl3e) ) 60.332 - goto fail; 60.333 - 60.334 - unmap_domain_page(pl3e); 60.335 - return 1; 60.336 - 60.337 - fail: 60.338 - MEM_LOG("Failure in alloc_l3_table: entry %d", i); 60.339 - while ( i-- > 0 ) 60.340 + if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) ) 60.341 + rc = -EINVAL; 60.342 + if ( rc < 0 && rc != -EAGAIN && rc != 
-EINTR ) 60.343 { 60.344 - if ( !is_guest_l3_slot(i) ) 60.345 - continue; 60.346 - unadjust_guest_l3e(pl3e[i], d); 60.347 - put_page_from_l3e(pl3e[i], pfn); 60.348 + MEM_LOG("Failure in alloc_l3_table: entry %d", i); 60.349 + while ( i-- > 0 ) 60.350 + { 60.351 + if ( !is_guest_l3_slot(i) ) 60.352 + continue; 60.353 + unadjust_guest_l3e(pl3e[i], d); 60.354 + put_page_from_l3e(pl3e[i], pfn, 0); 60.355 + } 60.356 } 60.357 60.358 unmap_domain_page(pl3e); 60.359 - return 0; 60.360 + return rc > 0 ? 0 : rc; 60.361 } 60.362 -#else 60.363 -#define alloc_l3_table(page) (0) 60.364 -#endif 60.365 60.366 #if CONFIG_PAGING_LEVELS >= 4 60.367 -static int alloc_l4_table(struct page_info *page) 60.368 +static int alloc_l4_table(struct page_info *page, int preemptible) 60.369 { 60.370 struct domain *d = page_get_owner(page); 60.371 unsigned long pfn = page_to_mfn(page); 60.372 l4_pgentry_t *pl4e = page_to_virt(page); 60.373 - int i; 60.374 - 60.375 - for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) 60.376 + unsigned int i; 60.377 + int rc = 0; 60.378 + 60.379 + for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ ) 60.380 { 60.381 - if ( !is_guest_l4_slot(d, i) ) 60.382 + if ( !is_guest_l4_slot(d, i) || 60.383 + (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 ) 60.384 continue; 60.385 60.386 - if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) ) 60.387 - goto fail; 60.388 + if ( rc == -EAGAIN ) 60.389 + { 60.390 + page->nr_validated_ptes = i; 60.391 + page->partial_pte = 1; 60.392 + } 60.393 + else if ( rc == -EINTR ) 60.394 + { 60.395 + if ( i ) 60.396 + { 60.397 + page->nr_validated_ptes = i; 60.398 + page->partial_pte = 0; 60.399 + rc = -EAGAIN; 60.400 + } 60.401 + } 60.402 + else if ( rc < 0 ) 60.403 + { 60.404 + MEM_LOG("Failure in alloc_l4_table: entry %d", i); 60.405 + while ( i-- > 0 ) 60.406 + if ( is_guest_l4_slot(d, i) ) 60.407 + put_page_from_l4e(pl4e[i], pfn, 0); 60.408 + } 60.409 + if ( rc < 0 ) 60.410 + return rc; 60.411 60.412 adjust_guest_l4e(pl4e[i], d); 60.413 } 60.414 @@ -1269,18 +1320,10 @@ static int alloc_l4_table(struct page_in 60.415 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3), 60.416 __PAGE_HYPERVISOR); 60.417 60.418 - return 1; 60.419 - 60.420 - fail: 60.421 - MEM_LOG("Failure in alloc_l4_table: entry %d", i); 60.422 - while ( i-- > 0 ) 60.423 - if ( is_guest_l4_slot(d, i) ) 60.424 - put_page_from_l4e(pl4e[i], pfn); 60.425 - 60.426 - return 0; 60.427 + return rc > 0 ? 
0 : rc; 60.428 } 60.429 #else 60.430 -#define alloc_l4_table(page) (0) 60.431 +#define alloc_l4_table(page, preemptible) (-EINVAL) 60.432 #endif 60.433 60.434 60.435 @@ -1289,7 +1332,7 @@ static void free_l1_table(struct page_in 60.436 struct domain *d = page_get_owner(page); 60.437 unsigned long pfn = page_to_mfn(page); 60.438 l1_pgentry_t *pl1e; 60.439 - int i; 60.440 + unsigned int i; 60.441 60.442 pl1e = map_domain_page(pfn); 60.443 60.444 @@ -1301,74 +1344,114 @@ static void free_l1_table(struct page_in 60.445 } 60.446 60.447 60.448 -static void free_l2_table(struct page_info *page) 60.449 +static int free_l2_table(struct page_info *page, int preemptible) 60.450 { 60.451 #ifdef CONFIG_COMPAT 60.452 struct domain *d = page_get_owner(page); 60.453 #endif 60.454 unsigned long pfn = page_to_mfn(page); 60.455 l2_pgentry_t *pl2e; 60.456 - int i; 60.457 + unsigned int i = page->nr_validated_ptes - 1; 60.458 + int err = 0; 60.459 60.460 pl2e = map_domain_page(pfn); 60.461 60.462 - for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) 60.463 - if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) ) 60.464 - put_page_from_l2e(pl2e[i], pfn); 60.465 + ASSERT(page->nr_validated_ptes); 60.466 + do { 60.467 + if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) && 60.468 + put_page_from_l2e(pl2e[i], pfn) == 0 && 60.469 + preemptible && i && hypercall_preempt_check() ) 60.470 + { 60.471 + page->nr_validated_ptes = i; 60.472 + err = -EAGAIN; 60.473 + } 60.474 + } while ( !err && i-- ); 60.475 60.476 unmap_domain_page(pl2e); 60.477 60.478 - page->u.inuse.type_info &= ~PGT_pae_xen_l2; 60.479 + if ( !err ) 60.480 + page->u.inuse.type_info &= ~PGT_pae_xen_l2; 60.481 + 60.482 + return err; 60.483 } 60.484 60.485 - 60.486 -#if CONFIG_PAGING_LEVELS >= 3 60.487 - 60.488 -static void free_l3_table(struct page_info *page) 60.489 +static int free_l3_table(struct page_info *page, int preemptible) 60.490 { 60.491 struct domain *d = page_get_owner(page); 60.492 unsigned long pfn = page_to_mfn(page); 60.493 l3_pgentry_t *pl3e; 60.494 - int i; 60.495 + unsigned int i = page->nr_validated_ptes - !page->partial_pte; 60.496 + int rc = 0; 60.497 60.498 #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION 60.499 if ( d->arch.relmem == RELMEM_l3 ) 60.500 - return; 60.501 + return 0; 60.502 #endif 60.503 60.504 pl3e = map_domain_page(pfn); 60.505 60.506 - for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) 60.507 + do { 60.508 if ( is_guest_l3_slot(i) ) 60.509 { 60.510 - put_page_from_l3e(pl3e[i], pfn); 60.511 + rc = put_page_from_l3e(pl3e[i], pfn, preemptible); 60.512 + if ( rc > 0 ) 60.513 + continue; 60.514 + if ( rc ) 60.515 + break; 60.516 unadjust_guest_l3e(pl3e[i], d); 60.517 } 60.518 + } while ( i-- ); 60.519 60.520 unmap_domain_page(pl3e); 60.521 + 60.522 + if ( rc == -EAGAIN ) 60.523 + { 60.524 + page->nr_validated_ptes = i; 60.525 + page->partial_pte = 1; 60.526 + } 60.527 + else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 ) 60.528 + { 60.529 + page->nr_validated_ptes = i + 1; 60.530 + page->partial_pte = 0; 60.531 + rc = -EAGAIN; 60.532 + } 60.533 + return rc > 0 ? 
0 : rc; 60.534 } 60.535 60.536 -#endif 60.537 - 60.538 #if CONFIG_PAGING_LEVELS >= 4 60.539 - 60.540 -static void free_l4_table(struct page_info *page) 60.541 +static int free_l4_table(struct page_info *page, int preemptible) 60.542 { 60.543 struct domain *d = page_get_owner(page); 60.544 unsigned long pfn = page_to_mfn(page); 60.545 l4_pgentry_t *pl4e = page_to_virt(page); 60.546 - int i; 60.547 + unsigned int i = page->nr_validated_ptes - !page->partial_pte; 60.548 + int rc = 0; 60.549 60.550 #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION 60.551 if ( d->arch.relmem == RELMEM_l4 ) 60.552 - return; 60.553 + return 0; 60.554 #endif 60.555 60.556 - for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) 60.557 + do { 60.558 if ( is_guest_l4_slot(d, i) ) 60.559 - put_page_from_l4e(pl4e[i], pfn); 60.560 + rc = put_page_from_l4e(pl4e[i], pfn, preemptible); 60.561 + } while ( rc >= 0 && i-- ); 60.562 + 60.563 + if ( rc == -EAGAIN ) 60.564 + { 60.565 + page->nr_validated_ptes = i; 60.566 + page->partial_pte = 1; 60.567 + } 60.568 + else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 ) 60.569 + { 60.570 + page->nr_validated_ptes = i + 1; 60.571 + page->partial_pte = 0; 60.572 + rc = -EAGAIN; 60.573 + } 60.574 + return rc > 0 ? 0 : rc; 60.575 } 60.576 - 60.577 +#else 60.578 +#define free_l4_table(page, preemptible) (-EINVAL) 60.579 #endif 60.580 60.581 static void page_lock(struct page_info *page) 60.582 @@ -1560,7 +1643,7 @@ static int mod_l2_entry(l2_pgentry_t *pl 60.583 return rc; 60.584 } 60.585 60.586 - if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) ) 60.587 + if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) ) 60.588 return page_unlock(l2pg), 0; 60.589 60.590 adjust_guest_l2e(nl2e, d); 60.591 @@ -1583,24 +1666,23 @@ static int mod_l2_entry(l2_pgentry_t *pl 60.592 return rc; 60.593 } 60.594 60.595 -#if CONFIG_PAGING_LEVELS >= 3 60.596 - 60.597 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */ 60.598 static int mod_l3_entry(l3_pgentry_t *pl3e, 60.599 l3_pgentry_t nl3e, 60.600 unsigned long pfn, 60.601 - int preserve_ad) 60.602 + int preserve_ad, 60.603 + int preemptible) 60.604 { 60.605 l3_pgentry_t ol3e; 60.606 struct vcpu *curr = current; 60.607 struct domain *d = curr->domain; 60.608 struct page_info *l3pg = mfn_to_page(pfn); 60.609 - int rc = 1; 60.610 + int rc = 0; 60.611 60.612 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) 60.613 { 60.614 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e); 60.615 - return 0; 60.616 + return -EINVAL; 60.617 } 60.618 60.619 /* 60.620 @@ -1608,12 +1690,12 @@ static int mod_l3_entry(l3_pgentry_t *pl 60.621 * would be a pain to ensure they remain continuously valid throughout. 60.622 */ 60.623 if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) ) 60.624 - return 0; 60.625 + return -EINVAL; 60.626 60.627 page_lock(l3pg); 60.628 60.629 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) ) 60.630 - return page_unlock(l3pg), 0; 60.631 + return page_unlock(l3pg), -EFAULT; 60.632 60.633 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) 60.634 { 60.635 @@ -1622,7 +1704,7 @@ static int mod_l3_entry(l3_pgentry_t *pl 60.636 page_unlock(l3pg); 60.637 MEM_LOG("Bad L3 flags %x", 60.638 l3e_get_flags(nl3e) & l3_disallow_mask(d)); 60.639 - return 0; 60.640 + return -EINVAL; 60.641 } 60.642 60.643 /* Fast path for identical mapping and presence. 
*/ 60.644 @@ -1631,28 +1713,30 @@ static int mod_l3_entry(l3_pgentry_t *pl 60.645 adjust_guest_l3e(nl3e, d); 60.646 rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad); 60.647 page_unlock(l3pg); 60.648 - return rc; 60.649 + return rc ? 0 : -EFAULT; 60.650 } 60.651 60.652 - if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) ) 60.653 - return page_unlock(l3pg), 0; 60.654 + rc = get_page_from_l3e(nl3e, pfn, d, preemptible); 60.655 + if ( unlikely(rc < 0) ) 60.656 + return page_unlock(l3pg), rc; 60.657 + rc = 0; 60.658 60.659 adjust_guest_l3e(nl3e, d); 60.660 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, 60.661 preserve_ad)) ) 60.662 { 60.663 ol3e = nl3e; 60.664 - rc = 0; 60.665 + rc = -EFAULT; 60.666 } 60.667 } 60.668 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, 60.669 preserve_ad)) ) 60.670 { 60.671 page_unlock(l3pg); 60.672 - return 0; 60.673 + return -EFAULT; 60.674 } 60.675 60.676 - if ( likely(rc) ) 60.677 + if ( likely(rc == 0) ) 60.678 { 60.679 if ( !create_pae_xen_mappings(d, pl3e) ) 60.680 BUG(); 60.681 @@ -1661,36 +1745,35 @@ static int mod_l3_entry(l3_pgentry_t *pl 60.682 } 60.683 60.684 page_unlock(l3pg); 60.685 - put_page_from_l3e(ol3e, pfn); 60.686 + put_page_from_l3e(ol3e, pfn, 0); 60.687 return rc; 60.688 } 60.689 60.690 -#endif 60.691 - 60.692 #if CONFIG_PAGING_LEVELS >= 4 60.693 60.694 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */ 60.695 static int mod_l4_entry(l4_pgentry_t *pl4e, 60.696 l4_pgentry_t nl4e, 60.697 unsigned long pfn, 60.698 - int preserve_ad) 60.699 + int preserve_ad, 60.700 + int preemptible) 60.701 { 60.702 struct vcpu *curr = current; 60.703 struct domain *d = curr->domain; 60.704 l4_pgentry_t ol4e; 60.705 struct page_info *l4pg = mfn_to_page(pfn); 60.706 - int rc = 1; 60.707 + int rc = 0; 60.708 60.709 if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) ) 60.710 { 60.711 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e); 60.712 - return 0; 60.713 + return -EINVAL; 60.714 } 60.715 60.716 page_lock(l4pg); 60.717 60.718 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) ) 60.719 - return page_unlock(l4pg), 0; 60.720 + return page_unlock(l4pg), -EFAULT; 60.721 60.722 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT ) 60.723 { 60.724 @@ -1699,7 +1782,7 @@ static int mod_l4_entry(l4_pgentry_t *pl 60.725 page_unlock(l4pg); 60.726 MEM_LOG("Bad L4 flags %x", 60.727 l4e_get_flags(nl4e) & L4_DISALLOW_MASK); 60.728 - return 0; 60.729 + return -EINVAL; 60.730 } 60.731 60.732 /* Fast path for identical mapping and presence. */ 60.733 @@ -1708,29 +1791,31 @@ static int mod_l4_entry(l4_pgentry_t *pl 60.734 adjust_guest_l4e(nl4e, d); 60.735 rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad); 60.736 page_unlock(l4pg); 60.737 - return rc; 60.738 + return rc ? 
0 : -EFAULT; 60.739 } 60.740 60.741 - if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) ) 60.742 - return page_unlock(l4pg), 0; 60.743 + rc = get_page_from_l4e(nl4e, pfn, d, preemptible); 60.744 + if ( unlikely(rc < 0) ) 60.745 + return page_unlock(l4pg), rc; 60.746 + rc = 0; 60.747 60.748 adjust_guest_l4e(nl4e, d); 60.749 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, 60.750 preserve_ad)) ) 60.751 { 60.752 ol4e = nl4e; 60.753 - rc = 0; 60.754 + rc = -EFAULT; 60.755 } 60.756 } 60.757 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, 60.758 preserve_ad)) ) 60.759 { 60.760 page_unlock(l4pg); 60.761 - return 0; 60.762 + return -EFAULT; 60.763 } 60.764 60.765 page_unlock(l4pg); 60.766 - put_page_from_l4e(ol4e, pfn); 60.767 + put_page_from_l4e(ol4e, pfn, 0); 60.768 return rc; 60.769 } 60.770 60.771 @@ -1788,9 +1873,11 @@ int get_page(struct page_info *page, str 60.772 } 60.773 60.774 60.775 -static int alloc_page_type(struct page_info *page, unsigned long type) 60.776 +static int alloc_page_type(struct page_info *page, unsigned long type, 60.777 + int preemptible) 60.778 { 60.779 struct domain *owner = page_get_owner(page); 60.780 + int rc; 60.781 60.782 /* A page table is dirtied when its type count becomes non-zero. */ 60.783 if ( likely(owner != NULL) ) 60.784 @@ -1799,30 +1886,65 @@ static int alloc_page_type(struct page_i 60.785 switch ( type & PGT_type_mask ) 60.786 { 60.787 case PGT_l1_page_table: 60.788 - return alloc_l1_table(page); 60.789 + alloc_l1_table(page); 60.790 + rc = 0; 60.791 + break; 60.792 case PGT_l2_page_table: 60.793 - return alloc_l2_table(page, type); 60.794 + rc = alloc_l2_table(page, type, preemptible); 60.795 + break; 60.796 case PGT_l3_page_table: 60.797 - return alloc_l3_table(page); 60.798 + rc = alloc_l3_table(page, preemptible); 60.799 + break; 60.800 case PGT_l4_page_table: 60.801 - return alloc_l4_table(page); 60.802 + rc = alloc_l4_table(page, preemptible); 60.803 + break; 60.804 case PGT_seg_desc_page: 60.805 - return alloc_segdesc_page(page); 60.806 + rc = alloc_segdesc_page(page); 60.807 + break; 60.808 default: 60.809 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 60.810 type, page->u.inuse.type_info, 60.811 page->count_info); 60.812 + rc = -EINVAL; 60.813 BUG(); 60.814 } 60.815 60.816 - return 0; 60.817 + /* No need for atomic update of type_info here: noone else updates it. 
*/ 60.818 + wmb(); 60.819 + if ( rc == -EAGAIN ) 60.820 + { 60.821 + page->u.inuse.type_info |= PGT_partial; 60.822 + } 60.823 + else if ( rc == -EINTR ) 60.824 + { 60.825 + ASSERT((page->u.inuse.type_info & 60.826 + (PGT_count_mask|PGT_validated|PGT_partial)) == 1); 60.827 + page->u.inuse.type_info &= ~PGT_count_mask; 60.828 + } 60.829 + else if ( rc ) 60.830 + { 60.831 + ASSERT(rc < 0); 60.832 + MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" 60.833 + PRtype_info ": caf=%08x taf=%" PRtype_info, 60.834 + page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), 60.835 + type, page->count_info, page->u.inuse.type_info); 60.836 + page->u.inuse.type_info = 0; 60.837 + } 60.838 + else 60.839 + { 60.840 + page->u.inuse.type_info |= PGT_validated; 60.841 + } 60.842 + 60.843 + return rc; 60.844 } 60.845 60.846 60.847 -void free_page_type(struct page_info *page, unsigned long type) 60.848 +int free_page_type(struct page_info *page, unsigned long type, 60.849 + int preemptible) 60.850 { 60.851 struct domain *owner = page_get_owner(page); 60.852 unsigned long gmfn; 60.853 + int rc; 60.854 60.855 if ( likely(owner != NULL) ) 60.856 { 60.857 @@ -1842,7 +1964,7 @@ void free_page_type(struct page_info *pa 60.858 paging_mark_dirty(owner, page_to_mfn(page)); 60.859 60.860 if ( shadow_mode_refcounts(owner) ) 60.861 - return; 60.862 + return 0; 60.863 60.864 gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); 60.865 ASSERT(VALID_M2P(gmfn)); 60.866 @@ -1850,42 +1972,80 @@ void free_page_type(struct page_info *pa 60.867 } 60.868 } 60.869 60.870 + if ( !(type & PGT_partial) ) 60.871 + { 60.872 + page->nr_validated_ptes = 1U << PAGETABLE_ORDER; 60.873 + page->partial_pte = 0; 60.874 + } 60.875 switch ( type & PGT_type_mask ) 60.876 { 60.877 case PGT_l1_page_table: 60.878 free_l1_table(page); 60.879 + rc = 0; 60.880 break; 60.881 - 60.882 case PGT_l2_page_table: 60.883 - free_l2_table(page); 60.884 + rc = free_l2_table(page, preemptible); 60.885 break; 60.886 - 60.887 -#if CONFIG_PAGING_LEVELS >= 3 60.888 case PGT_l3_page_table: 60.889 - free_l3_table(page); 60.890 - break; 60.891 +#if CONFIG_PAGING_LEVELS == 3 60.892 + if ( !(type & PGT_partial) ) 60.893 + page->nr_validated_ptes = L3_PAGETABLE_ENTRIES; 60.894 #endif 60.895 - 60.896 -#if CONFIG_PAGING_LEVELS >= 4 60.897 - case PGT_l4_page_table: 60.898 - free_l4_table(page); 60.899 + rc = free_l3_table(page, preemptible); 60.900 break; 60.901 -#endif 60.902 - 60.903 + case PGT_l4_page_table: 60.904 + rc = free_l4_table(page, preemptible); 60.905 + break; 60.906 default: 60.907 - printk("%s: type %lx pfn %lx\n",__FUNCTION__, 60.908 - type, page_to_mfn(page)); 60.909 + MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page)); 60.910 + rc = -EINVAL; 60.911 BUG(); 60.912 } 60.913 + 60.914 + /* No need for atomic update of type_info here: noone else updates it. */ 60.915 + if ( rc == 0 ) 60.916 + { 60.917 + /* 60.918 + * Record TLB information for flush later. We do not stamp page tables 60.919 + * when running in shadow mode: 60.920 + * 1. Pointless, since it's the shadow pt's which must be tracked. 60.921 + * 2. Shadow mode reuses this field for shadowed page tables to 60.922 + * store flags info -- we don't want to conflict with that. 
60.923 + */ 60.924 + if ( !(shadow_mode_enabled(page_get_owner(page)) && 60.925 + (page->count_info & PGC_page_table)) ) 60.926 + page->tlbflush_timestamp = tlbflush_current_time(); 60.927 + wmb(); 60.928 + page->u.inuse.type_info--; 60.929 + } 60.930 + else if ( rc == -EINTR ) 60.931 + { 60.932 + ASSERT(!(page->u.inuse.type_info & 60.933 + (PGT_count_mask|PGT_validated|PGT_partial))); 60.934 + if ( !(shadow_mode_enabled(page_get_owner(page)) && 60.935 + (page->count_info & PGC_page_table)) ) 60.936 + page->tlbflush_timestamp = tlbflush_current_time(); 60.937 + wmb(); 60.938 + page->u.inuse.type_info |= PGT_validated; 60.939 + } 60.940 + else 60.941 + { 60.942 + BUG_ON(rc != -EAGAIN); 60.943 + wmb(); 60.944 + page->u.inuse.type_info |= PGT_partial; 60.945 + } 60.946 + 60.947 + return rc; 60.948 } 60.949 60.950 60.951 -void put_page_type(struct page_info *page) 60.952 +static int __put_page_type(struct page_info *page, 60.953 + int preemptible) 60.954 { 60.955 unsigned long nx, x, y = page->u.inuse.type_info; 60.956 60.957 - again: 60.958 - do { 60.959 + for ( ; ; ) 60.960 + { 60.961 x = y; 60.962 nx = x - 1; 60.963 60.964 @@ -1894,21 +2054,19 @@ void put_page_type(struct page_info *pag 60.965 if ( unlikely((nx & PGT_count_mask) == 0) ) 60.966 { 60.967 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && 60.968 - likely(nx & PGT_validated) ) 60.969 + likely(nx & (PGT_validated|PGT_partial)) ) 60.970 { 60.971 /* 60.972 * Page-table pages must be unvalidated when count is zero. The 60.973 * 'free' is safe because the refcnt is non-zero and validated 60.974 * bit is clear => other ops will spin or fail. 60.975 */ 60.976 - if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 60.977 - x & ~PGT_validated)) != x) ) 60.978 - goto again; 60.979 + nx = x & ~(PGT_validated|PGT_partial); 60.980 + if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, 60.981 + x, nx)) != x) ) 60.982 + continue; 60.983 /* We cleared the 'valid bit' so we do the clean up. */ 60.984 - free_page_type(page, x); 60.985 - /* Carry on, but with the 'valid bit' now clear. 
*/ 60.986 - x &= ~PGT_validated; 60.987 - nx &= ~PGT_validated; 60.988 + return free_page_type(page, x, preemptible); 60.989 } 60.990 60.991 /* 60.992 @@ -1922,25 +2080,33 @@ void put_page_type(struct page_info *pag 60.993 (page->count_info & PGC_page_table)) ) 60.994 page->tlbflush_timestamp = tlbflush_current_time(); 60.995 } 60.996 + 60.997 + if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) 60.998 + break; 60.999 + 60.1000 + if ( preemptible && hypercall_preempt_check() ) 60.1001 + return -EINTR; 60.1002 } 60.1003 - while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); 60.1004 + 60.1005 + return 0; 60.1006 } 60.1007 60.1008 60.1009 -int get_page_type(struct page_info *page, unsigned long type) 60.1010 +static int __get_page_type(struct page_info *page, unsigned long type, 60.1011 + int preemptible) 60.1012 { 60.1013 unsigned long nx, x, y = page->u.inuse.type_info; 60.1014 60.1015 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); 60.1016 60.1017 - again: 60.1018 - do { 60.1019 + for ( ; ; ) 60.1020 + { 60.1021 x = y; 60.1022 nx = x + 1; 60.1023 if ( unlikely((nx & PGT_count_mask) == 0) ) 60.1024 { 60.1025 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); 60.1026 - return 0; 60.1027 + return -EINVAL; 60.1028 } 60.1029 else if ( unlikely((x & PGT_count_mask) == 0) ) 60.1030 { 60.1031 @@ -1993,28 +2159,43 @@ int get_page_type(struct page_info *page 60.1032 /* Don't log failure if it could be a recursive-mapping attempt. */ 60.1033 if ( ((x & PGT_type_mask) == PGT_l2_page_table) && 60.1034 (type == PGT_l1_page_table) ) 60.1035 - return 0; 60.1036 + return -EINVAL; 60.1037 if ( ((x & PGT_type_mask) == PGT_l3_page_table) && 60.1038 (type == PGT_l2_page_table) ) 60.1039 - return 0; 60.1040 + return -EINVAL; 60.1041 if ( ((x & PGT_type_mask) == PGT_l4_page_table) && 60.1042 (type == PGT_l3_page_table) ) 60.1043 - return 0; 60.1044 + return -EINVAL; 60.1045 MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") " 60.1046 "for mfn %lx (pfn %lx)", 60.1047 x, type, page_to_mfn(page), 60.1048 get_gpfn_from_mfn(page_to_mfn(page))); 60.1049 - return 0; 60.1050 + return -EINVAL; 60.1051 } 60.1052 else if ( unlikely(!(x & PGT_validated)) ) 60.1053 { 60.1054 - /* Someone else is updating validation of this page. Wait... */ 60.1055 - while ( (y = page->u.inuse.type_info) == x ) 60.1056 - cpu_relax(); 60.1057 - goto again; 60.1058 + if ( !(x & PGT_partial) ) 60.1059 + { 60.1060 + /* Someone else is updating validation of this page. Wait... */ 60.1061 + while ( (y = page->u.inuse.type_info) == x ) 60.1062 + { 60.1063 + if ( preemptible && hypercall_preempt_check() ) 60.1064 + return -EINTR; 60.1065 + cpu_relax(); 60.1066 + } 60.1067 + continue; 60.1068 + } 60.1069 + /* Type ref count was left at 1 when PGT_partial got set. */ 60.1070 + ASSERT((x & PGT_count_mask) == 1); 60.1071 + nx = x & ~PGT_partial; 60.1072 } 60.1073 + 60.1074 + if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) 60.1075 + break; 60.1076 + 60.1077 + if ( preemptible && hypercall_preempt_check() ) 60.1078 + return -EINTR; 60.1079 } 60.1080 - while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); 60.1081 60.1082 if ( unlikely((x & PGT_type_mask) != type) ) 60.1083 { 60.1084 @@ -2032,25 +2213,42 @@ int get_page_type(struct page_info *page 60.1085 60.1086 if ( unlikely(!(nx & PGT_validated)) ) 60.1087 { 60.1088 - /* Try to validate page type; drop the new reference on failure. 
*/ 60.1089 - if ( unlikely(!alloc_page_type(page, type)) ) 60.1090 + if ( !(x & PGT_partial) ) 60.1091 { 60.1092 - MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" 60.1093 - PRtype_info ": caf=%08x taf=%" PRtype_info, 60.1094 - page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), 60.1095 - type, page->count_info, page->u.inuse.type_info); 60.1096 - /* Noone else can get a reference. We hold the only ref. */ 60.1097 - page->u.inuse.type_info = 0; 60.1098 - return 0; 60.1099 + page->nr_validated_ptes = 0; 60.1100 + page->partial_pte = 0; 60.1101 } 60.1102 - 60.1103 - /* Noone else is updating simultaneously. */ 60.1104 - __set_bit(_PGT_validated, &page->u.inuse.type_info); 60.1105 + return alloc_page_type(page, type, preemptible); 60.1106 } 60.1107 60.1108 - return 1; 60.1109 + return 0; 60.1110 +} 60.1111 + 60.1112 +void put_page_type(struct page_info *page) 60.1113 +{ 60.1114 + int rc = __put_page_type(page, 0); 60.1115 + ASSERT(rc == 0); 60.1116 + (void)rc; 60.1117 } 60.1118 60.1119 +int get_page_type(struct page_info *page, unsigned long type) 60.1120 +{ 60.1121 + int rc = __get_page_type(page, type, 0); 60.1122 + if ( likely(rc == 0) ) 60.1123 + return 1; 60.1124 + ASSERT(rc == -EINVAL); 60.1125 + return 0; 60.1126 +} 60.1127 + 60.1128 +int put_page_type_preemptible(struct page_info *page) 60.1129 +{ 60.1130 + return __put_page_type(page, 1); 60.1131 +} 60.1132 + 60.1133 +int get_page_type_preemptible(struct page_info *page, unsigned long type) 60.1134 +{ 60.1135 + return __get_page_type(page, type, 1); 60.1136 +} 60.1137 60.1138 void cleanup_page_cacheattr(struct page_info *page) 60.1139 { 60.1140 @@ -2087,7 +2285,7 @@ int new_guest_cr3(unsigned long mfn) 60.1141 l4e_from_pfn( 60.1142 mfn, 60.1143 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), 60.1144 - pagetable_get_pfn(v->arch.guest_table), 0); 60.1145 + pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0; 60.1146 if ( unlikely(!okay) ) 60.1147 { 60.1148 MEM_LOG("Error while installing new compat baseptr %lx", mfn); 60.1149 @@ -2102,7 +2300,7 @@ int new_guest_cr3(unsigned long mfn) 60.1150 #endif 60.1151 okay = paging_mode_refcounts(d) 60.1152 ? get_page_from_pagenr(mfn, d) 60.1153 - : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); 60.1154 + : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0); 60.1155 if ( unlikely(!okay) ) 60.1156 { 60.1157 MEM_LOG("Error while installing new baseptr %lx", mfn); 60.1158 @@ -2276,9 +2474,7 @@ int do_mmuext_op( 60.1159 { 60.1160 if ( hypercall_preempt_check() ) 60.1161 { 60.1162 - rc = hypercall_create_continuation( 60.1163 - __HYPERVISOR_mmuext_op, "hihi", 60.1164 - uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); 60.1165 + rc = -EAGAIN; 60.1166 break; 60.1167 } 60.1168 60.1169 @@ -2325,10 +2521,14 @@ int do_mmuext_op( 60.1170 if ( paging_mode_refcounts(FOREIGNDOM) ) 60.1171 break; 60.1172 60.1173 - okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); 60.1174 + rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1); 60.1175 + okay = !rc; 60.1176 if ( unlikely(!okay) ) 60.1177 { 60.1178 - MEM_LOG("Error while pinning mfn %lx", mfn); 60.1179 + if ( rc == -EINTR ) 60.1180 + rc = -EAGAIN; 60.1181 + else if ( rc != -EAGAIN ) 60.1182 + MEM_LOG("Error while pinning mfn %lx", mfn); 60.1183 break; 60.1184 } 60.1185 60.1186 @@ -2373,8 +2573,11 @@ int do_mmuext_op( 60.1187 { 60.1188 put_page_and_type(page); 60.1189 put_page(page); 60.1190 - /* A page is dirtied when its pin status is cleared. 
*/ 60.1191 - paging_mark_dirty(d, mfn); 60.1192 + if ( !rc ) 60.1193 + { 60.1194 + /* A page is dirtied when its pin status is cleared. */ 60.1195 + paging_mark_dirty(d, mfn); 60.1196 + } 60.1197 } 60.1198 else 60.1199 { 60.1200 @@ -2398,8 +2601,8 @@ int do_mmuext_op( 60.1201 if ( paging_mode_refcounts(d) ) 60.1202 okay = get_page_from_pagenr(mfn, d); 60.1203 else 60.1204 - okay = get_page_and_type_from_pagenr( 60.1205 - mfn, PGT_root_page_table, d); 60.1206 + okay = !get_page_and_type_from_pagenr( 60.1207 + mfn, PGT_root_page_table, d, 0); 60.1208 if ( unlikely(!okay) ) 60.1209 { 60.1210 MEM_LOG("Error while installing new mfn %lx", mfn); 60.1211 @@ -2517,6 +2720,11 @@ int do_mmuext_op( 60.1212 guest_handle_add_offset(uops, 1); 60.1213 } 60.1214 60.1215 + if ( rc == -EAGAIN ) 60.1216 + rc = hypercall_create_continuation( 60.1217 + __HYPERVISOR_mmuext_op, "hihi", 60.1218 + uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); 60.1219 + 60.1220 process_deferred_ops(); 60.1221 60.1222 perfc_add(num_mmuext_ops, i); 60.1223 @@ -2576,9 +2784,7 @@ int do_mmu_update( 60.1224 { 60.1225 if ( hypercall_preempt_check() ) 60.1226 { 60.1227 - rc = hypercall_create_continuation( 60.1228 - __HYPERVISOR_mmu_update, "hihi", 60.1229 - ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); 60.1230 + rc = -EAGAIN; 60.1231 break; 60.1232 } 60.1233 60.1234 @@ -2653,27 +2859,29 @@ int do_mmu_update( 60.1235 cmd == MMU_PT_UPDATE_PRESERVE_AD); 60.1236 } 60.1237 break; 60.1238 -#if CONFIG_PAGING_LEVELS >= 3 60.1239 case PGT_l3_page_table: 60.1240 { 60.1241 l3_pgentry_t l3e = l3e_from_intpte(req.val); 60.1242 - okay = mod_l3_entry(va, l3e, mfn, 60.1243 - cmd == MMU_PT_UPDATE_PRESERVE_AD); 60.1244 + rc = mod_l3_entry(va, l3e, mfn, 60.1245 + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1); 60.1246 + okay = !rc; 60.1247 } 60.1248 break; 60.1249 -#endif 60.1250 #if CONFIG_PAGING_LEVELS >= 4 60.1251 case PGT_l4_page_table: 60.1252 { 60.1253 l4_pgentry_t l4e = l4e_from_intpte(req.val); 60.1254 - okay = mod_l4_entry(va, l4e, mfn, 60.1255 - cmd == MMU_PT_UPDATE_PRESERVE_AD); 60.1256 + rc = mod_l4_entry(va, l4e, mfn, 60.1257 + cmd == MMU_PT_UPDATE_PRESERVE_AD, 1); 60.1258 + okay = !rc; 60.1259 } 60.1260 break; 60.1261 #endif 60.1262 } 60.1263 60.1264 put_page_type(page); 60.1265 + if ( rc == -EINTR ) 60.1266 + rc = -EAGAIN; 60.1267 } 60.1268 break; 60.1269 60.1270 @@ -2742,6 +2950,11 @@ int do_mmu_update( 60.1271 guest_handle_add_offset(ureqs, 1); 60.1272 } 60.1273 60.1274 + if ( rc == -EAGAIN ) 60.1275 + rc = hypercall_create_continuation( 60.1276 + __HYPERVISOR_mmu_update, "hihi", 60.1277 + ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); 60.1278 + 60.1279 process_deferred_ops(); 60.1280 60.1281 domain_mmap_cache_destroy(&mapcache); 60.1282 @@ -3339,6 +3552,7 @@ DEFINE_XEN_GUEST_HANDLE(e820entry_t); 60.1283 60.1284 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) 60.1285 { 60.1286 + struct page_info *page = NULL; 60.1287 switch ( op ) 60.1288 { 60.1289 case XENMEM_add_to_physmap: 60.1290 @@ -3389,12 +3603,22 @@ long arch_memory_op(int op, XEN_GUEST_HA 60.1291 60.1292 spin_unlock(&d->grant_table->lock); 60.1293 break; 60.1294 + case XENMAPSPACE_mfn: 60.1295 + { 60.1296 + if ( get_page_from_pagenr(xatp.idx, d) ) { 60.1297 + mfn = xatp.idx; 60.1298 + page = mfn_to_page(mfn); 60.1299 + } 60.1300 + break; 60.1301 + } 60.1302 default: 60.1303 break; 60.1304 } 60.1305 60.1306 if ( !paging_mode_translate(d) || (mfn == 0) ) 60.1307 { 60.1308 + if ( page ) 60.1309 + put_page(page); 60.1310 
rcu_unlock_domain(d); 60.1311 return -EINVAL; 60.1312 } 60.1313 @@ -3423,6 +3647,53 @@ long arch_memory_op(int op, XEN_GUEST_HA 60.1314 60.1315 domain_unlock(d); 60.1316 60.1317 + if ( page ) 60.1318 + put_page(page); 60.1319 + 60.1320 + rcu_unlock_domain(d); 60.1321 + 60.1322 + break; 60.1323 + } 60.1324 + 60.1325 + case XENMEM_remove_from_physmap: 60.1326 + { 60.1327 + struct xen_remove_from_physmap xrfp; 60.1328 + unsigned long mfn; 60.1329 + struct domain *d; 60.1330 + 60.1331 + if ( copy_from_guest(&xrfp, arg, 1) ) 60.1332 + return -EFAULT; 60.1333 + 60.1334 + if ( xrfp.domid == DOMID_SELF ) 60.1335 + { 60.1336 + d = rcu_lock_current_domain(); 60.1337 + } 60.1338 + else 60.1339 + { 60.1340 + if ( (d = rcu_lock_domain_by_id(xrfp.domid)) == NULL ) 60.1341 + return -ESRCH; 60.1342 + if ( !IS_PRIV_FOR(current->domain, d) ) 60.1343 + { 60.1344 + rcu_unlock_domain(d); 60.1345 + return -EPERM; 60.1346 + } 60.1347 + } 60.1348 + 60.1349 + if ( xsm_remove_from_physmap(current->domain, d) ) 60.1350 + { 60.1351 + rcu_unlock_domain(d); 60.1352 + return -EPERM; 60.1353 + } 60.1354 + 60.1355 + domain_lock(d); 60.1356 + 60.1357 + mfn = gmfn_to_mfn(d, xrfp.gpfn); 60.1358 + 60.1359 + if ( mfn_valid(mfn) ) 60.1360 + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); 60.1361 + 60.1362 + domain_unlock(d); 60.1363 + 60.1364 rcu_unlock_domain(d); 60.1365 60.1366 break; 60.1367 @@ -3637,9 +3908,8 @@ static int ptwr_emulated_update( 60.1368 nl1e = l1e_from_intpte(val); 60.1369 if ( unlikely(!get_page_from_l1e(nl1e, d)) ) 60.1370 { 60.1371 - if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) && 60.1372 - (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg && 60.1373 - (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) 60.1374 + if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) && 60.1375 + !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) 60.1376 { 60.1377 /* 60.1378 * If this is an upper-half write to a PAE PTE then we assume that
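The mm.c hunks above are the heart of this refresh: validation and teardown of L2..L4 page tables becomes preemptible. The alloc_*/free_* paths and mod_l3/l4_entry now return -EINTR (preempted before making progress) or -EAGAIN (partial progress, checkpointed in page->nr_validated_ptes/partial_pte and advertised by the new PGT_partial type flag), and do_mmu_update/do_mmuext_op convert -EAGAIN into a hypercall continuation at the end of the loop instead of creating one eagerly. A minimal standalone model of the resumable-validation idea, with all names illustrative rather than the hypervisor's:

    /* Resumable validation: work from a saved checkpoint and return
     * -EAGAIN when the (simulated) preemption check fires, so the
     * caller can re-enter and continue where the last slice stopped. */
    #include <errno.h>
    #include <stdio.h>

    #define NENTRIES 512

    struct pt_page {
        unsigned int nr_validated_ptes;   /* resume point, as in the patch */
        int entries[NENTRIES];
    };

    static int validate_table(struct pt_page *pg)
    {
        unsigned int i, done = 0;

        for ( i = pg->nr_validated_ptes; i < NENTRIES; i++ )
        {
            if ( done++ == 128 )            /* stand-in for hypercall_preempt_check() */
            {
                pg->nr_validated_ptes = i;  /* checkpoint partial progress */
                return -EAGAIN;             /* caller re-issues the hypercall */
            }
            pg->entries[i] = 1;             /* "validate" one entry */
        }
        return 0;
    }

    int main(void)
    {
        struct pt_page pg = { 0 };
        int rc, passes = 1;

        while ( (rc = validate_table(&pg)) == -EAGAIN )
            passes++;                       /* models the continuation re-entry */
        printf("validated in %d slices, rc=%d\n", passes, rc);
        return 0;
    }

The essential design point is that the checkpoint lives in the page structure itself, so a re-entered hypercall resumes exactly where the previous slice stopped rather than revalidating from entry 0.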
61.1 --- a/xen/arch/x86/platform_hypercall.c Tue Sep 02 16:34:53 2008 -0700 61.2 +++ b/xen/arch/x86/platform_hypercall.c Tue Sep 02 16:55:55 2008 -0700 61.3 @@ -147,8 +147,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe 61.4 61.5 case XENPF_microcode_update: 61.6 { 61.7 - extern int microcode_update(XEN_GUEST_HANDLE(void), unsigned long len); 61.8 - XEN_GUEST_HANDLE(void) data; 61.9 + XEN_GUEST_HANDLE(const_void) data; 61.10 61.11 ret = xsm_microcode(); 61.12 if ( ret )
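A small cleanup alongside: the function-local extern for microcode_update goes away (presumably declared in a shared header now, not shown in this hunk) and the guest handle becomes const_void, since the microcode image is input-only. The same constification habit in plain C, as a sketch with hypothetical names:

    #include <stddef.h>

    /* Taking the buffer as const documents that the callee only ever
     * reads the image it is handed. */
    static int microcode_update_model(const void *buf, size_t len)
    {
        const unsigned char *p = buf;
        unsigned int sum = 0;

        for ( size_t i = 0; i < len; i++ )
            sum += p[i];                    /* read-only access */
        return (len && sum == 0) ? -1 : 0;  /* e.g. reject an all-zero image */
    }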
62.1 --- a/xen/arch/x86/time.c Tue Sep 02 16:34:53 2008 -0700 62.2 +++ b/xen/arch/x86/time.c Tue Sep 02 16:55:55 2008 -0700 62.3 @@ -840,12 +840,11 @@ struct cpu_calibration { 62.4 u64 local_tsc_stamp; 62.5 s_time_t stime_local_stamp; 62.6 s_time_t stime_master_stamp; 62.7 - struct timer softirq_callback; 62.8 }; 62.9 static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration); 62.10 62.11 /* Softirq handler for per-CPU time calibration. */ 62.12 -static void local_time_calibration(void *unused) 62.13 +static void local_time_calibration(void) 62.14 { 62.15 struct cpu_time *t = &this_cpu(cpu_time); 62.16 struct cpu_calibration *c = &this_cpu(cpu_calibration); 62.17 @@ -1004,13 +1003,12 @@ static void time_calibration_rendezvous( 62.18 struct cpu_calibration *c = &this_cpu(cpu_calibration); 62.19 struct calibration_rendezvous *r = _r; 62.20 62.21 - local_irq_disable(); 62.22 - 62.23 if ( smp_processor_id() == 0 ) 62.24 { 62.25 while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) ) 62.26 cpu_relax(); 62.27 r->master_stime = read_platform_stime(); 62.28 + mb(); /* write r->master_stime /then/ signal */ 62.29 atomic_inc(&r->nr_cpus); 62.30 } 62.31 else 62.32 @@ -1018,16 +1016,14 @@ static void time_calibration_rendezvous( 62.33 atomic_inc(&r->nr_cpus); 62.34 while ( atomic_read(&r->nr_cpus) != total_cpus ) 62.35 cpu_relax(); 62.36 + mb(); /* receive signal /then/ read r->master_stime */ 62.37 } 62.38 62.39 rdtscll(c->local_tsc_stamp); 62.40 c->stime_local_stamp = get_s_time(); 62.41 c->stime_master_stamp = r->master_stime; 62.42 62.43 - local_irq_enable(); 62.44 - 62.45 - /* Callback in softirq context as soon as possible. */ 62.46 - set_timer(&c->softirq_callback, c->stime_local_stamp); 62.47 + raise_softirq(TIME_CALIBRATE_SOFTIRQ); 62.48 } 62.49 62.50 static void time_calibration(void *unused) 62.51 @@ -1036,6 +1032,7 @@ static void time_calibration(void *unuse 62.52 .nr_cpus = ATOMIC_INIT(0) 62.53 }; 62.54 62.55 + /* @wait=1 because we must wait for all cpus before freeing @r. */ 62.56 on_each_cpu(time_calibration_rendezvous, &r, 0, 1); 62.57 } 62.58 62.59 @@ -1053,9 +1050,6 @@ void init_percpu_time(void) 62.60 t->stime_master_stamp = now; 62.61 t->stime_local_stamp = now; 62.62 62.63 - init_timer(&this_cpu(cpu_calibration).softirq_callback, 62.64 - local_time_calibration, NULL, smp_processor_id()); 62.65 - 62.66 if ( smp_processor_id() == 0 ) 62.67 { 62.68 init_timer(&calibration_timer, time_calibration, NULL, 0); 62.69 @@ -1073,6 +1067,8 @@ int __init init_xen_time(void) 62.70 if ( cpuid_edx(0x80000007) & (1u<<8) ) 62.71 tsc_invariant = 1; 62.72 62.73 + open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration); 62.74 + 62.75 init_percpu_time(); 62.76 62.77 stime_platform_stamp = 0; 62.78 @@ -1180,7 +1176,7 @@ int time_suspend(void) 62.79 } 62.80 62.81 /* Better to cancel calibration timer for accuracy. */ 62.82 - kill_timer(&this_cpu(cpu_calibration).softirq_callback); 62.83 + clear_bit(TIME_CALIBRATE_SOFTIRQ, &softirq_pending(smp_processor_id())); 62.84 62.85 return 0; 62.86 }
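Two things happen in time.c: the per-CPU calibration callback moves from a one-shot timer to a dedicated TIME_CALIBRATE_SOFTIRQ raised directly from the rendezvous (which is why init_percpu_time and time_suspend lose their softirq_callback timer management), and the rendezvous gets explicit barriers so r->master_stime is globally visible before the counter signals it. The ordering can be modelled with C11 atomics (a sketch of the protocol, not the hypervisor's code):

    #include <stdatomic.h>
    #include <stdint.h>

    struct rendezvous {
        atomic_int nr_cpus;
        uint64_t master_stime;
    };

    /* CPU0: publish the timestamp, then signal via the counter. */
    static void master_side(struct rendezvous *r, uint64_t now, int total_cpus)
    {
        while ( atomic_load(&r->nr_cpus) != total_cpus - 1 )
            ;                               /* wait for all other CPUs */
        r->master_stime = now;
        /* release pairs with the mb(): write master_stime /then/ signal */
        atomic_fetch_add_explicit(&r->nr_cpus, 1, memory_order_release);
    }

    /* Other CPUs: see the signal, then read the timestamp. */
    static uint64_t slave_side(struct rendezvous *r, int total_cpus)
    {
        atomic_fetch_add(&r->nr_cpus, 1);
        while ( atomic_load_explicit(&r->nr_cpus, memory_order_acquire)
                != total_cpus )
            ;                               /* receive signal /then/ read */
        return r->master_stime;
    }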
63.1 --- a/xen/arch/x86/traps.c Tue Sep 02 16:34:53 2008 -0700 63.2 +++ b/xen/arch/x86/traps.c Tue Sep 02 16:55:55 2008 -0700 63.3 @@ -2124,6 +2124,36 @@ static int emulate_privileged_op(struct 63.4 if ( wrmsr_safe(regs->ecx, eax, edx) != 0 ) 63.5 goto fail; 63.6 break; 63.7 + case MSR_AMD64_NB_CFG: 63.8 + if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD || 63.9 + boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 ) 63.10 + goto fail; 63.11 + if ( !IS_PRIV(v->domain) ) 63.12 + break; 63.13 + if ( (rdmsr_safe(MSR_AMD64_NB_CFG, l, h) != 0) || 63.14 + (eax != l) || 63.15 + ((edx ^ h) & ~(1 << (AMD64_NB_CFG_CF8_EXT_ENABLE_BIT - 32))) ) 63.16 + goto invalid; 63.17 + if ( wrmsr_safe(MSR_AMD64_NB_CFG, eax, edx) != 0 ) 63.18 + goto fail; 63.19 + break; 63.20 + case MSR_FAM10H_MMIO_CONF_BASE: 63.21 + if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD || 63.22 + boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 ) 63.23 + goto fail; 63.24 + if ( !IS_PRIV(v->domain) ) 63.25 + break; 63.26 + if ( (rdmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, l, h) != 0) || 63.27 + (((((u64)h << 32) | l) ^ res) & 63.28 + ~((1 << FAM10H_MMIO_CONF_ENABLE_BIT) | 63.29 + (FAM10H_MMIO_CONF_BUSRANGE_MASK << 63.30 + FAM10H_MMIO_CONF_BUSRANGE_SHIFT) | 63.31 + ((u64)FAM10H_MMIO_CONF_BASE_MASK << 63.32 + FAM10H_MMIO_CONF_BASE_SHIFT))) ) 63.33 + goto invalid; 63.34 + if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 ) 63.35 + goto fail; 63.36 + break; 63.37 case MSR_IA32_PERF_CTL: 63.38 if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) 63.39 goto fail; 63.40 @@ -2137,6 +2167,7 @@ static int emulate_privileged_op(struct 63.41 break; 63.42 if ( (rdmsr_safe(regs->ecx, l, h) != 0) || 63.43 (eax != l) || (edx != h) ) 63.44 + invalid: 63.45 gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from " 63.46 "%08x:%08x to %08x:%08x.\n", 63.47 _p(regs->ecx), h, l, edx, eax);
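The two new WRMSR cases in traps.c share one shape: non-AMD or out-of-family CPUs fail the emulation, unprivileged domains have the write silently dropped, and a privileged write is accepted only if every bit it changes sits inside a narrow writable mask (just the CF8 extended-config enable for MSR_AMD64_NB_CFG; the enable, bus-range and base fields for MSR_FAM10H_MMIO_CONF_BASE), otherwise it lands on the new shared `invalid` label. Distilled to its core check:

    #include <stdint.h>

    /* Accept an MSR write only when all changed bits fall inside the
     * writable mask; everything else must match the current value. */
    static int wrmsr_change_allowed(uint64_t cur, uint64_t req,
                                    uint64_t writable_mask)
    {
        return ((cur ^ req) & ~writable_mask) == 0;
    }

    /* e.g., with cur = ((uint64_t)h << 32) | l and req built from
     * edx:eax, pass 1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT as the
     * mask for the NB_CFG case above. */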
64.1 --- a/xen/arch/x86/x86_64/compat/mm.c Tue Sep 02 16:34:53 2008 -0700 64.2 +++ b/xen/arch/x86/x86_64/compat/mm.c Tue Sep 02 16:55:55 2008 -0700 64.3 @@ -69,6 +69,20 @@ int compat_arch_memory_op(int op, XEN_GU 64.4 break; 64.5 } 64.6 64.7 + case XENMEM_remove_from_physmap: 64.8 + { 64.9 + struct compat_remove_from_physmap cmp; 64.10 + struct xen_remove_from_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE; 64.11 + 64.12 + if ( copy_from_guest(&cmp, arg, 1) ) 64.13 + return -EFAULT; 64.14 + 64.15 + XLAT_remove_from_physmap(nat, &cmp); 64.16 + rc = arch_memory_op(op, guest_handle_from_ptr(nat, void)); 64.17 + 64.18 + break; 64.19 + } 64.20 + 64.21 case XENMEM_set_memory_map: 64.22 { 64.23 struct compat_foreign_memory_map cmp;
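The compat layer gains the matching thunk for the new XENMEM_remove_from_physmap introduced in the mm.c hunk above: copy the 32-bit guest structure, widen it into the native layout in the translation area, and forward to arch_memory_op. The shape of such a thunk, with simplified stand-in types:

    #include <stdint.h>

    struct compat_remove_from_physmap { uint16_t domid; uint32_t gpfn; };
    struct xen_remove_from_physmap    { uint16_t domid; uint64_t gpfn; };

    /* Widen the 32-bit guest view into the native structure. */
    static void xlat_remove_from_physmap(
        struct xen_remove_from_physmap *nat,
        const struct compat_remove_from_physmap *cmp)
    {
        nat->domid = cmp->domid;
        nat->gpfn  = cmp->gpfn;             /* zero-extend 32 -> 64 bit */
    }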
65.1 --- a/xen/common/softirq.c Tue Sep 02 16:34:53 2008 -0700 65.2 +++ b/xen/common/softirq.c Tue Sep 02 16:55:55 2008 -0700 65.3 @@ -49,6 +49,7 @@ asmlinkage void do_softirq(void) 65.4 65.5 void open_softirq(int nr, softirq_handler handler) 65.6 { 65.7 + ASSERT(nr < NR_SOFTIRQS); 65.8 softirq_handlers[nr] = handler; 65.9 } 65.10
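With TIME_CALIBRATE_SOFTIRQ joining the softirq table, open_softirq picks up a bounds ASSERT so a bad index is caught at registration time instead of scribbling past softirq_handlers[]. In miniature:

    #include <assert.h>

    #define NR_SOFTIRQS_MODEL 8
    typedef void (*softirq_handler_model)(void);
    static softirq_handler_model handlers[NR_SOFTIRQS_MODEL];

    static void open_softirq_model(int nr, softirq_handler_model h)
    {
        assert(nr >= 0 && nr < NR_SOFTIRQS_MODEL);  /* catch bad slots early */
        handlers[nr] = h;
    }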
66.1 --- a/xen/common/timer.c Tue Sep 02 16:34:53 2008 -0700 66.2 +++ b/xen/common/timer.c Tue Sep 02 16:55:55 2008 -0700 66.3 @@ -30,6 +30,7 @@ 66.4 struct timers { 66.5 spinlock_t lock; 66.6 struct timer **heap; 66.7 + struct timer *list; 66.8 struct timer *running; 66.9 } __cacheline_aligned; 66.10 66.11 @@ -86,13 +87,11 @@ static void up_heap(struct timer **heap, 66.12 66.13 66.14 /* Delete @t from @heap. Return TRUE if new top of heap. */ 66.15 -static int remove_entry(struct timer **heap, struct timer *t) 66.16 +static int remove_from_heap(struct timer **heap, struct timer *t) 66.17 { 66.18 int sz = GET_HEAP_SIZE(heap); 66.19 int pos = t->heap_offset; 66.20 66.21 - t->heap_offset = 0; 66.22 - 66.23 if ( unlikely(pos == sz) ) 66.24 { 66.25 SET_HEAP_SIZE(heap, sz-1); 66.26 @@ -115,7 +114,7 @@ static int remove_entry(struct timer **h 66.27 66.28 66.29 /* Add new entry @t to @heap. Return TRUE if new top of heap. */ 66.30 -static int add_entry(struct timer ***pheap, struct timer *t) 66.31 +static int add_to_heap(struct timer ***pheap, struct timer *t) 66.32 { 66.33 struct timer **heap = *pheap; 66.34 int sz = GET_HEAP_SIZE(heap); 66.35 @@ -126,8 +125,11 @@ static int add_entry(struct timer ***phe 66.36 /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */ 66.37 int old_limit = GET_HEAP_LIMIT(heap); 66.38 int new_limit = ((old_limit + 1) << 4) - 1; 66.39 + if ( in_irq() ) 66.40 + goto out; 66.41 heap = xmalloc_array(struct timer *, new_limit + 1); 66.42 - BUG_ON(heap == NULL); 66.43 + if ( heap == NULL ) 66.44 + goto out; 66.45 memcpy(heap, *pheap, (old_limit + 1) * sizeof(*heap)); 66.46 SET_HEAP_LIMIT(heap, new_limit); 66.47 if ( old_limit != 0 ) 66.48 @@ -139,26 +141,95 @@ static int add_entry(struct timer ***phe 66.49 heap[sz] = t; 66.50 t->heap_offset = sz; 66.51 up_heap(heap, sz); 66.52 + out: 66.53 return (t->heap_offset == 1); 66.54 } 66.55 66.56 66.57 /**************************************************************************** 66.58 + * LINKED LIST OPERATIONS. 66.59 + */ 66.60 + 66.61 +static int remove_from_list(struct timer **pprev, struct timer *t) 66.62 +{ 66.63 + struct timer *curr, **_pprev = pprev; 66.64 + 66.65 + while ( (curr = *_pprev) != t ) 66.66 + _pprev = &curr->list_next; 66.67 + 66.68 + *_pprev = t->list_next; 66.69 + 66.70 + return (_pprev == pprev); 66.71 +} 66.72 + 66.73 +static int add_to_list(struct timer **pprev, struct timer *t) 66.74 +{ 66.75 + struct timer *curr, **_pprev = pprev; 66.76 + 66.77 + while ( ((curr = *_pprev) != NULL) && (curr->expires <= t->expires) ) 66.78 + _pprev = &curr->list_next; 66.79 + 66.80 + t->list_next = curr; 66.81 + *_pprev = t; 66.82 + 66.83 + return (_pprev == pprev); 66.84 +} 66.85 + 66.86 + 66.87 +/**************************************************************************** 66.88 * TIMER OPERATIONS. 66.89 */ 66.90 66.91 +static int remove_entry(struct timers *timers, struct timer *t) 66.92 +{ 66.93 + int rc; 66.94 + 66.95 + switch ( t->status ) 66.96 + { 66.97 + case TIMER_STATUS_in_heap: 66.98 + rc = remove_from_heap(timers->heap, t); 66.99 + break; 66.100 + case TIMER_STATUS_in_list: 66.101 + rc = remove_from_list(&timers->list, t); 66.102 + break; 66.103 + default: 66.104 + rc = 0; 66.105 + BUG(); 66.106 + } 66.107 + 66.108 + t->status = TIMER_STATUS_inactive; 66.109 + return rc; 66.110 +} 66.111 + 66.112 +static int add_entry(struct timers *timers, struct timer *t) 66.113 +{ 66.114 + int rc; 66.115 + 66.116 + ASSERT(t->status == TIMER_STATUS_inactive); 66.117 + 66.118 + /* Try to add to heap. 
t->heap_offset indicates whether we succeed. */ 66.119 + t->heap_offset = 0; 66.120 + t->status = TIMER_STATUS_in_heap; 66.121 + rc = add_to_heap(&timers->heap, t); 66.122 + if ( t->heap_offset != 0 ) 66.123 + return rc; 66.124 + 66.125 + /* Fall back to adding to the slower linked list. */ 66.126 + t->status = TIMER_STATUS_in_list; 66.127 + return add_to_list(&timers->list, t); 66.128 +} 66.129 + 66.130 static inline void __add_timer(struct timer *timer) 66.131 { 66.132 int cpu = timer->cpu; 66.133 - if ( add_entry(&per_cpu(timers, cpu).heap, timer) ) 66.134 + if ( add_entry(&per_cpu(timers, cpu), timer) ) 66.135 cpu_raise_softirq(cpu, TIMER_SOFTIRQ); 66.136 } 66.137 66.138 - 66.139 static inline void __stop_timer(struct timer *timer) 66.140 { 66.141 int cpu = timer->cpu; 66.142 - if ( remove_entry(per_cpu(timers, cpu).heap, timer) ) 66.143 + if ( remove_entry(&per_cpu(timers, cpu), timer) ) 66.144 cpu_raise_softirq(cpu, TIMER_SOFTIRQ); 66.145 } 66.146 66.147 @@ -203,7 +274,7 @@ void set_timer(struct timer *timer, s_ti 66.148 66.149 timer->expires = expires; 66.150 66.151 - if ( likely(!timer->killed) ) 66.152 + if ( likely(timer->status != TIMER_STATUS_killed) ) 66.153 __add_timer(timer); 66.154 66.155 timer_unlock_irqrestore(timer, flags); 66.156 @@ -278,7 +349,7 @@ void kill_timer(struct timer *timer) 66.157 66.158 if ( active_timer(timer) ) 66.159 __stop_timer(timer); 66.160 - timer->killed = 1; 66.161 + timer->status = TIMER_STATUS_killed; 66.162 66.163 timer_unlock_irqrestore(timer, flags); 66.164 66.165 @@ -290,43 +361,76 @@ void kill_timer(struct timer *timer) 66.166 66.167 static void timer_softirq_action(void) 66.168 { 66.169 - struct timer *t, **heap; 66.170 + struct timer *t, **heap, *next; 66.171 struct timers *ts; 66.172 - s_time_t now; 66.173 + s_time_t now, deadline; 66.174 void (*fn)(void *); 66.175 void *data; 66.176 66.177 ts = &this_cpu(timers); 66.178 66.179 spin_lock_irq(&ts->lock); 66.180 - 66.181 - do { 66.182 - heap = ts->heap; 66.183 - now = NOW(); 66.184 66.185 - while ( (GET_HEAP_SIZE(heap) != 0) && 66.186 - ((t = heap[1])->expires < (now + TIMER_SLOP)) ) 66.187 - { 66.188 - remove_entry(heap, t); 66.189 + /* Try to move timers from overflow linked list to more efficient heap. */ 66.190 + next = ts->list; 66.191 + ts->list = NULL; 66.192 + while ( unlikely((t = next) != NULL) ) 66.193 + { 66.194 + next = t->list_next; 66.195 + t->status = TIMER_STATUS_inactive; 66.196 + add_entry(ts, t); 66.197 + } 66.198 + 66.199 + heap = ts->heap; 66.200 + now = NOW(); 66.201 + 66.202 + while ( (GET_HEAP_SIZE(heap) != 0) && 66.203 + ((t = heap[1])->expires < (now + TIMER_SLOP)) ) 66.204 + { 66.205 + remove_entry(ts, t); 66.206 + 66.207 + ts->running = t; 66.208 66.209 - ts->running = t; 66.210 + fn = t->function; 66.211 + data = t->data; 66.212 66.213 - fn = t->function; 66.214 - data = t->data; 66.215 + spin_unlock_irq(&ts->lock); 66.216 + (*fn)(data); 66.217 + spin_lock_irq(&ts->lock); 66.218 66.219 - spin_unlock_irq(&ts->lock); 66.220 - (*fn)(data); 66.221 - spin_lock_irq(&ts->lock); 66.222 + /* Heap may have grown while the lock was released. */ 66.223 + heap = ts->heap; 66.224 + } 66.225 + 66.226 + deadline = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0; 66.227 66.228 - /* Heap may have grown while the lock was released. 
*/ 66.229 - heap = ts->heap; 66.230 + while ( unlikely((t = ts->list) != NULL) ) 66.231 + { 66.232 + if ( t->expires >= (now + TIMER_SLOP) ) 66.233 + { 66.234 + if ( (deadline == 0) || (deadline > t->expires) ) 66.235 + deadline = t->expires; 66.236 + break; 66.237 } 66.238 66.239 - ts->running = NULL; 66.240 + ts->list = t->list_next; 66.241 + t->status = TIMER_STATUS_inactive; 66.242 + 66.243 + ts->running = t; 66.244 + 66.245 + fn = t->function; 66.246 + data = t->data; 66.247 66.248 - this_cpu(timer_deadline) = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0; 66.249 + spin_unlock_irq(&ts->lock); 66.250 + (*fn)(data); 66.251 + spin_lock_irq(&ts->lock); 66.252 } 66.253 - while ( !reprogram_timer(this_cpu(timer_deadline)) ); 66.254 + 66.255 + ts->running = NULL; 66.256 + 66.257 + this_cpu(timer_deadline) = deadline; 66.258 + if ( !reprogram_timer(deadline) ) 66.259 + raise_softirq(TIMER_SOFTIRQ); 66.260 66.261 spin_unlock_irq(&ts->lock); 66.262 } 66.263 @@ -364,6 +468,9 @@ static void dump_timerq(unsigned char ke 66.264 printk (" %d : %p ex=0x%08X%08X %p\n", 66.265 j, t, (u32)(t->expires>>32), (u32)t->expires, t->data); 66.266 } 66.267 + for ( t = ts->list, j = 0; t != NULL; t = t->list_next, j++ ) 66.268 + printk (" L%d : %p ex=0x%08X%08X %p\n", 66.269 + j, t, (u32)(t->expires>>32), (u32)t->expires, t->data); 66.270 spin_unlock_irqrestore(&ts->lock, flags); 66.271 printk("\n"); 66.272 }
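The timer.c rework pairs the per-CPU heap with an overflow linked list: add_entry tries the heap first, and when the heap is full and cannot be grown (xmalloc may fail, and growing is skipped entirely in IRQ context), the timer is parked on a list kept sorted by deadline. timer_softirq_action drains that list back into the heap, runs whatever is due from either structure, and folds the list head into the next programmed deadline. A cut-down model of the insert path:

    #include <stdint.h>

    struct mtimer {
        uint64_t expires;
        struct mtimer *list_next;
    };

    #define HEAP_CAP 4                      /* tiny on purpose */
    static struct mtimer *heap[HEAP_CAP];
    static int heap_sz;
    static struct mtimer *overflow_list;

    static int add_to_heap_model(struct mtimer *t)
    {
        if ( heap_sz == HEAP_CAP )
            return 0;                       /* full: caller uses the list */
        heap[heap_sz++] = t;                /* real code re-heapifies here */
        return 1;
    }

    static void add_to_list_model(struct mtimer *t)
    {
        struct mtimer **pp = &overflow_list;

        while ( *pp && (*pp)->expires <= t->expires )
            pp = &(*pp)->list_next;         /* keep the list deadline-sorted */
        t->list_next = *pp;
        *pp = t;
    }

    static void add_entry_model(struct mtimer *t)
    {
        if ( !add_to_heap_model(t) )
            add_to_list_model(t);           /* never fails, even in IRQ context */
    }

The new status field (inactive/in_heap/in_list/killed) replaces the old boolean killed flag so remove_entry knows which structure a timer currently sits in.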
67.1 --- a/xen/common/xmalloc.c Tue Sep 02 16:34:53 2008 -0700 67.2 +++ b/xen/common/xmalloc.c Tue Sep 02 16:55:55 2008 -0700 67.3 @@ -136,15 +136,14 @@ static void maybe_split(struct xmalloc_h 67.4 static void *xmalloc_new_page(size_t size) 67.5 { 67.6 struct xmalloc_hdr *hdr; 67.7 - unsigned long flags; 67.8 67.9 hdr = alloc_xenheap_page(); 67.10 if ( hdr == NULL ) 67.11 return NULL; 67.12 67.13 - spin_lock_irqsave(&freelist_lock, flags); 67.14 + spin_lock(&freelist_lock); 67.15 maybe_split(hdr, size, PAGE_SIZE); 67.16 - spin_unlock_irqrestore(&freelist_lock, flags); 67.17 + spin_unlock(&freelist_lock); 67.18 67.19 return data_from_header(hdr); 67.20 } 67.21 @@ -175,7 +174,6 @@ static inline size_t align_up(size_t siz 67.22 void *_xmalloc(size_t size, size_t align) 67.23 { 67.24 struct xmalloc_hdr *i; 67.25 - unsigned long flags; 67.26 67.27 ASSERT(!in_irq()); 67.28 67.29 @@ -196,17 +194,17 @@ void *_xmalloc(size_t size, size_t align 67.30 return xmalloc_whole_pages(size); 67.31 67.32 /* Search free list. */ 67.33 - spin_lock_irqsave(&freelist_lock, flags); 67.34 + spin_lock(&freelist_lock); 67.35 list_for_each_entry( i, &freelist, freelist ) 67.36 { 67.37 if ( i->size < size ) 67.38 continue; 67.39 del_from_freelist(i); 67.40 maybe_split(i, size, i->size); 67.41 - spin_unlock_irqrestore(&freelist_lock, flags); 67.42 + spin_unlock(&freelist_lock); 67.43 return data_from_header(i); 67.44 } 67.45 - spin_unlock_irqrestore(&freelist_lock, flags); 67.46 + spin_unlock(&freelist_lock); 67.47 67.48 /* Alloc a new page and return from that. */ 67.49 return xmalloc_new_page(size); 67.50 @@ -214,7 +212,6 @@ void *_xmalloc(size_t size, size_t align 67.51 67.52 void xfree(void *p) 67.53 { 67.54 - unsigned long flags; 67.55 struct xmalloc_hdr *i, *tmp, *hdr; 67.56 67.57 ASSERT(!in_irq()); 67.58 @@ -238,7 +235,7 @@ void xfree(void *p) 67.59 } 67.60 67.61 /* Merge with other free block, or put in list. */ 67.62 - spin_lock_irqsave(&freelist_lock, flags); 67.63 + spin_lock(&freelist_lock); 67.64 list_for_each_entry_safe( i, tmp, &freelist, freelist ) 67.65 { 67.66 unsigned long _i = (unsigned long)i; 67.67 @@ -275,7 +272,7 @@ void xfree(void *p) 67.68 add_to_freelist(hdr); 67.69 } 67.70 67.71 - spin_unlock_irqrestore(&freelist_lock, flags); 67.72 + spin_unlock(&freelist_lock); 67.73 } 67.74 67.75 /*
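The xmalloc changes drop the irqsave/irqrestore lock variants: both allocation and free paths already ASSERT(!in_irq()), so the freelist lock is never taken from interrupt context and plain spin_lock/spin_unlock suffice. The invariant in miniature, with a pthread mutex standing in for the spinlock:

    #include <assert.h>
    #include <pthread.h>
    #include <stddef.h>

    static pthread_mutex_t freelist_lock = PTHREAD_MUTEX_INITIALIZER;
    static int in_irq_model;                /* stand-in for in_irq() */

    static void *xmalloc_model(void)
    {
        assert(!in_irq_model);              /* the invariant the change relies on */
        pthread_mutex_lock(&freelist_lock); /* no interrupt masking needed */
        /* ... search the free list ... */
        pthread_mutex_unlock(&freelist_lock);
        return NULL;
    }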
68.1 --- a/xen/drivers/passthrough/vtd/intremap.c Tue Sep 02 16:34:53 2008 -0700 68.2 +++ b/xen/drivers/passthrough/vtd/intremap.c Tue Sep 02 16:55:55 2008 -0700 68.3 @@ -43,7 +43,7 @@ u16 apicid_to_bdf(int apic_id) 68.4 return 0; 68.5 } 68.6 68.7 -static void remap_entry_to_ioapic_rte( 68.8 +static int remap_entry_to_ioapic_rte( 68.9 struct iommu *iommu, struct IO_APIC_route_entry *old_rte) 68.10 { 68.11 struct iremap_entry *iremap_entry = NULL, *iremap_entries; 68.12 @@ -56,15 +56,19 @@ static void remap_entry_to_ioapic_rte( 68.13 { 68.14 dprintk(XENLOG_ERR VTDPREFIX, 68.15 "remap_entry_to_ioapic_rte: ir_ctl is not ready\n"); 68.16 - return; 68.17 + return -EFAULT; 68.18 } 68.19 68.20 remap_rte = (struct IO_APIC_route_remap_entry *) old_rte; 68.21 index = (remap_rte->index_15 << 15) | remap_rte->index_0_14; 68.22 68.23 if ( index > ir_ctrl->iremap_index ) 68.24 - panic("%s: index (%d) is larger than remap table entry size (%d)!\n", 68.25 - __func__, index, ir_ctrl->iremap_index); 68.26 + { 68.27 + dprintk(XENLOG_ERR VTDPREFIX, 68.28 + "%s: index (%d) is larger than remap table entry size (%d)!\n", 68.29 + __func__, index, ir_ctrl->iremap_index); 68.30 + return -EFAULT; 68.31 + } 68.32 68.33 spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); 68.34 68.35 @@ -82,9 +86,10 @@ static void remap_entry_to_ioapic_rte( 68.36 68.37 unmap_vtd_domain_page(iremap_entries); 68.38 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); 68.39 + return 0; 68.40 } 68.41 68.42 -static void ioapic_rte_to_remap_entry(struct iommu *iommu, 68.43 +static int ioapic_rte_to_remap_entry(struct iommu *iommu, 68.44 int apic_id, struct IO_APIC_route_entry *old_rte, 68.45 unsigned int rte_upper, unsigned int value) 68.46 { 68.47 @@ -108,7 +113,14 @@ static void ioapic_rte_to_remap_entry(st 68.48 index = (remap_rte->index_15 << 15) | remap_rte->index_0_14; 68.49 68.50 if ( index > IREMAP_ENTRY_NR - 1 ) 68.51 - panic("ioapic_rte_to_remap_entry: intremap index is more than 256!\n"); 68.52 + { 68.53 + dprintk(XENLOG_ERR VTDPREFIX, 68.54 + "%s: intremap index (%d) is larger than" 68.55 + " the maximum index (%ld)!\n", 68.56 + __func__, index, IREMAP_ENTRY_NR - 1); 68.57 + spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); 68.58 + return -EFAULT; 68.59 + } 68.60 68.61 iremap_entries = 68.62 (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr); 68.63 @@ -159,7 +171,7 @@ static void ioapic_rte_to_remap_entry(st 68.64 68.65 unmap_vtd_domain_page(iremap_entries); 68.66 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); 68.67 - return; 68.68 + return 0; 68.69 } 68.70 68.71 unsigned int io_apic_read_remap_rte( 68.72 @@ -189,23 +201,22 @@ unsigned int io_apic_read_remap_rte( 68.73 68.74 remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte; 68.75 68.76 - if ( remap_rte->mask || (remap_rte->format == 0) ) 68.77 + if ( remap_rte->format == 0 ) 68.78 { 68.79 - *IO_APIC_BASE(apic) = reg; 68.80 + *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg; 68.81 return *(IO_APIC_BASE(apic)+4); 68.82 } 68.83 68.84 - remap_entry_to_ioapic_rte(iommu, &old_rte); 68.85 - if ( rte_upper ) 68.86 + if ( remap_entry_to_ioapic_rte(iommu, &old_rte) ) 68.87 { 68.88 - *IO_APIC_BASE(apic) = reg + 1; 68.89 - return (*(((u32 *)&old_rte) + 1)); 68.90 + *IO_APIC_BASE(apic) = rte_upper ? 
(reg + 1) : reg; 68.91 + return *(IO_APIC_BASE(apic)+4); 68.92 } 68.93 + 68.94 + if ( rte_upper ) 68.95 + return (*(((u32 *)&old_rte) + 1)); 68.96 else 68.97 - { 68.98 - *IO_APIC_BASE(apic) = reg; 68.99 return (*(((u32 *)&old_rte) + 0)); 68.100 - } 68.101 } 68.102 68.103 void io_apic_write_remap_rte( 68.104 @@ -243,8 +254,13 @@ void io_apic_write_remap_rte( 68.105 *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0); 68.106 remap_rte->mask = saved_mask; 68.107 68.108 - ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, 68.109 - &old_rte, rte_upper, value); 68.110 + if ( ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, 68.111 + &old_rte, rte_upper, value) ) 68.112 + { 68.113 + *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg; 68.114 + *(IO_APIC_BASE(apic)+4) = value; 68.115 + return; 68.116 + } 68.117 68.118 /* write new entry to ioapic */ 68.119 *IO_APIC_BASE(apic) = reg; 68.120 @@ -253,7 +269,7 @@ void io_apic_write_remap_rte( 68.121 *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1); 68.122 } 68.123 68.124 -static void remap_entry_to_msi_msg( 68.125 +static int remap_entry_to_msi_msg( 68.126 struct iommu *iommu, struct msi_msg *msg) 68.127 { 68.128 struct iremap_entry *iremap_entry = NULL, *iremap_entries; 68.129 @@ -266,7 +282,7 @@ static void remap_entry_to_msi_msg( 68.130 { 68.131 dprintk(XENLOG_ERR VTDPREFIX, 68.132 "remap_entry_to_msi_msg: ir_ctl == NULL"); 68.133 - return; 68.134 + return -EFAULT; 68.135 } 68.136 68.137 remap_rte = (struct msi_msg_remap_entry *) msg; 68.138 @@ -274,8 +290,12 @@ static void remap_entry_to_msi_msg( 68.139 remap_rte->address_lo.index_0_14; 68.140 68.141 if ( index > ir_ctrl->iremap_index ) 68.142 - panic("%s: index (%d) is larger than remap table entry size (%d)\n", 68.143 - __func__, index, ir_ctrl->iremap_index); 68.144 + { 68.145 + dprintk(XENLOG_ERR VTDPREFIX, 68.146 + "%s: index (%d) is larger than remap table entry size (%d)\n", 68.147 + __func__, index, ir_ctrl->iremap_index); 68.148 + return -EFAULT; 68.149 + } 68.150 68.151 spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); 68.152 68.153 @@ -304,9 +324,10 @@ static void remap_entry_to_msi_msg( 68.154 68.155 unmap_vtd_domain_page(iremap_entries); 68.156 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); 68.157 + return 0; 68.158 } 68.159 68.160 -static void msi_msg_to_remap_entry( 68.161 +static int msi_msg_to_remap_entry( 68.162 struct iommu *iommu, struct pci_dev *pdev, struct msi_msg *msg) 68.163 { 68.164 struct iremap_entry *iremap_entry = NULL, *iremap_entries; 68.165 @@ -343,7 +364,15 @@ static void msi_msg_to_remap_entry( 68.166 index = i; 68.167 68.168 if ( index > IREMAP_ENTRY_NR - 1 ) 68.169 - panic("msi_msg_to_remap_entry: intremap index is more than 256!\n"); 68.170 + { 68.171 + dprintk(XENLOG_ERR VTDPREFIX, 68.172 + "%s: intremap index (%d) is larger than" 68.173 + " the maximum index (%ld)!\n", 68.174 + __func__, index, IREMAP_ENTRY_NR - 1); 68.175 + unmap_vtd_domain_page(iremap_entries); 68.176 + spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); 68.177 + return -EFAULT; 68.178 + } 68.179 68.180 iremap_entry = &iremap_entries[index]; 68.181 memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry)); 68.182 @@ -385,7 +414,7 @@ static void msi_msg_to_remap_entry( 68.183 68.184 unmap_vtd_domain_page(iremap_entries); 68.185 spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); 68.186 - return; 68.187 + return 0; 68.188 } 68.189 68.190 void msi_msg_read_remap_rte(
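All four translation helpers in intremap.c (remap_entry_to_ioapic_rte, ioapic_rte_to_remap_entry, remap_entry_to_msi_msg, msi_msg_to_remap_entry) switch from void plus panic() to returning -EFAULT on a bad or oversized remap index, and the io_apic_{read,write}_remap_rte callers now fall back to the plain untranslated register access on failure. The pattern, sketched:

    #include <errno.h>

    #define IREMAP_NR_MODEL 256
    static int remap_table[IREMAP_NR_MODEL];

    /* Was: panic() on a bad index.  Now: report and let the caller cope. */
    static int remap_lookup(int index, int *out)
    {
        if ( index < 0 || index > IREMAP_NR_MODEL - 1 )
            return -EFAULT;
        *out = remap_table[index];
        return 0;
    }

    static int read_rte_model(int index, int raw_value)
    {
        int v;

        if ( remap_lookup(index, &v) )
            return raw_value;               /* fall back to the raw RTE */
        return v;
    }

Note the error paths also release the iremap lock and unmap the entry page before returning, which the old panic() made moot.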
69.1 --- a/xen/drivers/passthrough/vtd/iommu.c Tue Sep 02 16:34:53 2008 -0700
69.2 +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Sep 02 16:55:55 2008 -0700
69.3 @@ -624,15 +624,10 @@ static int iommu_set_root_entry(struct i
69.4      unsigned long flags;
69.5      s_time_t start_time;
69.6 
69.7 -    if ( iommu->root_maddr != 0 )
69.8 -    {
69.9 -        free_pgtable_maddr(iommu->root_maddr);
69.10 -        iommu->root_maddr = 0;
69.11 -    }
69.12 -
69.13      spin_lock_irqsave(&iommu->register_lock, flags);
69.14 
69.15 -    iommu->root_maddr = alloc_pgtable_maddr();
69.16 +    if ( iommu->root_maddr == 0 )
69.17 +        iommu->root_maddr = alloc_pgtable_maddr();
69.18      if ( iommu->root_maddr == 0 )
69.19      {
69.20          spin_unlock_irqrestore(&iommu->register_lock, flags);
69.21 @@ -1864,37 +1859,31 @@ static int intel_iommu_group_id(u8 bus,
69.22      return -1;
69.23  }
69.24 
69.25 -u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
69.26 +static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
69.27  int iommu_suspend(void)
69.28  {
69.29      struct acpi_drhd_unit *drhd;
69.30      struct iommu *iommu;
69.31 -    int i = 0;
69.32 +    u32 i;
69.33 +
69.34 +    if ( !vtd_enabled )
69.35 +        return 0;
69.36 
69.37      iommu_flush_all();
69.38 
69.39      for_each_drhd_unit ( drhd )
69.40      {
69.41          iommu = drhd->iommu;
69.42 -        iommu_state[DMAR_RTADDR_REG * i] =
69.43 -            (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
69.44 -        iommu_state[DMAR_FECTL_REG * i] =
69.45 +        i = iommu->index;
69.46 +
69.47 +        iommu_state[i][DMAR_FECTL_REG] =
69.48              (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
69.49 -        iommu_state[DMAR_FEDATA_REG * i] =
69.50 +        iommu_state[i][DMAR_FEDATA_REG] =
69.51              (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
69.52 -        iommu_state[DMAR_FEADDR_REG * i] =
69.53 +        iommu_state[i][DMAR_FEADDR_REG] =
69.54              (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
69.55 -        iommu_state[DMAR_FEUADDR_REG * i] =
69.56 +        iommu_state[i][DMAR_FEUADDR_REG] =
69.57              (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
69.58 -        iommu_state[DMAR_PLMBASE_REG * i] =
69.59 -            (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
69.60 -        iommu_state[DMAR_PLMLIMIT_REG * i] =
69.61 -            (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
69.62 -        iommu_state[DMAR_PHMBASE_REG * i] =
69.63 -            (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
69.64 -        iommu_state[DMAR_PHMLIMIT_REG * i] =
69.65 -            (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
69.66 -        i++;
69.67      }
69.68 
69.69      return 0;
69.70 @@ -1904,37 +1893,34 @@ int iommu_resume(void)
69.71  {
69.72      struct acpi_drhd_unit *drhd;
69.73      struct iommu *iommu;
69.74 -    int i = 0;
69.75 +    u32 i;
69.76 +
69.77 +    if ( !vtd_enabled )
69.78 +        return 0;
69.79 
69.80      iommu_flush_all();
69.81 
69.82 -    init_vtd_hw();
69.83 +    if ( init_vtd_hw() != 0 && force_iommu )
69.84 +        panic("IOMMU setup failed, crash Xen for security purposes!\n");
69.85 +
69.86      for_each_drhd_unit ( drhd )
69.87      {
69.88          iommu = drhd->iommu;
69.89 -        dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
69.90 -                     (u64) iommu_state[DMAR_RTADDR_REG * i]);
69.91 +        i = iommu->index;
69.92 +
69.93          dmar_writel(iommu->reg, DMAR_FECTL_REG,
69.94 -                    (u32) iommu_state[DMAR_FECTL_REG * i]);
69.95 +                    (u32) iommu_state[i][DMAR_FECTL_REG]);
69.96          dmar_writel(iommu->reg, DMAR_FEDATA_REG,
69.97 -                    (u32) iommu_state[DMAR_FEDATA_REG * i]);
69.98 +                    (u32) iommu_state[i][DMAR_FEDATA_REG]);
69.99          dmar_writel(iommu->reg, DMAR_FEADDR_REG,
69.100 -                    (u32) iommu_state[DMAR_FEADDR_REG * i]);
69.101 +                    (u32) iommu_state[i][DMAR_FEADDR_REG]);
69.102          dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
69.103 -                    (u32) iommu_state[DMAR_FEUADDR_REG * i]);
69.104 -        dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
69.105 -                    (u32) iommu_state[DMAR_PLMBASE_REG * i]);
69.106 -        dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
69.107 -                    (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
69.108 -        dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
69.109 -                    (u64) iommu_state[DMAR_PHMBASE_REG * i]);
69.110 -        dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
69.111 -                    (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
69.112 +                    (u32) iommu_state[i][DMAR_FEUADDR_REG]);
69.113 
69.114          if ( iommu_enable_translation(iommu) )
69.115              return -EIO;
69.116 -        i++;
69.117      }
69.118 +
69.119      return 0;
69.120  }
69.121 
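The rewrite above fixes two real bugs in the suspend path: the old flat u8 array indexed by "register offset * i" aliases all registers onto slot 0 for the first unit (i == 0) and truncates 32-bit register values to bytes, whereas the new code gives each IOMMU its own u32 row keyed by iommu->index. A minimal standalone sketch of the corrected save/restore pattern; the register names and the mmio_read()/mmio_write() stubs are illustrative stand-ins for the patch's DMAR_FE*_REG offsets and dmar_readl()/dmar_writel(), not quotes from it:

    #include <stdint.h>

    #define MAX_IOMMUS     8
    #define MAX_IOMMU_REGS 4

    /* Illustrative indices standing in for the DMAR fault-event registers. */
    enum { REG_FECTL = 0, REG_FEDATA, REG_FEADDR, REG_FEUADDR };

    static uint32_t iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];

    /* Stand-ins for reads/writes of a given unit's MMIO register window. */
    extern uint32_t mmio_read(unsigned int iommu_idx, unsigned int reg);
    extern void     mmio_write(unsigned int iommu_idx, unsigned int reg,
                               uint32_t val);

    static void save_fault_event_regs(unsigned int idx)
    {
        unsigned int r;

        /* Each unit owns a whole row, so two units can never clobber
         * each other's saved state, regardless of register offsets. */
        for ( r = 0; r < MAX_IOMMU_REGS; r++ )
            iommu_state[idx][r] = mmio_read(idx, r);
    }

    static void restore_fault_event_regs(unsigned int idx)
    {
        unsigned int r;

        for ( r = 0; r < MAX_IOMMU_REGS; r++ )
            mmio_write(idx, r, iommu_state[idx][r]);
    }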
70.1 --- a/xen/include/asm-x86/io_apic.h Tue Sep 02 16:34:53 2008 -0700
70.2 +++ b/xen/include/asm-x86/io_apic.h Tue Sep 02 16:55:55 2008 -0700
70.3 @@ -125,7 +125,7 @@ extern int mpc_default_type;
70.4 
70.5  static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
70.6  {
70.7 -    if (vtd_enabled)
70.8 +    if (iommu_enabled)
70.9          return io_apic_read_remap_rte(apic, reg);
70.10     *IO_APIC_BASE(apic) = reg;
70.11     return *(IO_APIC_BASE(apic)+4);
70.12 @@ -152,6 +152,8 @@ extern int sis_apic_bug;
70.13  #endif
70.14  static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
70.15  {
70.16 +    if (iommu_enabled)
70.17 +        return iommu_update_ire_from_apic(apic, reg, value);
70.18      if (sis_apic_bug)
70.19          *IO_APIC_BASE(apic) = reg;
70.20      *(IO_APIC_BASE(apic)+4) = value;
71.1 --- a/xen/include/asm-x86/mm.h Tue Sep 02 16:34:53 2008 -0700
71.2 +++ b/xen/include/asm-x86/mm.h Tue Sep 02 16:55:55 2008 -0700
71.3 @@ -59,6 +59,17 @@ struct page_info
71.4      u32 tlbflush_timestamp;
71.5 
71.6      /*
71.7 +     * When PGT_partial is true then this field is valid and indicates
71.8 +     * that PTEs in the range [0, @nr_validated_ptes) have been validated.
71.9 +     * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
71.10 +     * partially validated.
71.11 +     */
71.12 +    struct {
71.13 +        u16 nr_validated_ptes;
71.14 +        bool_t partial_pte;
71.15 +    };
71.16 +
71.17 +    /*
71.18      * Guest pages with a shadow. This does not conflict with
71.19      * tlbflush_timestamp since page table pages are explicitly not
71.20      * tracked for TLB-flush avoidance when a guest runs in shadow mode.
71.21 @@ -86,9 +97,12 @@ struct page_info
71.22  /* PAE only: is this an L2 page directory containing Xen-private mappings? */
71.23  #define _PGT_pae_xen_l2 26
71.24  #define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
71.25 +/* Has this page been *partially* validated for use as its current type? */
71.26 +#define _PGT_partial 25
71.27 +#define PGT_partial (1U<<_PGT_partial)
71.28 
71.29 - /* 26-bit count of uses of this frame as its current type. */
71.30 -#define PGT_count_mask ((1U<<26)-1)
71.31 + /* 25-bit count of uses of this frame as its current type. */
71.32 +#define PGT_count_mask ((1U<<25)-1)
71.33 
71.34  /* Cleared when the owning guest 'frees' this page. */
71.35  #define _PGC_allocated 31
71.36 @@ -154,7 +168,8 @@ extern unsigned long max_page;
71.37  extern unsigned long total_pages;
71.38  void init_frametable(void);
71.39 
71.40 -void free_page_type(struct page_info *page, unsigned long type);
71.41 +int free_page_type(struct page_info *page, unsigned long type,
71.42 +                   int preemptible);
71.43  int _shadow_mode_refcounts(struct domain *d);
71.44 
71.45  void cleanup_page_cacheattr(struct page_info *page);
71.46 @@ -165,6 +180,8 @@ void put_page(struct page_info *page);
71.47  int get_page(struct page_info *page, struct domain *domain);
71.48  void put_page_type(struct page_info *page);
71.49  int get_page_type(struct page_info *page, unsigned long type);
71.50 +int put_page_type_preemptible(struct page_info *page);
71.51 +int get_page_type_preemptible(struct page_info *page, unsigned long type);
71.52  int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
71.53  void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
71.54 
71.55 @@ -174,6 +191,19 @@ static inline void put_page_and_type(str
71.56      put_page(page);
71.57  }
71.58 
71.59 +static inline int put_page_and_type_preemptible(struct page_info *page,
71.60 +                                                int preemptible)
71.61 +{
71.62 +    int rc = 0;
71.63 +
71.64 +    if ( preemptible )
71.65 +        rc = put_page_type_preemptible(page);
71.66 +    else
71.67 +        put_page_type(page);
71.68 +    if ( likely(rc == 0) )
71.69 +        put_page(page);
71.70 +    return rc;
71.71 +}
71.72 
71.73  static inline int get_page_and_type(struct page_info *page,
71.74                                      struct domain *domain,
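The preemptible type-count operations declared above can stop partway through a page-table validation walk; that is exactly what the new nr_validated_ptes/partial_pte fields record, so a nonzero return means "resume later", not failure. A hypothetical caller sketch against these declarations; the busy retry loop stands in for the hypercall-continuation machinery Xen would actually use, and the -EAGAIN/-EINTR codes are assumptions for illustration, not quoted from the patch:

    /* Hypothetical caller: drop a type reference, then the general
     * reference, tolerating preemption of a deep validation walk. */
    static int drop_page_ref(struct page_info *page)
    {
        int rc;

        do {
            rc = put_page_type_preemptible(page);
            /* In Xen proper a "preempted" rc would be propagated upward
             * and the enclosing hypercall restarted; spinning here is
             * only for the sketch. */
        } while ( (rc == -EAGAIN) || (rc == -EINTR) );

        if ( rc == 0 )
            put_page(page);

        return rc;
    }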
72.1 --- a/xen/include/asm-x86/msr-index.h Tue Sep 02 16:34:53 2008 -0700
72.2 +++ b/xen/include/asm-x86/msr-index.h Tue Sep 02 16:55:55 2008 -0700
72.3 @@ -187,15 +187,30 @@
72.4  #define MSR_K8_VM_CR 0xc0010114
72.5  #define MSR_K8_VM_HSAVE_PA 0xc0010117
72.6 
72.7 +#define MSR_K8_FEATURE_MASK 0xc0011004
72.8 +#define MSR_K8_EXT_FEATURE_MASK 0xc0011005
72.9 +
72.10 /* MSR_K8_VM_CR bits: */
72.11 #define _K8_VMCR_SVME_DISABLE 4
72.12 #define K8_VMCR_SVME_DISABLE (1 << _K8_VMCR_SVME_DISABLE)
72.13 
72.14 +/* AMD64 MSRs */
72.15 +#define MSR_AMD64_NB_CFG 0xc001001f
72.16 +#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT 46
72.17 +
72.18 /* AMD Family10h machine check MSRs */
72.19 #define MSR_F10_MC4_MISC1 0xc0000408
72.20 #define MSR_F10_MC4_MISC2 0xc0000409
72.21 #define MSR_F10_MC4_MISC3 0xc000040A
72.22 
72.23 +/* Other AMD Fam10h MSRs */
72.24 +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
72.25 +#define FAM10H_MMIO_CONF_ENABLE_BIT 0
72.26 +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
72.27 +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
72.28 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff
72.29 +#define FAM10H_MMIO_CONF_BASE_SHIFT 20
72.30 +
72.31 /* K6 MSRs */
72.32 #define MSR_K6_EFER 0xc0000080
72.33 #define MSR_K6_STAR 0xc0000081
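The FAM10H_MMIO_CONF_* constants describe one packed register: bit 0 enables MMCONFIG, bits 5:2 give log2 of the covered bus count, and bits 47:20 hold the base address. A hypothetical decoder using only the new masks and shifts; the bit-layout reading is an inference from the constants, not spelled out in the patch:

    #include <stdint.h>

    /* Hypothetical decode of a raw MSR_FAM10H_MMIO_CONF_BASE value. */
    static void decode_mmio_conf(uint64_t val)
    {
        int enabled = (val >> FAM10H_MMIO_CONF_ENABLE_BIT) & 1;

        /* Number of PCI buses covered is a power of two. */
        unsigned int busses = 1u << ((val >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT)
                                     & FAM10H_MMIO_CONF_BUSRANGE_MASK);

        /* Rebuild the physical base address from bits 47:20. */
        uint64_t base = ((val >> FAM10H_MMIO_CONF_BASE_SHIFT)
                         & FAM10H_MMIO_CONF_BASE_MASK)
                        << FAM10H_MMIO_CONF_BASE_SHIFT;

        (void)enabled; (void)busses; (void)base;
    }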
73.1 --- a/xen/include/asm-x86/processor.h Tue Sep 02 16:34:53 2008 -0700
73.2 +++ b/xen/include/asm-x86/processor.h Tue Sep 02 16:55:55 2008 -0700
73.3 @@ -583,6 +583,8 @@ int rdmsr_hypervisor_regs(
73.4  int wrmsr_hypervisor_regs(
73.5      uint32_t idx, uint32_t eax, uint32_t edx);
73.6 
73.7 +int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
73.8 +
73.9  #endif /* !__ASSEMBLY__ */
73.10 
73.11 #endif /* __ASM_X86_PROCESSOR_H */
74.1 --- a/xen/include/asm-x86/softirq.h Tue Sep 02 16:34:53 2008 -0700
74.2 +++ b/xen/include/asm-x86/softirq.h Tue Sep 02 16:55:55 2008 -0700
74.3 @@ -1,8 +1,9 @@
74.4  #ifndef __ASM_SOFTIRQ_H__
74.5  #define __ASM_SOFTIRQ_H__
74.6 
74.7 -#define NMI_MCE_SOFTIRQ (NR_COMMON_SOFTIRQS + 0)
74.8 +#define NMI_MCE_SOFTIRQ        (NR_COMMON_SOFTIRQS + 0)
74.9 +#define TIME_CALIBRATE_SOFTIRQ (NR_COMMON_SOFTIRQS + 1)
74.10 
74.11 -#define NR_ARCH_SOFTIRQS 1
74.12 +#define NR_ARCH_SOFTIRQS 2
74.13 
74.14 #endif /* __ASM_SOFTIRQ_H__ */
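The new constant is just a slot number; it still has to be bound to a handler and raised from somewhere in the time code. A sketch of the expected wiring through Xen's existing softirq interface (the handler name and body here are illustrative, not taken from this changeset):

    /* Illustrative handler: recalibrate local time against the platform
     * timer whenever the softirq fires. */
    static void time_calibration_softirq(void)
    {
        /* ... per-CPU time calibration work ... */
    }

    static void wire_up_time_calibration(void)
    {
        open_softirq(TIME_CALIBRATE_SOFTIRQ, time_calibration_softirq);
    }

    /* A periodic timer handler would then kick it with:
     *     raise_softirq(TIME_CALIBRATE_SOFTIRQ);
     */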
75.1 --- a/xen/include/public/memory.h Tue Sep 02 16:34:53 2008 -0700
75.2 +++ b/xen/include/public/memory.h Tue Sep 02 16:55:55 2008 -0700
75.3 @@ -204,6 +204,7 @@ struct xen_add_to_physmap {
75.4      /* Source mapping space. */
75.5  #define XENMAPSPACE_shared_info 0 /* shared info page */
75.6  #define XENMAPSPACE_grant_table 1 /* grant table page */
75.7 +#define XENMAPSPACE_mfn         2 /* usual MFN */
75.8      unsigned int space;
75.9 
75.10     /* Index into source mapping space. */
75.11 @@ -216,6 +217,22 @@ typedef struct xen_add_to_physmap xen_ad
75.12  DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
75.13 
75.14  /*
75.15 + * Unmaps the page appearing at a particular GPFN from the specified guest's
75.16 + * pseudophysical address space.
75.17 + * arg == addr of xen_remove_from_physmap_t.
75.18 + */
75.19 +#define XENMEM_remove_from_physmap 15
75.20 +struct xen_remove_from_physmap {
75.21 +    /* Which domain to change the mapping for. */
75.22 +    domid_t domid;
75.23 +
75.24 +    /* GPFN of the current mapping of the page. */
75.25 +    xen_pfn_t gpfn;
75.26 +};
75.27 +typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
75.28 +DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
75.29 +
75.30 +/*
75.31   * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
75.32   * code on failure. This call only works for auto-translated guests.
75.33   */
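From inside a guest the new subop goes through the ordinary memory_op hypercall. A hypothetical PV-guest snippet (HYPERVISOR_memory_op and DOMID_SELF are the standard guest-side interfaces; headers and error handling elided):

    /* Hypothetical: unmap whatever page we currently have at 'gpfn'
     * from our own pseudophysical address space. */
    static int unmap_own_gpfn(xen_pfn_t gpfn)
    {
        struct xen_remove_from_physmap xrfp = {
            .domid = DOMID_SELF,
            .gpfn  = gpfn,
        };

        return HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrfp);
    }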
76.1 --- a/xen/include/public/platform.h Tue Sep 02 16:34:53 2008 -0700
76.2 +++ b/xen/include/public/platform.h Tue Sep 02 16:55:55 2008 -0700
76.3 @@ -97,7 +97,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_read_memty
76.4  #define XENPF_microcode_update 35
76.5  struct xenpf_microcode_update {
76.6      /* IN variables. */
76.7 -    XEN_GUEST_HANDLE(void) data;      /* Pointer to microcode data */
76.8 +    XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */
76.9      uint32_t length;                  /* Length of microcode data. */
76.10 };
76.11 typedef struct xenpf_microcode_update xenpf_microcode_update_t;
77.1 --- a/xen/include/xen/compat.h Tue Sep 02 16:34:53 2008 -0700
77.2 +++ b/xen/include/xen/compat.h Tue Sep 02 16:55:55 2008 -0700
77.3 @@ -19,7 +19,9 @@
77.4          type *_[0] __attribute__((__packed__)); \
77.5      } __compat_handle_ ## name
77.6 
77.7 -#define DEFINE_COMPAT_HANDLE(name) __DEFINE_COMPAT_HANDLE(name, name)
77.8 +#define DEFINE_COMPAT_HANDLE(name) \
77.9 +    __DEFINE_COMPAT_HANDLE(name, name); \
77.10 +    __DEFINE_COMPAT_HANDLE(const_ ## name, const name)
77.11 #define COMPAT_HANDLE(name) __compat_handle_ ## name
77.12 
77.13 /* Is the compat handle a NULL reference? */
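The point of the extra expansion is that every DEFINE_COMPAT_HANDLE(foo) now also emits a const handle type, mirroring the const_void guest handle introduced for the microcode hypercall above. Expanded by hand, roughly (the leading handle field comes from the part of __DEFINE_COMPAT_HANDLE outside this hunk and is only hinted at here):

    /* DEFINE_COMPAT_HANDLE(foo) now expands along the lines of: */
    typedef struct {
        /* (handle field from __DEFINE_COMPAT_HANDLE, not shown in the hunk) */
        foo *_[0] __attribute__((__packed__));
    } __compat_handle_foo;

    typedef struct {
        /* (handle field from __DEFINE_COMPAT_HANDLE, not shown in the hunk) */
        const foo *_[0] __attribute__((__packed__));
    } __compat_handle_const_foo;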
78.1 --- a/xen/include/xen/iommu.h Tue Sep 02 16:34:53 2008 -0700
78.2 +++ b/xen/include/xen/iommu.h Tue Sep 02 16:55:55 2008 -0700
78.3 @@ -109,4 +109,8 @@ struct iommu_ops {
78.4 
78.5  void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
78.6  void iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
78.7 +
78.8 +int iommu_suspend(void);
78.9 +int iommu_resume(void);
78.10 +
78.11 #endif /* _IOMMU_H_ */
79.1 --- a/xen/include/xen/timer.h Tue Sep 02 16:34:53 2008 -0700
79.2 +++ b/xen/include/xen/timer.h Tue Sep 02 16:55:55 2008 -0700
79.3 @@ -14,16 +14,29 @@
79.4 
79.5  struct timer {
79.6      /* System time expiry value (nanoseconds since boot). */
79.7 -    s_time_t expires;
79.8 -    /* CPU on which this timer will be installed and executed. */
79.9 -    unsigned int cpu;
79.10 +    s_time_t expires;
79.11 +
79.12 +    /* Position in active-timer data structure. */
79.13 +    union {
79.14 +        /* Timer-heap offset. */
79.15 +        unsigned int heap_offset;
79.16 +        /* Overflow linked list. */
79.17 +        struct timer *list_next;
79.18 +    };
79.19 +
79.20      /* On expiry, '(*function)(data)' will be executed in softirq context. */
79.21 -    void (*function)(void *);
79.22 -    void *data;
79.23 -    /* Timer-heap offset. */
79.24 -    unsigned int heap_offset;
79.25 -    /* Has this timer been killed (cannot be activated)? */
79.26 -    int killed;
79.27 +    void (*function)(void *);
79.28 +    void *data;
79.29 +
79.30 +    /* CPU on which this timer will be installed and executed. */
79.31 +    uint16_t cpu;
79.32 +
79.33 +    /* Timer status. */
79.34 +#define TIMER_STATUS_inactive 0 /* Not in use; can be activated.    */
79.35 +#define TIMER_STATUS_killed   1 /* Not in use; cannot be activated. */
79.36 +#define TIMER_STATUS_in_heap  2 /* In use; on timer heap.           */
79.37 +#define TIMER_STATUS_in_list  3 /* In use; on overflow linked list. */
79.38 +    uint8_t status;
79.39  };
79.40 
79.41  /*
79.42 @@ -37,7 +50,7 @@ struct timer {
79.43   */
79.44  static inline int active_timer(struct timer *timer)
79.45  {
79.46 -    return (timer->heap_offset != 0);
79.47 +    return (timer->status >= TIMER_STATUS_in_heap);
79.48  }
79.49 
79.50  /*
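Note how the two in-use states were given the two largest values so that active_timer() collapses to one comparison, and how the union obliges activation code to record which representation a timer landed in. An illustrative sketch of that bookkeeping; heap_insert() and overflow_list below stand in for timer.c internals that are not part of this header:

    /* Illustrative sketch of activating a timer under the new scheme. */
    static struct timer *overflow_list;        /* stand-in for timer.c state */

    extern int heap_insert(struct timer *t);   /* stand-in; nonzero on success */

    static void timer_activate(struct timer *t)
    {
        if ( heap_insert(t) )
            t->status = TIMER_STATUS_in_heap;  /* heap_offset is now valid */
        else
        {
            t->list_next = overflow_list;      /* list_next is now valid */
            overflow_list = t;
            t->status = TIMER_STATUS_in_list;
        }
    }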
80.1 --- a/xen/include/xlat.lst Tue Sep 02 16:34:53 2008 -0700
80.2 +++ b/xen/include/xlat.lst Tue Sep 02 16:55:55 2008 -0700
80.3 @@ -33,6 +33,7 @@
80.4  !	kexec_image			kexec.h
80.5  !	kexec_range			kexec.h
80.6  !	add_to_physmap			memory.h
80.7 +!	remove_from_physmap		memory.h
80.8  !	foreign_memory_map		memory.h
80.9  !	memory_exchange			memory.h
80.10 !	memory_map			memory.h
81.1 --- a/xen/include/xsm/xsm.h Tue Sep 02 16:34:53 2008 -0700
81.2 +++ b/xen/include/xsm/xsm.h Tue Sep 02 16:55:55 2008 -0700
81.3 @@ -136,6 +136,7 @@ struct xsm_operations {
81.4      int (*mmu_machphys_update) (struct domain *d, unsigned long mfn);
81.5      int (*update_va_mapping) (struct domain *d, l1_pgentry_t pte);
81.6      int (*add_to_physmap) (struct domain *d1, struct domain *d2);
81.7 +    int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
81.8  #endif
81.9  };
81.10 
81.11 @@ -532,6 +533,11 @@ static inline int xsm_add_to_physmap(str
81.12  {
81.13      return xsm_call(add_to_physmap(d1, d2));
81.14  }
81.15 +
81.16 +static inline int xsm_remove_from_physmap(struct domain *d1, struct domain *d2)
81.17 +{
81.18 +    return xsm_call(remove_from_physmap(d1, d2));
81.19 +}
81.20  #endif /* CONFIG_X86 */
81.21 
81.22  #endif /* __XSM_H */
82.1 --- a/xen/xsm/dummy.c Tue Sep 02 16:34:53 2008 -0700
82.2 +++ b/xen/xsm/dummy.c Tue Sep 02 16:55:55 2008 -0700
82.3 @@ -385,6 +385,11 @@ static int dummy_add_to_physmap (struct 
82.4  {
82.5      return 0;
82.6  }
82.7 +
82.8 +static int dummy_remove_from_physmap (struct domain *d1, struct domain *d2)
82.9 +{
82.10 +    return 0;
82.11 +}
82.12  #endif
82.13 
82.14  struct xsm_operations dummy_xsm_ops;
82.15 @@ -484,5 +489,6 @@ void xsm_fixup_ops (struct xsm_operation
82.16      set_to_dummy_if_null(ops, mmu_machphys_update);
82.17      set_to_dummy_if_null(ops, update_va_mapping);
82.18      set_to_dummy_if_null(ops, add_to_physmap);
82.19 +    set_to_dummy_if_null(ops, remove_from_physmap);
82.20  #endif
82.21  }
83.1 --- a/xen/xsm/flask/hooks.c Tue Sep 02 16:34:53 2008 -0700
83.2 +++ b/xen/xsm/flask/hooks.c Tue Sep 02 16:55:55 2008 -0700
83.3 @@ -1028,6 +1028,11 @@ static int flask_add_to_physmap(struct d
83.4  {
83.5      return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
83.6  }
83.7 +
83.8 +static int flask_remove_from_physmap(struct domain *d1, struct domain *d2)
83.9 +{
83.10 +    return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
83.11 +}
83.12  #endif
83.13 
83.14  long do_flask_op(XEN_GUEST_HANDLE(xsm_op_t) u_flask_op);
83.15 @@ -1115,6 +1120,7 @@ static struct xsm_operations flask_ops =
83.16      .mmu_machphys_update = flask_mmu_machphys_update,
83.17      .update_va_mapping = flask_update_va_mapping,
83.18      .add_to_physmap = flask_add_to_physmap,
83.19 +    .remove_from_physmap = flask_remove_from_physmap,
83.20  #endif
83.21  };
83.22 
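All three XSM files follow the established add_to_physmap pattern: a hook slot in xsm_operations, a permissive default in dummy.c, and the same MMU__PHYSMAP check in FLASK. The piece not visible in these hunks is the call site in the XENMEM_remove_from_physmap handler, which presumably mirrors the add_to_physmap one, roughly:

    /* Hypothetical call site in the XENMEM_remove_from_physmap handler,
     * consulting the hook before any mapping is torn down. */
    rc = xsm_remove_from_physmap(current->domain, d);
    if ( rc )
    {
        rcu_unlock_domain(d);  /* assuming the handler took an RCU domain ref */
        return rc;
    }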