debuggers.hg

view xen/common/dom0_ops.c @ 2671:44ab932026a6

bitkeeper revision 1.1159.1.227 (416ca176osUUBQBGiZS4hIALhIyUsg)

Slightly smarter initial CPU placement.
author iap10@labyrinth.cl.cam.ac.uk
date Wed Oct 13 03:31:02 2004 +0000 (2004-10-13)
parents 875c25208085
children dc59c5558adc d7ee91c5f868
line source
1 /******************************************************************************
2 * dom0_ops.c
3 *
4 * Process command requests from domain-0 guest OS.
5 *
6 * Copyright (c) 2002, K A Fraser
7 */
9 #include <xen/config.h>
10 #include <xen/types.h>
11 #include <xen/lib.h>
12 #include <xen/mm.h>
13 #include <hypervisor-ifs/dom0_ops.h>
14 #include <xen/sched.h>
15 #include <xen/event.h>
16 #include <asm/domain_page.h>
17 #include <asm/pdb.h>
18 #include <xen/trace.h>
19 #include <xen/console.h>
20 #include <asm/shadow.h>
21 #include <hypervisor-ifs/sched_ctl.h>
23 #define TRC_DOM0OP_ENTER_BASE 0x00020000
24 #define TRC_DOM0OP_LEAVE_BASE 0x00030000
26 extern unsigned int alloc_new_dom_mem(struct domain *, unsigned int);
27 extern long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op);
28 extern void arch_getdomaininfo_ctxt(
29 struct domain *, full_execution_context_t *);
31 static inline int is_free_domid(domid_t dom)
32 {
33 struct domain *d;
35 if ( dom >= DOMID_FIRST_RESERVED )
36 return 0;
38 if ( (d = find_domain_by_id(dom)) == NULL )
39 return 1;
41 put_domain(d);
42 return 0;
43 }
45 /** Allocate a free domain id. We try to reuse domain ids in a fairly low range,
46 * only expanding the range when there are no free domain ids. This is to
47 * keep domain ids in a range depending on the number that exist simultaneously,
48 * rather than incrementing domain ids in the full 32-bit range.
49 */
50 static int allocate_domid(domid_t *pdom)
51 {
52 static spinlock_t domid_lock = SPIN_LOCK_UNLOCKED;
53 static domid_t curdom = 0;
54 static domid_t topdom = 101;
55 int err = 0;
56 domid_t dom;
58 spin_lock(&domid_lock);
60 /* Try to use a domain id in the range 0..topdom, starting at curdom. */
61 for ( dom = curdom + 1; dom != curdom; dom++ )
62 {
63 if ( dom == topdom )
64 dom = 1;
65 if ( is_free_domid(dom) )
66 goto exit;
67 }
69 /* Couldn't find a free domain id in 0..topdom, try higher. */
70 for ( dom = topdom; dom < DOMID_FIRST_RESERVED; dom++ )
71 {
72 if ( is_free_domid(dom) )
73 {
74 topdom = dom + 1;
75 goto exit;
76 }
77 }
79 /* No free domain ids. */
80 err = -ENOMEM;
82 exit:
83 if ( err == 0 )
84 {
85 curdom = dom;
86 *pdom = dom;
87 }
89 spin_unlock(&domid_lock);
90 return err;
91 }
/*
 * do_dom0_op - top-level dispatcher for privileged (domain-0) control
 * operations.  Copies the request from guest memory, validates caller
 * privilege and interface version, then switches on op->cmd.  Results
 * that callers read back are copied out with copy_to_user().  Returns
 * 0 on success or a negative errno-style value.
 *
 * Reference-count discipline: every find_domain_by_id() that succeeds
 * is balanced by a put_domain() on all paths within its case.
 */
93 long do_dom0_op(dom0_op_t *u_dom0_op)
94 {
95 long ret = 0;
96 dom0_op_t curop, *op = &curop;
/* Only a privileged domain (dom0) may issue dom0 operations. */
98 if ( !IS_PRIV(current) )
99 return -EPERM;
101 if ( copy_from_user(op, u_dom0_op, sizeof(*op)) )
102 {
103 return -EFAULT;
104 }
/* Reject tools built against a different hypervisor ABI version. */
106 if ( op->interface_version != DOM0_INTERFACE_VERSION )
107 {
108 return -EACCES;
109 }
/* Trace entry into the operation (cmd encoded into the event id). */
111 TRACE_5D(TRC_DOM0OP_ENTER_BASE + op->cmd,
112 0, op->u.dummy[0], op->u.dummy[1],
113 op->u.dummy[2], op->u.dummy[3] );
115 switch ( op->cmd )
116 {
118 case DOM0_BUILDDOMAIN:
119 {
120 struct domain *d = find_domain_by_id(op->u.builddomain.domain);
121 ret = -EINVAL;
122 if ( d != NULL )
123 {
124 ret = final_setup_guestos(d, &op->u.builddomain);
125 put_domain(d);
126 }
127 }
128 break;
130 case DOM0_PAUSEDOMAIN:
131 {
132 struct domain *d = find_domain_by_id(op->u.pausedomain.domain);
133 ret = -ESRCH;
134 if ( d != NULL )
135 {
136 ret = -EINVAL;
/* A domain may not pause itself via this path. */
137 if ( d != current )
138 {
139 domain_pause_by_systemcontroller(d);
140 ret = 0;
141 }
142 put_domain(d);
143 }
144 }
145 break;
147 case DOM0_UNPAUSEDOMAIN:
148 {
149 struct domain *d = find_domain_by_id(op->u.unpausedomain.domain);
150 ret = -ESRCH;
151 if ( d != NULL )
152 {
153 ret = -EINVAL;
/* Only a fully constructed domain may be unpaused. */
154 if ( test_bit(DF_CONSTRUCTED, &d->flags) )
155 {
156 domain_unpause_by_systemcontroller(d);
157 ret = 0;
158 }
159 put_domain(d);
160 }
161 }
162 break;
164 case DOM0_CREATEDOMAIN:
165 {
166 struct domain *d;
167 unsigned int pro;
168 domid_t dom;
/* Caller may request a specific id; 0 means auto-allocate. */
170 dom = op->u.createdomain.domain;
171 if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
172 {
173 ret = -EINVAL;
174 if ( !is_free_domid(dom) )
175 break;
176 }
177 else if ( (ret = allocate_domid(&dom)) != 0 )
178 break;
180 if ( op->u.createdomain.cpu == -1 )
181 {
182 /* Do an initial placement. Fix me for hyperthreading! */
/* Pick the CPU hosting the fewest domains (simple load balance). */
183 struct domain *d;
184 int i, j=0, c[smp_num_cpus];
186 pro=0; /* keep compiler happy */
188 for (i=0;i<smp_num_cpus;i++)
189 c[i]=0;
191 for_each_domain ( d ) {
192 c[d->processor]++;
193 j++;
194 }
/* j starts at the total domain count, so the first CPU always wins
 * the initial comparison; thereafter it tracks the running minimum. */
196 for (i=0;i<smp_num_cpus;i++) {
197 if( c[i]<j )
198 {
199 j = c[i];
200 pro = i;
201 }
202 }
203 }
204 else
205 pro = op->u.createdomain.cpu % smp_num_cpus;
207 ret = -ENOMEM;
208 if ( (d = do_createdomain(dom, pro)) == NULL )
209 break;
211 if ( op->u.createdomain.name[0] )
212 {
/* strncpy may leave the buffer unterminated; terminate explicitly. */
213 strncpy(d->name, op->u.createdomain.name, MAX_DOMAIN_NAME);
214 d->name[MAX_DOMAIN_NAME - 1] = '\0';
215 }
217 ret = alloc_new_dom_mem(d, op->u.createdomain.memory_kb);
218 if ( ret != 0 )
219 {
220 domain_kill(d);
221 break;
222 }
224 ret = 0;
/* Report the (possibly auto-allocated) id back to the caller. */
226 op->u.createdomain.domain = d->domain;
227 copy_to_user(u_dom0_op, op, sizeof(*op));
228 }
229 break;
231 case DOM0_DESTROYDOMAIN:
232 {
233 struct domain *d = find_domain_by_id(op->u.destroydomain.domain);
234 ret = -ESRCH;
235 if ( d != NULL )
236 {
237 ret = -EINVAL;
238 if ( d != current )
239 {
240 domain_kill(d);
241 ret = 0;
242 }
243 put_domain(d);
244 }
245 }
246 break;
248 case DOM0_PINCPUDOMAIN:
249 {
250 domid_t dom = op->u.pincpudomain.domain;
251 struct domain *d = find_domain_by_id(dom);
252 int cpu = op->u.pincpudomain.cpu;
254 if ( d == NULL )
255 {
256 ret = -ESRCH;
257 break;
258 }
260 if ( d == current )
261 {
262 ret = -EINVAL;
263 put_domain(d);
264 break;
265 }
/* cpu == -1 unpins; otherwise pin (pausing around the migration). */
267 if ( cpu == -1 )
268 {
269 clear_bit(DF_CPUPINNED, &d->flags);
270 }
271 else
272 {
273 domain_pause(d);
274 if(d->processor != cpu % smp_num_cpus)
275 set_bit(DF_MIGRATED, &d->flags);
276 set_bit(DF_CPUPINNED, &d->flags);
277 d->processor = cpu % smp_num_cpus;
278 domain_unpause(d);
279 }
281 put_domain(d);
282 }
283 break;
285 case DOM0_SCHEDCTL:
286 {
287 ret = sched_ctl(&op->u.schedctl);
288 copy_to_user(u_dom0_op, op, sizeof(*op));
289 }
290 break;
292 case DOM0_ADJUSTDOM:
293 {
294 ret = sched_adjdom(&op->u.adjustdom);
295 copy_to_user(u_dom0_op, op, sizeof(*op));
296 }
297 break;
299 case DOM0_GETMEMLIST:
300 {
/* Walk the domain's page list, copying up to max_pfns frame numbers
 * into the caller-supplied buffer. */
301 int i;
302 struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
303 unsigned long max_pfns = op->u.getmemlist.max_pfns;
304 unsigned long pfn;
305 unsigned long *buffer = op->u.getmemlist.buffer;
306 struct list_head *list_ent;
308 ret = -EINVAL;
309 if ( d != NULL )
310 {
311 ret = 0;
/* Hold page_alloc_lock so the page list cannot change under us. */
313 spin_lock(&d->page_alloc_lock);
314 list_ent = d->page_list.next;
315 for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
316 {
/* Frame number = offset of this pfn_info within frame_table. */
317 pfn = list_entry(list_ent, struct pfn_info, list) -
318 frame_table;
319 if ( put_user(pfn, buffer) )
320 {
321 ret = -EFAULT;
322 break;
323 }
324 buffer++;
325 list_ent = frame_table[pfn].list.next;
326 }
327 spin_unlock(&d->page_alloc_lock);
329 op->u.getmemlist.num_pfns = i;
330 copy_to_user(u_dom0_op, op, sizeof(*op));
332 put_domain(d);
333 }
334 }
335 break;
337 case DOM0_GETDOMAININFO:
338 {
339 full_execution_context_t *c;
340 struct domain *d;
341 unsigned long flags;
/* Find the first domain with id >= the requested id (supports
 * iterating over all domains by repeated calls). */
343 read_lock_irqsave(&tasklist_lock, flags);
345 for_each_domain ( d )
346 {
347 if ( d->domain >= op->u.getdomaininfo.domain )
348 break;
349 }
/* Take a reference before dropping the tasklist lock. */
351 if ( (d == NULL) || !get_domain(d) )
352 {
353 read_unlock_irqrestore(&tasklist_lock, flags);
354 ret = -ESRCH;
355 break;
356 }
358 read_unlock_irqrestore(&tasklist_lock, flags);
360 op->u.getdomaininfo.domain = d->domain;
361 strcpy(op->u.getdomaininfo.name, d->name);
363 op->u.getdomaininfo.flags =
364 (test_bit(DF_DYING, &d->flags) ? DOMFLAGS_DYING : 0) |
365 (test_bit(DF_CRASHED, &d->flags) ? DOMFLAGS_CRASHED : 0) |
366 (test_bit(DF_SHUTDOWN, &d->flags) ? DOMFLAGS_SHUTDOWN : 0) |
367 (test_bit(DF_CTRLPAUSE, &d->flags) ? DOMFLAGS_PAUSED : 0) |
368 (test_bit(DF_BLOCKED, &d->flags) ? DOMFLAGS_BLOCKED : 0) |
369 (test_bit(DF_RUNNING, &d->flags) ? DOMFLAGS_RUNNING : 0);
371 op->u.getdomaininfo.flags |= d->processor << DOMFLAGS_CPUSHIFT;
372 op->u.getdomaininfo.flags |=
373 d->shutdown_code << DOMFLAGS_SHUTDOWNSHIFT;
375 op->u.getdomaininfo.tot_pages = d->tot_pages;
376 op->u.getdomaininfo.max_pages = d->max_pages;
377 op->u.getdomaininfo.cpu_time = d->cpu_time;
378 op->u.getdomaininfo.shared_info_frame =
379 __pa(d->shared_info) >> PAGE_SHIFT;
/* Optionally snapshot the register context into caller memory. */
381 if ( op->u.getdomaininfo.ctxt != NULL )
382 {
383 if ( (c = xmalloc(sizeof(*c))) == NULL )
384 {
385 ret = -ENOMEM;
386 put_domain(d);
387 break;
388 }
/* Pause the target so the context is a consistent snapshot. */
390 if ( d != current )
391 domain_pause(d);
393 arch_getdomaininfo_ctxt(d,c);
395 if ( d != current )
396 domain_unpause(d);
398 if ( copy_to_user(op->u.getdomaininfo.ctxt, c, sizeof(*c)) )
399 ret = -EINVAL;
401 if ( c != NULL )
402 xfree(c);
403 }
405 if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
406 ret = -EINVAL;
408 put_domain(d);
409 }
410 break;
412 case DOM0_GETPAGEFRAMEINFO:
413 {
414 struct pfn_info *page;
415 unsigned long pfn = op->u.getpageframeinfo.pfn;
416 domid_t dom = op->u.getpageframeinfo.domain;
417 struct domain *d;
419 ret = -EINVAL;
421 if ( unlikely(pfn >= max_page) ||
422 unlikely((d = find_domain_by_id(dom)) == NULL) )
423 break;
425 page = &frame_table[pfn];
/* get_page also verifies the frame is owned by domain d. */
427 if ( likely(get_page(page, d)) )
428 {
429 ret = 0;
431 op->u.getpageframeinfo.type = NOTAB;
/* Only report a pagetable type if the type is actively held. */
433 if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
434 {
435 switch ( page->u.inuse.type_info & PGT_type_mask )
436 {
437 case PGT_l1_page_table:
438 op->u.getpageframeinfo.type = L1TAB;
439 break;
440 case PGT_l2_page_table:
441 op->u.getpageframeinfo.type = L2TAB;
442 break;
443 case PGT_l3_page_table:
444 op->u.getpageframeinfo.type = L3TAB;
445 break;
446 case PGT_l4_page_table:
447 op->u.getpageframeinfo.type = L4TAB;
448 break;
449 }
450 }
452 put_page(page);
453 }
455 put_domain(d);
457 copy_to_user(u_dom0_op, op, sizeof(*op));
458 }
459 break;
461 case DOM0_IOPL:
462 {
463 extern long do_iopl(domid_t, unsigned int);
464 ret = do_iopl(op->u.iopl.domain, op->u.iopl.iopl);
465 }
466 break;
468 #ifdef XEN_DEBUGGER
469 case DOM0_DEBUG:
470 {
471 pdb_do_debug(op);
472 copy_to_user(u_dom0_op, op, sizeof(*op));
473 ret = 0;
474 }
475 break;
476 #endif
478 case DOM0_SETTIME:
479 {
480 do_settime(op->u.settime.secs,
481 op->u.settime.usecs,
482 op->u.settime.system_time);
483 ret = 0;
484 }
485 break;
487 #ifdef TRACE_BUFFER
488 case DOM0_GETTBUFS:
489 {
490 ret = get_tb_info(&op->u.gettbufs);
491 copy_to_user(u_dom0_op, op, sizeof(*op));
492 }
493 break;
494 #endif
496 case DOM0_READCONSOLE:
497 {
498 ret = read_console_ring(op->u.readconsole.str,
499 op->u.readconsole.count,
500 op->u.readconsole.cmd);
501 }
502 break;
504 case DOM0_PHYSINFO:
505 {
506 extern int phys_proc_id[];
508 dom0_physinfo_t *pi = &op->u.physinfo;
/* Count hyperthreads per core by scanning CPUs that share the
 * physical package id of CPU 0.  Assumes sibling CPUs are numbered
 * contiguously from 0 -- NOTE(review): verify against CPU enumeration. */
510 int old_id = phys_proc_id[0];
511 int ht = 0;
513 while( ( ht < smp_num_cpus ) && ( phys_proc_id[ht] == old_id ) ) ht++;
515 pi->ht_per_core = ht;
516 pi->cores = smp_num_cpus / pi->ht_per_core;
517 pi->total_pages = max_page;
518 pi->free_pages = avail_domheap_pages();
519 pi->cpu_khz = cpu_khz;
521 copy_to_user(u_dom0_op, op, sizeof(*op));
522 ret = 0;
523 }
524 break;
526 case DOM0_PCIDEV_ACCESS:
527 {
528 extern int physdev_pci_access_modify(domid_t, int, int, int, int);
529 ret = physdev_pci_access_modify(op->u.pcidev_access.domain,
530 op->u.pcidev_access.bus,
531 op->u.pcidev_access.dev,
532 op->u.pcidev_access.func,
533 op->u.pcidev_access.enable);
534 }
535 break;
537 case DOM0_SCHED_ID:
538 {
539 op->u.sched_id.sched_id = sched_id();
540 copy_to_user(u_dom0_op, op, sizeof(*op));
541 ret = 0;
542 }
543 break;
545 case DOM0_SETDOMAINNAME:
546 {
547 struct domain *d;
548 ret = -ESRCH;
549 d = find_domain_by_id( op->u.setdomainname.domain );
550 if ( d != NULL )
551 {
/* NOTE(review): unlike DOM0_CREATEDOMAIN, no explicit NUL
 * terminator is written after strncpy here -- confirm the name
 * buffer is guaranteed terminated elsewhere. */
552 strncpy(d->name, op->u.setdomainname.name, MAX_DOMAIN_NAME);
553 put_domain(d);
554 ret = 0;
555 }
556 }
557 break;
559 case DOM0_SETDOMAININITIALMEM:
560 {
561 struct domain *d;
562 ret = -ESRCH;
563 d = find_domain_by_id(op->u.setdomaininitialmem.domain);
564 if ( d != NULL )
565 {
566 /* should only be used *before* domain is built. */
567 if ( !test_bit(DF_CONSTRUCTED, &d->flags) )
568 ret = alloc_new_dom_mem(
569 d, op->u.setdomaininitialmem.initial_memkb );
570 else
571 ret = -EINVAL;
572 put_domain(d);
573 }
574 }
575 break;
577 case DOM0_SETDOMAINMAXMEM:
578 {
579 struct domain *d;
580 ret = -ESRCH;
581 d = find_domain_by_id( op->u.setdomainmaxmem.domain );
582 if ( d != NULL )
583 {
/* Round the kB value up to whole pages. */
584 d->max_pages =
585 (op->u.setdomainmaxmem.max_memkb+PAGE_SIZE-1)>> PAGE_SHIFT;
586 put_domain(d);
587 ret = 0;
588 }
589 }
590 break;
592 case DOM0_GETPAGEFRAMEINFO2:
593 {
/* Batched variant of GETPAGEFRAMEINFO: type bits are OR-ed into the
 * caller's array of machine frame numbers, GPF2_BATCH at a time. */
594 #define GPF2_BATCH 128
595 int n,j;
596 int num = op->u.getpageframeinfo2.num;
597 domid_t dom = op->u.getpageframeinfo2.domain;
598 unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array;
599 struct domain *d;
600 unsigned long l_arr[GPF2_BATCH];
601 ret = -ESRCH;
603 if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
604 break;
/* NOTE(review): rejecting num > 1024 after taking the domain
 * reference leaks that reference on the -E2BIG path -- the breaks
 * here skip the put_domain() at the end of this case. */
606 if ( unlikely(num > 1024) )
607 {
608 ret = -E2BIG;
609 break;
610 }
612 ret = 0;
613 for( n = 0; n < num; )
614 {
615 int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);
617 if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) )
618 {
619 ret = -EINVAL;
620 break;
621 }
623 for( j = 0; j < k; j++ )
624 {
625 struct pfn_info *page;
626 unsigned long mfn = l_arr[j];
628 if ( unlikely(mfn >= max_page) )
629 goto e2_err;
631 page = &frame_table[mfn];
633 if ( likely(get_page(page, d)) )
634 {
635 unsigned long type = 0;
637 switch( page->u.inuse.type_info & PGT_type_mask )
638 {
639 case PGT_l1_page_table:
640 type = L1TAB;
641 break;
642 case PGT_l2_page_table:
643 type = L2TAB;
644 break;
645 case PGT_l3_page_table:
646 type = L3TAB;
647 break;
648 case PGT_l4_page_table:
649 type = L4TAB;
650 break;
651 }
653 if ( page->u.inuse.type_info & PGT_pinned )
654 type |= LPINTAB;
655 l_arr[j] |= type;
656 put_page(page);
657 }
658 else
659 {
/* Bad mfn or frame not owned by this domain: flag it. */
660 e2_err:
661 l_arr[j] |= XTAB;
662 }
664 }
666 if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) )
667 {
668 ret = -EINVAL;
669 break;
670 }
672 n += j;
673 }
675 put_domain(d);
676 }
677 break;
679 case DOM0_SETDOMAINVMASSIST:
680 {
681 struct domain *d;
682 ret = -ESRCH;
/* NOTE(review): this reads op->u.setdomainmaxmem.domain rather than
 * op->u.setdomainvmassist.domain.  Harmless only if the two union
 * members have 'domain' at the same offset -- confirm in dom0_ops.h. */
683 d = find_domain_by_id( op->u.setdomainmaxmem.domain );
684 if ( d != NULL )
685 {
686 vm_assist(d, op->u.setdomainvmassist.cmd,
687 op->u.setdomainvmassist.type);
688 put_domain(d);
689 ret = 0;
690 }
691 }
692 break;
/* Unknown commands are deferred to the architecture-specific handler. */
694 default:
695 ret = arch_do_dom0_op(op,u_dom0_op);
697 }
/* Trace completion, including the result code. */
699 TRACE_5D(TRC_DOM0OP_LEAVE_BASE + op->cmd, ret,
700 op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3]);
703 return ret;
704 }