debuggers.hg: xen/arch/x86/io_apic.c @ 20903:66587cc5f1e1

x86: Polarity-switch method only effective in non-directed EOI case.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Tue Jan 26 15:54:09 2010 +0000 (2010-01-26)
Parents:  b543acc1aaad
Children: 2629c6a79d64
1 /*
2 * Intel IO-APIC support for multi-Pentium hosts.
3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
5 *
6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently!
8 *
9 * (c) 1999, Multiple IO-APIC support, developed by
10 * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
11 * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
12 * further tested and cleaned up by Zach Brown <zab@redhat.com>
13 * and Ingo Molnar <mingo@redhat.com>
14 *
15 * Fixes
16 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
17 * thanks to Eric Gilmore
18 * and Rolf G. Tews
19 * for testing these extensively
20 * Paul Diefenbaugh : Added full ACPI support
21 */
23 #include <xen/config.h>
24 #include <xen/lib.h>
25 #include <xen/init.h>
26 #include <xen/irq.h>
27 #include <xen/delay.h>
28 #include <xen/sched.h>
29 #include <xen/acpi.h>
30 #include <xen/pci.h>
31 #include <xen/pci_regs.h>
32 #include <xen/keyhandler.h>
33 #include <asm/mc146818rtc.h>
34 #include <asm/smp.h>
35 #include <asm/desc.h>
36 #include <asm/msi.h>
37 #include <mach_apic.h>
38 #include <io_ports.h>
39 #include <public/physdev.h>
41 /* Different to Linux: our implementation can be simpler. */
42 #define make_8259A_irq(irq) (io_apic_irqs &= ~(1<<(irq)))
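/*
 * Clearing the IRQ's bit in io_apic_irqs makes IO_APIC_IRQ() stop claiming
 * the line, leaving it to be serviced by the legacy 8259A instead.
 */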
44 int (*ioapic_renumber_irq)(int ioapic, int irq);
45 atomic_t irq_mis_count;
47 /* Where, if anywhere, is the i8259 connected in external int mode */
48 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
50 static DEFINE_SPINLOCK(ioapic_lock);
52 int skip_ioapic_setup;
54 #ifndef sis_apic_bug
55 /*
56 * Is the SiS APIC rmw bug present?
57 * -1 = don't know, 0 = no, 1 = yes
58 */
59 int sis_apic_bug = -1;
60 #endif
62 /*
63 * # of IRQ routing registers
64 */
65 int __read_mostly nr_ioapic_registers[MAX_IO_APICS];
66 int __read_mostly nr_ioapics;
68 int disable_timer_pin_1 __initdata;
70 /*
71 * Rough estimation of how many shared IRQs there are, can
72 * be changed anytime.
73 */
74 #define MAX_PLUS_SHARED_IRQS nr_irqs_gsi
75 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + nr_irqs_gsi)
77 /*
78 * This is performance-critical, we want to do it O(1)
79 *
80 * the indexing order of this array favors 1:1 mappings
81 * between pins and IRQs.
82 */
84 static struct irq_pin_list {
85 int apic, pin;
86 unsigned int next;
87 } *irq_2_pin;
89 static unsigned int irq_2_pin_free_entry;
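/*
 * irq_2_pin layout: the first nr_irqs_gsi entries are indexed directly by
 * GSI; the entries above nr_irqs_gsi form a free list (chained through
 * ->next, initialised in enable_IO_APIC()) that add_pin_to_irq() draws on
 * when a single GSI is wired to more than one IO-APIC pin.
 */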
91 /*
92 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
93 * shared ISA-space IRQs, so we have to support them. We are super
94 * fast in the common case, and fast for shared ISA-space IRQs.
95 */
96 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
97 {
98 struct irq_pin_list *entry = irq_2_pin + irq;
100 while (entry->next) {
101 BUG_ON((entry->apic == apic) && (entry->pin == pin));
102 entry = irq_2_pin + entry->next;
103 }
105 BUG_ON((entry->apic == apic) && (entry->pin == pin));
107 if (entry->pin != -1) {
108 if (irq_2_pin_free_entry >= PIN_MAP_SIZE)
109 panic("io_apic.c: whoops");
110 entry->next = irq_2_pin_free_entry;
111 entry = irq_2_pin + entry->next;
112 irq_2_pin_free_entry = entry->next;
113 entry->next = 0;
114 }
115 entry->apic = apic;
116 entry->pin = pin;
117 }
119 /*
120 * Reroute an IRQ to a different pin.
121 */
122 static void __init replace_pin_at_irq(unsigned int irq,
123 int oldapic, int oldpin,
124 int newapic, int newpin)
125 {
126 struct irq_pin_list *entry = irq_2_pin + irq;
128 while (1) {
129 if (entry->apic == oldapic && entry->pin == oldpin) {
130 entry->apic = newapic;
131 entry->pin = newpin;
132 }
133 if (!entry->next)
134 break;
135 entry = irq_2_pin + entry->next;
136 }
137 }
139 static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
140 {
141 struct irq_pin_list *entry = irq_2_pin + irq;
142 unsigned int pin, reg;
144 for (;;) {
145 pin = entry->pin;
146 if (pin == -1)
147 break;
148 reg = io_apic_read(entry->apic, 0x10 + pin*2);
149 reg &= ~disable;
150 reg |= enable;
151 io_apic_modify(entry->apic, 0x10 + pin*2, reg);
152 if (!entry->next)
153 break;
154 entry = irq_2_pin + entry->next;
155 }
156 }
158 /* mask = 1 */
159 static void __mask_IO_APIC_irq (unsigned int irq)
160 {
161 __modify_IO_APIC_irq(irq, 0x00010000, 0);
162 }
164 /* mask = 0 */
165 static void __unmask_IO_APIC_irq (unsigned int irq)
166 {
167 __modify_IO_APIC_irq(irq, 0, 0x00010000);
168 }
170 /* trigger = 0 */
171 static void __edge_IO_APIC_irq (unsigned int irq)
172 {
173 __modify_IO_APIC_irq(irq, 0, 0x00008000);
174 }
176 /* trigger = 1 */
177 static void __level_IO_APIC_irq (unsigned int irq)
178 {
179 __modify_IO_APIC_irq(irq, 0x00008000, 0);
180 }
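/*
 * The constants used above are bits in the low dword of a redirection
 * table entry: 0x00010000 is the mask bit (bit 16) and 0x00008000 the
 * trigger-mode bit (bit 15, 0 = edge, 1 = level).
 */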
182 static void mask_IO_APIC_irq (unsigned int irq)
183 {
184 unsigned long flags;
186 spin_lock_irqsave(&ioapic_lock, flags);
187 __mask_IO_APIC_irq(irq);
188 spin_unlock_irqrestore(&ioapic_lock, flags);
189 }
191 static void unmask_IO_APIC_irq (unsigned int irq)
192 {
193 unsigned long flags;
195 spin_lock_irqsave(&ioapic_lock, flags);
196 __unmask_IO_APIC_irq(irq);
197 spin_unlock_irqrestore(&ioapic_lock, flags);
198 }
200 static void __eoi_IO_APIC_irq(unsigned int irq)
201 {
202 struct irq_pin_list *entry = irq_2_pin + irq;
203 unsigned int pin, vector = IO_APIC_VECTOR(irq);
205 for (;;) {
206 pin = entry->pin;
207 if (pin == -1)
208 break;
209 io_apic_eoi(entry->apic, vector);
210 if (!entry->next)
211 break;
212 entry = irq_2_pin + entry->next;
213 }
214 }
216 static void eoi_IO_APIC_irq(unsigned int irq)
217 {
218 unsigned long flags;
219 spin_lock_irqsave(&ioapic_lock, flags);
220 __eoi_IO_APIC_irq(irq);
221 spin_unlock_irqrestore(&ioapic_lock, flags);
222 }
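/*
 * io_apic_eoi() targets the EOI register that newer IO-APICs (version
 * 0x20 and later) provide: writing a vector there clears the Remote IRR
 * bit of every redirection entry programmed with that vector, which is
 * what the directed-EOI paths below rely on.
 */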
224 #define clear_IO_APIC_pin(a,p) __clear_IO_APIC_pin(a,p,0)
225 #define clear_IO_APIC_pin_raw(a,p) __clear_IO_APIC_pin(a,p,1)
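/*
 * The _raw variant uses the __io_apic_read/__io_apic_write accessors,
 * which presumably touch the physical RTE directly, bypassing the wrapper
 * accessors that may redirect through an interrupt-remapping layer; both
 * forms are cleared in clear_IO_APIC() below to be safe.
 */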
226 static void __clear_IO_APIC_pin(unsigned int apic, unsigned int pin, int raw)
227 {
228 unsigned int (*read)(unsigned int, unsigned int)
229 = raw ? __io_apic_read : io_apic_read;
230 void (*write)(unsigned int, unsigned int, unsigned int)
231 = raw ? __io_apic_write : io_apic_write;
232 struct IO_APIC_route_entry entry;
233 unsigned long flags;
235 /* Check delivery_mode to be sure we're not clearing an SMI pin */
236 spin_lock_irqsave(&ioapic_lock, flags);
237 *(((int*)&entry) + 0) = (*read)(apic, 0x10 + 2 * pin);
238 *(((int*)&entry) + 1) = (*read)(apic, 0x11 + 2 * pin);
239 spin_unlock_irqrestore(&ioapic_lock, flags);
240 if (entry.delivery_mode == dest_SMI)
241 return;
243 /*
244 * Disable it in the IO-APIC irq-routing table:
245 */
246 memset(&entry, 0, sizeof(entry));
247 entry.mask = 1;
248 spin_lock_irqsave(&ioapic_lock, flags);
249 (*write)(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
250 (*write)(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
251 spin_unlock_irqrestore(&ioapic_lock, flags);
252 }
254 static void clear_IO_APIC (void)
255 {
256 int apic, pin;
258 for (apic = 0; apic < nr_ioapics; apic++) {
259 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
260 clear_IO_APIC_pin(apic, pin);
261 clear_IO_APIC_pin_raw(apic, pin);
262 }
263 }
264 }
266 #ifdef CONFIG_SMP
267 fastcall void smp_irq_move_cleanup_interrupt(struct cpu_user_regs *regs)
268 {
269 unsigned vector, me;
270 struct cpu_user_regs *old_regs = set_irq_regs(regs);
272 ack_APIC_irq();
273 irq_enter();
275 me = smp_processor_id();
276 for (vector = FIRST_DYNAMIC_VECTOR; vector < NR_VECTORS; vector++) {
277 unsigned int irq;
278 unsigned int irr;
279 struct irq_desc *desc;
280 struct irq_cfg *cfg;
281 irq = __get_cpu_var(vector_irq)[vector];
283 if (irq == -1)
284 continue;
286 desc = irq_to_desc(irq);
287 if (!desc)
288 continue;
290 cfg = desc->chip_data;
291 spin_lock(&desc->lock);
292 if (!cfg->move_cleanup_count)
293 goto unlock;
295 if (vector == cfg->vector && cpu_isset(me, cfg->domain))
296 goto unlock;
298 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
299 /*
300 * Check if the vector that needs to be cleanedup is
301 * registered at the cpu's IRR. If so, then this is not
302 * the best time to clean it up. Lets clean it up in the
303 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
304 * to myself.
305 */
306 if (irr & (1 << (vector % 32))) {
307 genapic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
308 goto unlock;
309 }
310 __get_cpu_var(vector_irq)[vector] = -1;
311 cfg->move_cleanup_count--;
312 unlock:
313 spin_unlock(&desc->lock);
314 }
316 irq_exit();
317 set_irq_regs(old_regs);
318 }
320 static void send_cleanup_vector(struct irq_cfg *cfg)
321 {
322 cpumask_t cleanup_mask;
324 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
325 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
326 genapic->send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
328 cfg->move_in_progress = 0;
329 }
331 void irq_complete_move(struct irq_desc **descp)
332 {
333 struct irq_desc *desc = *descp;
334 struct irq_cfg *cfg = desc->chip_data;
335 unsigned vector, me;
337 if (likely(!cfg->move_in_progress))
338 return;
340 vector = get_irq_regs()->entry_vector;
341 me = smp_processor_id();
343 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
344 send_cleanup_vector(cfg);
345 }
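/*
 * IRQ migration protocol: after __assign_irq_vector() has installed the
 * new vector/destination, the first interrupt taken on the new vector on
 * a CPU in the new domain triggers irq_complete_move(), which IPIs the
 * old destination CPUs with IRQ_MOVE_CLEANUP_VECTOR; each of them then
 * releases its stale vector_irq[] slot in smp_irq_move_cleanup_interrupt()
 * above, deferring if the old vector is still pending in its IRR.
 */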
347 unsigned int set_desc_affinity(struct irq_desc *desc, cpumask_t mask)
348 {
349 struct irq_cfg *cfg;
350 unsigned int irq;
351 int ret;
352 unsigned long flags;
353 cpumask_t dest_mask;
355 if (!cpus_intersects(mask, cpu_online_map))
356 return BAD_APICID;
358 irq = desc->irq;
359 cfg = desc->chip_data;
361 local_irq_save(flags);
362 lock_vector_lock();
363 ret = __assign_irq_vector(irq, cfg, mask);
364 unlock_vector_lock();
365 local_irq_restore(flags);
367 if (ret < 0)
368 return BAD_APICID;
370 cpus_copy(desc->affinity, mask);
371 cpus_and(dest_mask, desc->affinity, cfg->domain);
373 return cpu_mask_to_apicid(dest_mask);
374 }
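/*
 * On success the return value is the APIC destination ID derived from the
 * intersection of the requested mask and the vector's domain; callers
 * program it into the redirection entry. BAD_APICID signals that no
 * usable destination exists.
 */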
376 static void
377 set_ioapic_affinity_irq_desc(struct irq_desc *desc,
378 const struct cpumask mask)
379 {
380 unsigned long flags;
381 unsigned int dest;
382 int pin, irq;
383 struct irq_cfg *cfg;
384 struct irq_pin_list *entry;
386 irq = desc->irq;
387 cfg = desc->chip_data;
389 spin_lock_irqsave(&ioapic_lock, flags);
390 dest = set_desc_affinity(desc, mask);
391 if (dest != BAD_APICID) {
392 if ( !x2apic_enabled )
393 dest = SET_APIC_LOGICAL_ID(dest);
394 entry = irq_2_pin + irq;
395 for (;;) {
396 unsigned int data;
397 pin = entry->pin;
398 if (pin == -1)
399 break;
401 io_apic_write(entry->apic, 0x10 + 1 + pin*2, dest);
402 data = io_apic_read(entry->apic, 0x10 + pin*2);
403 data &= ~IO_APIC_REDIR_VECTOR_MASK;
404 data |= cfg->vector & 0xFF;
405 io_apic_modify(entry->apic, 0x10 + pin*2, data);
407 if (!entry->next)
408 break;
409 entry = irq_2_pin + entry->next;
410 }
411 }
412 spin_unlock_irqrestore(&ioapic_lock, flags);
414 }
416 static void
417 set_ioapic_affinity_irq(unsigned int irq, const struct cpumask mask)
418 {
419 struct irq_desc *desc;
421 desc = irq_to_desc(irq);
423 set_ioapic_affinity_irq_desc(desc, mask);
424 }
425 #endif /* CONFIG_SMP */
427 /*
428 * Find the IRQ entry number of a certain pin.
429 */
430 static int find_irq_entry(int apic, int pin, int type)
431 {
432 int i;
434 for (i = 0; i < mp_irq_entries; i++)
435 if (mp_irqs[i].mpc_irqtype == type &&
436 (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
437 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
438 mp_irqs[i].mpc_dstirq == pin)
439 return i;
441 return -1;
442 }
444 /*
445 * Find the pin to which IRQ[irq] (ISA) is connected
446 */
447 static int __init find_isa_irq_pin(int irq, int type)
448 {
449 int i;
451 for (i = 0; i < mp_irq_entries; i++) {
452 int lbus = mp_irqs[i].mpc_srcbus;
454 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
455 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
456 mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
457 mp_bus_id_to_type[lbus] == MP_BUS_NEC98
458 ) &&
459 (mp_irqs[i].mpc_irqtype == type) &&
460 (mp_irqs[i].mpc_srcbusirq == irq))
462 return mp_irqs[i].mpc_dstirq;
463 }
464 return -1;
465 }
467 static int __init find_isa_irq_apic(int irq, int type)
468 {
469 int i;
471 for (i = 0; i < mp_irq_entries; i++) {
472 int lbus = mp_irqs[i].mpc_srcbus;
474 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
475 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
476 mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
477 mp_bus_id_to_type[lbus] == MP_BUS_NEC98
478 ) &&
479 (mp_irqs[i].mpc_irqtype == type) &&
480 (mp_irqs[i].mpc_srcbusirq == irq))
481 break;
482 }
483 if (i < mp_irq_entries) {
484 int apic;
485 for(apic = 0; apic < nr_ioapics; apic++) {
486 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
487 return apic;
488 }
489 }
491 return -1;
492 }
494 /*
495 * Find a specific PCI IRQ entry.
496 * Not an __init, possibly needed by modules
497 */
498 static int pin_2_irq(int idx, int apic, int pin);
500 /*
501 * This function currently is only a helper for the i386 smp boot process where
502 * we need to reprogram the ioredtbls to cater for the cpus which have come online
503 * so mask in all cases should simply be TARGET_CPUS
504 */
505 #ifdef CONFIG_SMP
506 void /*__init*/ setup_ioapic_dest(void)
507 {
508 int pin, ioapic, irq, irq_entry;
509 struct irq_cfg *cfg;
511 if (skip_ioapic_setup == 1)
512 return;
514 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
515 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
516 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
517 if (irq_entry == -1)
518 continue;
519 irq = pin_2_irq(irq_entry, ioapic, pin);
520 cfg = irq_cfg(irq);
521 BUG_ON(cpus_empty(cfg->domain));
522 set_ioapic_affinity_irq(irq, cfg->domain);
523 }
525 }
526 }
527 #endif
529 /*
530 * EISA Edge/Level control register, ELCR
531 */
532 static int EISA_ELCR(unsigned int irq)
533 {
534 if (platform_legacy_irq(irq)) {
535 unsigned int port = 0x4d0 + (irq >> 3);
536 return (inb(port) >> (irq & 7)) & 1;
537 }
538 apic_printk(APIC_VERBOSE, KERN_INFO
539 "Broken MPtable reports ISA irq %d\n", irq);
540 return 0;
541 }
543 /* EISA interrupts are always polarity zero and can be edge or level
544 * trigger depending on the ELCR value. If an interrupt is listed as
545 * EISA conforming in the MP table, that means its trigger type must
546 * be read in from the ELCR */
548 #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
549 #define default_EISA_polarity(idx) (0)
551 /* ISA interrupts are always polarity zero edge triggered,
552 * when listed as conforming in the MP table. */
554 #define default_ISA_trigger(idx) (0)
555 #define default_ISA_polarity(idx) (0)
557 /* PCI interrupts are always polarity one level triggered,
558 * when listed as conforming in the MP table. */
560 #define default_PCI_trigger(idx) (1)
561 #define default_PCI_polarity(idx) (1)
563 /* MCA interrupts are always polarity zero level triggered,
564 * when listed as conforming in the MP table. */
566 #define default_MCA_trigger(idx) (1)
567 #define default_MCA_polarity(idx) (0)
569 /* NEC98 interrupts are always polarity zero edge triggered,
570 * when listed as conforming in the MP table. */
572 #define default_NEC98_trigger(idx) (0)
573 #define default_NEC98_polarity(idx) (0)
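/*
 * Encoding used throughout: polarity 0 = active high, 1 = active low;
 * trigger 0 = edge, 1 = level, matching the corresponding redirection
 * table entry fields. The defaults above apply when the MP table marks
 * an interrupt as "conforming" to its bus.
 */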
575 static int __init MPBIOS_polarity(int idx)
576 {
577 int bus = mp_irqs[idx].mpc_srcbus;
578 int polarity;
580 /*
581 * Determine IRQ line polarity (high active or low active):
582 */
583 switch (mp_irqs[idx].mpc_irqflag & 3)
584 {
585 case 0: /* conforms, ie. bus-type dependent polarity */
586 {
587 switch (mp_bus_id_to_type[bus])
588 {
589 case MP_BUS_ISA: /* ISA pin */
590 {
591 polarity = default_ISA_polarity(idx);
592 break;
593 }
594 case MP_BUS_EISA: /* EISA pin */
595 {
596 polarity = default_EISA_polarity(idx);
597 break;
598 }
599 case MP_BUS_PCI: /* PCI pin */
600 {
601 polarity = default_PCI_polarity(idx);
602 break;
603 }
604 case MP_BUS_MCA: /* MCA pin */
605 {
606 polarity = default_MCA_polarity(idx);
607 break;
608 }
609 case MP_BUS_NEC98: /* NEC 98 pin */
610 {
611 polarity = default_NEC98_polarity(idx);
612 break;
613 }
614 default:
615 {
616 printk(KERN_WARNING "broken BIOS!!\n");
617 polarity = 1;
618 break;
619 }
620 }
621 break;
622 }
623 case 1: /* high active */
624 {
625 polarity = 0;
626 break;
627 }
628 case 2: /* reserved */
629 {
630 printk(KERN_WARNING "broken BIOS!!\n");
631 polarity = 1;
632 break;
633 }
634 case 3: /* low active */
635 {
636 polarity = 1;
637 break;
638 }
639 default: /* invalid */
640 {
641 printk(KERN_WARNING "broken BIOS!!\n");
642 polarity = 1;
643 break;
644 }
645 }
646 return polarity;
647 }
649 static int MPBIOS_trigger(int idx)
650 {
651 int bus = mp_irqs[idx].mpc_srcbus;
652 int trigger;
654 /*
655 * Determine IRQ trigger mode (edge or level sensitive):
656 */
657 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
658 {
659 case 0: /* conforms, ie. bus-type dependent */
660 {
661 switch (mp_bus_id_to_type[bus])
662 {
663 case MP_BUS_ISA: /* ISA pin */
664 {
665 trigger = default_ISA_trigger(idx);
666 break;
667 }
668 case MP_BUS_EISA: /* EISA pin */
669 {
670 trigger = default_EISA_trigger(idx);
671 break;
672 }
673 case MP_BUS_PCI: /* PCI pin */
674 {
675 trigger = default_PCI_trigger(idx);
676 break;
677 }
678 case MP_BUS_MCA: /* MCA pin */
679 {
680 trigger = default_MCA_trigger(idx);
681 break;
682 }
683 case MP_BUS_NEC98: /* NEC 98 pin */
684 {
685 trigger = default_NEC98_trigger(idx);
686 break;
687 }
688 default:
689 {
690 printk(KERN_WARNING "broken BIOS!!\n");
691 trigger = 1;
692 break;
693 }
694 }
695 break;
696 }
697 case 1: /* edge */
698 {
699 trigger = 0;
700 break;
701 }
702 case 2: /* reserved */
703 {
704 printk(KERN_WARNING "broken BIOS!!\n");
705 trigger = 1;
706 break;
707 }
708 case 3: /* level */
709 {
710 trigger = 1;
711 break;
712 }
713 default: /* invalid */
714 {
715 printk(KERN_WARNING "broken BIOS!!\n");
716 trigger = 0;
717 break;
718 }
719 }
720 return trigger;
721 }
723 static inline int irq_polarity(int idx)
724 {
725 return MPBIOS_polarity(idx);
726 }
728 static inline int irq_trigger(int idx)
729 {
730 return MPBIOS_trigger(idx);
731 }
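/*
 * mpc_irqflag encoding (MP specification): bits 1:0 are the polarity and
 * bits 3:2 the trigger mode, each with 0 = conforms to bus, 1 = active
 * high / edge, 2 = reserved, 3 = active low / level. For example,
 * mpc_irqflag == 0x0f decodes as active-low, level-triggered.
 */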
733 static int pin_2_irq(int idx, int apic, int pin)
734 {
735 int irq, i;
736 int bus = mp_irqs[idx].mpc_srcbus;
738 /*
739 * Debugging check, we are in big trouble if this message pops up!
740 */
741 if (mp_irqs[idx].mpc_dstirq != pin)
742 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
744 switch (mp_bus_id_to_type[bus])
745 {
746 case MP_BUS_ISA: /* ISA pin */
747 case MP_BUS_EISA:
748 case MP_BUS_MCA:
749 case MP_BUS_NEC98:
750 {
751 irq = mp_irqs[idx].mpc_srcbusirq;
752 break;
753 }
754 case MP_BUS_PCI: /* PCI pin */
755 {
756 /*
757 * PCI IRQs are mapped in order
758 */
759 i = irq = 0;
760 while (i < apic)
761 irq += nr_ioapic_registers[i++];
762 irq += pin;
764 /*
765 * For MPS mode, so far only needed by ES7000 platform
766 */
767 if (ioapic_renumber_irq)
768 irq = ioapic_renumber_irq(apic, irq);
770 break;
771 }
772 default:
773 {
774 printk(KERN_ERR "unknown bus type %d.\n",bus);
775 irq = 0;
776 break;
777 }
778 }
780 return irq;
781 }
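/*
 * For PCI sources the GSI is the cumulative pin number across IO-APICs:
 * e.g. with two 24-pin IO-APICs, pin 3 of the second one becomes
 * IRQ 24 + 3 = 27 (unless ioapic_renumber_irq remaps it).
 */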
783 static inline int IO_APIC_irq_trigger(int irq)
784 {
785 int apic, idx, pin;
787 for (apic = 0; apic < nr_ioapics; apic++) {
788 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
789 idx = find_irq_entry(apic,pin,mp_INT);
790 if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
791 return irq_trigger(idx);
792 }
793 }
794 /*
795 * nonexistent IRQs are edge default
796 */
797 return 0;
798 }
800 static hw_irq_controller ioapic_level_type;
801 static hw_irq_controller ioapic_edge_type;
803 #define IOAPIC_AUTO -1
804 #define IOAPIC_EDGE 0
805 #define IOAPIC_LEVEL 1
807 #define SET_DEST(x, y, value) \
808 do { if ( x2apic_enabled ) x = value; else y = value; } while(0)
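/*
 * In x2APIC mode the redirection entry carries a full 32-bit destination
 * (dest.dest32); in xAPIC mode only the 8-bit logical/physical destination
 * byte is used. SET_DEST() writes whichever union member applies.
 */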
810 static inline void ioapic_register_intr(int irq, unsigned long trigger)
811 {
812 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
813 trigger == IOAPIC_LEVEL)
814 irq_desc[irq].handler = &ioapic_level_type;
815 else
816 irq_desc[irq].handler = &ioapic_edge_type;
817 }
819 static void __init setup_IO_APIC_irqs(void)
820 {
821 struct IO_APIC_route_entry entry;
822 int apic, pin, idx, irq, first_notcon = 1, vector;
823 unsigned long flags;
824 struct irq_cfg *cfg;
826 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
828 for (apic = 0; apic < nr_ioapics; apic++) {
829 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
831 /*
832 * add it to the IO-APIC irq-routing table:
833 */
834 memset(&entry,0,sizeof(entry));
836 entry.delivery_mode = INT_DELIVERY_MODE;
837 entry.dest_mode = INT_DEST_MODE;
838 entry.mask = 0; /* enable IRQ */
840 idx = find_irq_entry(apic,pin,mp_INT);
841 if (idx == -1) {
842 if (first_notcon) {
843 apic_printk(APIC_VERBOSE, KERN_DEBUG
844 " IO-APIC (apicid-pin) %d-%d",
845 mp_ioapics[apic].mpc_apicid,
846 pin);
847 first_notcon = 0;
848 } else
849 apic_printk(APIC_VERBOSE, ", %d-%d",
850 mp_ioapics[apic].mpc_apicid, pin);
851 continue;
852 }
854 entry.trigger = irq_trigger(idx);
855 entry.polarity = irq_polarity(idx);
857 if (irq_trigger(idx)) {
858 entry.trigger = 1;
859 entry.mask = 1;
860 }
862 irq = pin_2_irq(idx, apic, pin);
863 /*
864 * skip adding the timer int on secondary nodes, which causes
865 * a small but painful rift in the time-space continuum
866 */
867 if (multi_timer_check(apic, irq))
868 continue;
869 else
870 add_pin_to_irq(irq, apic, pin);
872 if (!apic && !IO_APIC_IRQ(irq))
873 continue;
875 if (IO_APIC_IRQ(irq)) {
876 vector = assign_irq_vector(irq);
877 BUG_ON(vector < 0);
878 entry.vector = vector;
879 ioapic_register_intr(irq, IOAPIC_AUTO);
881 if (!apic && platform_legacy_irq(irq))
882 disable_8259A_irq(irq);
883 }
884 cfg = irq_cfg(irq);
885 SET_DEST(entry.dest.dest32, entry.dest.logical.logical_dest,
886 cpu_mask_to_apicid(cfg->domain));
887 spin_lock_irqsave(&ioapic_lock, flags);
888 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
889 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
890 set_native_irq_info(irq, TARGET_CPUS);
891 spin_unlock_irqrestore(&ioapic_lock, flags);
892 }
893 }
895 if (!first_notcon)
896 apic_printk(APIC_VERBOSE, " not connected.\n");
897 }
899 /*
900 * Set up the 8259A-master output pin:
901 */
902 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
903 {
904 struct IO_APIC_route_entry entry;
904 struct IO_APIC_route_entry entry;
905 unsigned long flags;
907 memset(&entry,0,sizeof(entry));
909 disable_8259A_irq(0);
911 /* mask LVT0 */
912 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
914 /*
915 * We use logical delivery to get the timer IRQ
916 * to the first CPU.
917 */
918 entry.dest_mode = INT_DEST_MODE;
919 entry.mask = 0; /* unmask IRQ now */
920 SET_DEST(entry.dest.dest32, entry.dest.logical.logical_dest,
921 cpu_mask_to_apicid(TARGET_CPUS));
922 entry.delivery_mode = INT_DELIVERY_MODE;
923 entry.polarity = 0;
924 entry.trigger = 0;
925 entry.vector = vector;
927 /*
928 * The timer IRQ doesn't have to know that behind the
929 * scene we have a 8259A-master in AEOI mode ...
930 */
931 irq_desc[0].handler = &ioapic_edge_type;
933 /*
934 * Add it to the IO-APIC irq-routing table:
935 */
936 spin_lock_irqsave(&ioapic_lock, flags);
937 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
938 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
939 spin_unlock_irqrestore(&ioapic_lock, flags);
941 enable_8259A_irq(0);
942 }
944 static inline void UNEXPECTED_IO_APIC(void)
945 {
946 }
948 static void /*__init*/ __print_IO_APIC(void)
949 {
950 int apic, i;
950 int apic, i;
951 union IO_APIC_reg_00 reg_00;
952 union IO_APIC_reg_01 reg_01;
953 union IO_APIC_reg_02 reg_02;
954 union IO_APIC_reg_03 reg_03;
955 unsigned long flags;
957 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
958 for (i = 0; i < nr_ioapics; i++)
959 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
960 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
962 /*
963 * We are a bit conservative about what we expect. We have to
964 * know about every hardware change ASAP.
965 */
966 printk(KERN_INFO "testing the IO APIC.......................\n");
968 for (apic = 0; apic < nr_ioapics; apic++) {
970 spin_lock_irqsave(&ioapic_lock, flags);
971 reg_00.raw = io_apic_read(apic, 0);
972 reg_01.raw = io_apic_read(apic, 1);
973 if (reg_01.bits.version >= 0x10)
974 reg_02.raw = io_apic_read(apic, 2);
975 if (reg_01.bits.version >= 0x20)
976 reg_03.raw = io_apic_read(apic, 3);
977 spin_unlock_irqrestore(&ioapic_lock, flags);
979 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
980 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
981 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
982 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
983 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
984 if (reg_00.bits.ID >= get_physical_broadcast())
985 UNEXPECTED_IO_APIC();
986 if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
987 UNEXPECTED_IO_APIC();
989 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
990 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
991 if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
992 (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
993 (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
994 (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
995 (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
996 (reg_01.bits.entries != 0x2E) &&
997 (reg_01.bits.entries != 0x3F)
998 )
999 UNEXPECTED_IO_APIC();
1001 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1002 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1003 if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
1004 (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
1005 (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
1006 (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
1007 (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
1008 )
1009 UNEXPECTED_IO_APIC();
1010 if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
1011 UNEXPECTED_IO_APIC();
1013 /*
1014 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1015 * but the value of reg_02 is read as the previous read register
1016 * value, so ignore it if reg_02 == reg_01.
1017 */
1018 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1019 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1020 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1021 if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
1022 UNEXPECTED_IO_APIC();
1023 }
1025 /*
1026 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1027 * or reg_03, but the value of reg_0[23] is read as the previous read
1028 * register value, so ignore it if reg_03 == reg_0[12].
1029 */
1030 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1031 reg_03.raw != reg_01.raw) {
1032 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1033 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1034 if (reg_03.bits.__reserved_1)
1035 UNEXPECTED_IO_APIC();
1036 }
1038 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1040 printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1041 " Stat Dest Deli Vect: \n");
1043 for (i = 0; i <= reg_01.bits.entries; i++) {
1044 struct IO_APIC_route_entry entry;
1046 spin_lock_irqsave(&ioapic_lock, flags);
1047 *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
1048 *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
1049 spin_unlock_irqrestore(&ioapic_lock, flags);
1051 printk(KERN_DEBUG " %02x %03X %02X ",
1052 i,
1053 entry.dest.logical.logical_dest,
1054 entry.dest.physical.physical_dest
1055 );
1057 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1058 entry.mask,
1059 entry.trigger,
1060 entry.irr,
1061 entry.polarity,
1062 entry.delivery_status,
1063 entry.dest_mode,
1064 entry.delivery_mode,
1065 entry.vector
1066 );
1067 }
1068 }
1069 printk(KERN_INFO "Using vector-based indexing\n");
1070 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1071 for (i = 0; i < nr_irqs_gsi; i++) {
1072 struct irq_pin_list *entry = irq_2_pin + i;
1073 if (entry->pin < 0)
1074 continue;
1075 printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
1076 for (;;) {
1077 printk("-> %d:%d", entry->apic, entry->pin);
1078 if (!entry->next)
1079 break;
1080 entry = irq_2_pin + entry->next;
1081 }
1082 printk("\n");
1083 }
1085 printk(KERN_INFO ".................................... done.\n");
1087 return;
1088 }
1090 void print_IO_APIC(void)
1091 {
1092 if (apic_verbosity != APIC_QUIET)
1093 __print_IO_APIC();
1094 }
1096 static void _print_IO_APIC_keyhandler(unsigned char key)
1097 {
1098 __print_IO_APIC();
1099 }
1100 static struct keyhandler print_IO_APIC_keyhandler = {
1101 .diagnostic = 1,
1102 .u.fn = _print_IO_APIC_keyhandler,
1103 .desc = "print ioapic info"
1104 };
1106 static void __init enable_IO_APIC(void)
1107 {
1108 int i8259_apic, i8259_pin;
1109 int i, apic;
1110 unsigned long flags;
1112 /* Initialise dynamic irq_2_pin free list. */
1113 irq_2_pin = xmalloc_array(struct irq_pin_list, PIN_MAP_SIZE);
1114 memset(irq_2_pin, 0, PIN_MAP_SIZE * sizeof(*irq_2_pin));
1116 for (i = 0; i < PIN_MAP_SIZE; i++)
1117 irq_2_pin[i].pin = -1;
1118 for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
1119 irq_2_pin[i].next = i + 1;
1121 for(apic = 0; apic < nr_ioapics; apic++) {
1122 int pin;
1123 /* See if any of the pins is in ExtINT mode */
1124 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1125 struct IO_APIC_route_entry entry;
1126 spin_lock_irqsave(&ioapic_lock, flags);
1127 *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1128 *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1129 spin_unlock_irqrestore(&ioapic_lock, flags);
1132 /* If the interrupt line is enabled and in ExtInt mode
1133 * I have found the pin where the i8259 is connected.
1134 */
1135 if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1136 ioapic_i8259.apic = apic;
1137 ioapic_i8259.pin = pin;
1138 goto found_i8259;
1139 }
1140 }
1141 }
1142 found_i8259:
1143 /* Look to see whether the MP table has reported the ExtINT */
1144 /* If we could not find the appropriate pin by looking at the ioapic,
1145 * the i8259 is probably not connected to the ioapic, but give the
1146 * mptable a chance anyway.
1147 */
1148 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1149 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1150 /* Trust the MP table if nothing is setup in the hardware */
1151 if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1152 printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1153 ioapic_i8259.pin = i8259_pin;
1154 ioapic_i8259.apic = i8259_apic;
1155 }
1156 /* Complain if the MP table and the hardware disagree */
1157 if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1158 (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1159 {
1160 printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1161 }
1163 /*
1164 * Do not trust the IO-APIC being empty at bootup
1165 */
1166 clear_IO_APIC();
1167 }
1169 /*
1170 * Not an __init, needed by the reboot code
1171 */
1172 void disable_IO_APIC(void)
1173 {
1174 /*
1175 * Clear the IO-APIC before rebooting:
1176 */
1177 clear_IO_APIC();
1179 /*
1180 * If the i8259 is routed through an IOAPIC
1181 * Put that IOAPIC in virtual wire mode
1182 * so legacy interrupts can be delivered.
1183 */
1184 if (ioapic_i8259.pin != -1) {
1185 struct IO_APIC_route_entry entry;
1186 unsigned long flags;
1188 memset(&entry, 0, sizeof(entry));
1189 entry.mask = 0; /* Enabled */
1190 entry.trigger = 0; /* Edge */
1191 entry.irr = 0;
1192 entry.polarity = 0; /* High */
1193 entry.delivery_status = 0;
1194 entry.dest_mode = 0; /* Physical */
1195 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1196 entry.vector = 0;
1197 SET_DEST(entry.dest.dest32, entry.dest.physical.physical_dest,
1198 get_apic_id());
1200 /*
1201 * Add it to the IO-APIC irq-routing table:
1202 */
1203 spin_lock_irqsave(&ioapic_lock, flags);
1204 io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
1205 *(((int *)&entry)+1));
1206 io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
1207 *(((int *)&entry)+0));
1208 spin_unlock_irqrestore(&ioapic_lock, flags);
1209 }
1210 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1211 }
1213 /*
1214 * function to set the IO-APIC physical IDs based on the
1215 * values stored in the MPC table.
1217 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1218 */
1220 #ifndef CONFIG_X86_NUMAQ
1221 static void __init setup_ioapic_ids_from_mpc(void)
1223 union IO_APIC_reg_00 reg_00;
1224 physid_mask_t phys_id_present_map;
1225 int apic;
1226 int i;
1227 unsigned char old_id;
1228 unsigned long flags;
1230 /*
1231 * Don't check I/O APIC IDs for xAPIC systems. They have
1232 * no meaning without the serial APIC bus.
1233 */
1234 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1235 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1236 return;
1238 /*
1239 * This is broken; anything with a real cpu count has to
1240 * circumvent this idiocy regardless.
1241 */
1242 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1244 /*
1245 * Set the IOAPIC ID to the value stored in the MPC table.
1246 */
1247 for (apic = 0; apic < nr_ioapics; apic++) {
1249 /* Read the register 0 value */
1250 spin_lock_irqsave(&ioapic_lock, flags);
1251 reg_00.raw = io_apic_read(apic, 0);
1252 spin_unlock_irqrestore(&ioapic_lock, flags);
1254 old_id = mp_ioapics[apic].mpc_apicid;
1256 if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
1257 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1258 apic, mp_ioapics[apic].mpc_apicid);
1259 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1260 reg_00.bits.ID);
1261 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
1264 /*
1265 * Sanity check, is the ID really free? Every APIC in a
1266 * system must have a unique ID or we get lots of nice
1267 * 'stuck on smp_invalidate_needed IPI wait' messages.
1268 */
1269 if (check_apicid_used(phys_id_present_map,
1270 mp_ioapics[apic].mpc_apicid)) {
1271 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1272 apic, mp_ioapics[apic].mpc_apicid);
1273 for (i = 0; i < get_physical_broadcast(); i++)
1274 if (!physid_isset(i, phys_id_present_map))
1275 break;
1276 if (i >= get_physical_broadcast())
1277 panic("Max APIC ID exceeded!\n");
1278 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1279 i);
1280 physid_set(i, phys_id_present_map);
1281 mp_ioapics[apic].mpc_apicid = i;
1282 } else {
1283 physid_mask_t tmp;
1284 tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
1285 apic_printk(APIC_VERBOSE, "Setting %d in the "
1286 "phys_id_present_map\n",
1287 mp_ioapics[apic].mpc_apicid);
1288 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1292 /*
1293 * We need to adjust the IRQ routing table
1294 * if the ID changed.
1295 */
1296 if (old_id != mp_ioapics[apic].mpc_apicid)
1297 for (i = 0; i < mp_irq_entries; i++)
1298 if (mp_irqs[i].mpc_dstapic == old_id)
1299 mp_irqs[i].mpc_dstapic
1300 = mp_ioapics[apic].mpc_apicid;
1302 /*
1303 * Read the right value from the MPC table and
1304 * write it into the ID register.
1305 */
1306 apic_printk(APIC_VERBOSE, KERN_INFO
1307 "...changing IO-APIC physical APIC ID to %d ...",
1308 mp_ioapics[apic].mpc_apicid);
1310 reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
1311 spin_lock_irqsave(&ioapic_lock, flags);
1312 io_apic_write(apic, 0, reg_00.raw);
1313 spin_unlock_irqrestore(&ioapic_lock, flags);
1315 /*
1316 * Sanity check
1317 */
1318 spin_lock_irqsave(&ioapic_lock, flags);
1319 reg_00.raw = io_apic_read(apic, 0);
1320 spin_unlock_irqrestore(&ioapic_lock, flags);
1321 if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
1322 printk("could not set ID!\n");
1323 else
1324 apic_printk(APIC_VERBOSE, " ok.\n");
1327 #else
1328 static void __init setup_ioapic_ids_from_mpc(void) { }
1329 #endif
1331 /*
1332 * There is a nasty bug in some older SMP boards, their mptable lies
1333 * about the timer IRQ. We do the following to work around the situation:
1335 * - timer IRQ defaults to IO-APIC IRQ
1336 * - if this function detects that timer IRQs are defunct, then we fall
1337 * back to ISA timer IRQs
1338 */
1339 static int __init timer_irq_works(void)
1341 extern unsigned long pit0_ticks;
1342 unsigned long t1, flags;
1344 t1 = pit0_ticks;
1345 mb();
1347 local_save_flags(flags);
1348 local_irq_enable();
1349 /* Let ten ticks pass... */
1350 mdelay((10 * 1000) / HZ);
1351 local_irq_restore(flags);
1353 /*
1354 * Expect a few ticks at least, to be sure some possible
1355 * glue logic does not lock up after one or two first
1356 * ticks in a non-ExtINT mode. Also the local APIC
1357 * might have cached one ExtINT interrupt. Finally, at
1358 * least one tick may be lost due to delays.
1359 */
1360 mb();
1361 if (pit0_ticks - t1 > 4)
1362 return 1;
1364 return 0;
1367 /*
1368 * In the SMP+IOAPIC case it might happen that there are an unspecified
1369 * number of pending IRQ events unhandled. These cases are very rare,
1370 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
1371 * better to do it this way as thus we do not have to be aware of
1372 * 'pending' interrupts in the IRQ path, except at this point.
1373 */
1374 /*
1375 * Edge triggered needs to resend any interrupt
1376 * that was delayed but this is now handled in the device
1377 * independent code.
1378 */
1380 /*
1381 * Starting up an edge-triggered IO-APIC interrupt is
1382 * nasty - we need to make sure that we get the edge.
1383 * If it is already asserted for some reason, we need to
1384 * return 1 to indicate that it was pending.
1386 * This is not complete - we should be able to fake
1387 * an edge even if it isn't on the 8259A...
1388 */
1389 static unsigned int startup_edge_ioapic_irq(unsigned int irq)
1391 int was_pending = 0;
1392 unsigned long flags;
1394 spin_lock_irqsave(&ioapic_lock, flags);
1395 if (platform_legacy_irq(irq)) {
1396 disable_8259A_irq(irq);
1397 if (i8259A_irq_pending(irq))
1398 was_pending = 1;
1400 __unmask_IO_APIC_irq(irq);
1401 spin_unlock_irqrestore(&ioapic_lock, flags);
1403 return was_pending;
1406 /*
1407 * Once we have recorded IRQ_PENDING already, we can mask the
1408 * interrupt for real. This prevents IRQ storms from unhandled
1409 * devices.
1410 */
1411 static void ack_edge_ioapic_irq(unsigned int irq)
1413 struct irq_desc *desc = irq_to_desc(irq);
1415 irq_complete_move(&desc);
1416 move_native_irq(irq);
1418 if ((desc->status & (IRQ_PENDING | IRQ_DISABLED))
1419 == (IRQ_PENDING | IRQ_DISABLED))
1420 mask_IO_APIC_irq(irq);
1421 ack_APIC_irq();
1424 /*
1425 * Level triggered interrupts can just be masked,
1426 * and shutting down and starting up the interrupt
1427 * is the same as enabling and disabling them -- except
1428 * with a startup need to return a "was pending" value.
1430 * Level triggered interrupts are special because we
1431 * do not touch any IO-APIC register while handling
1432 * them. We ack the APIC in the end-IRQ handler, not
1433 * in the start-IRQ-handler. Protection against reentrance
1434 * from the same interrupt is still provided, both by the
1435 * generic IRQ layer and by the fact that an unacked local
1436 * APIC does not accept IRQs.
1437 */
1438 static unsigned int startup_level_ioapic_irq (unsigned int irq)
1440 unmask_IO_APIC_irq(irq);
1442 return 0; /* don't check for pending */
1445 int __read_mostly ioapic_ack_new = 1;
1446 static void setup_ioapic_ack(char *s)
1448 if ( !strcmp(s, "old") )
1449 ioapic_ack_new = 0;
1450 else if ( !strcmp(s, "new") )
1451 ioapic_ack_new = 1;
1452 else
1453 printk("Unknown ioapic_ack value specified: '%s'\n", s);
1455 custom_param("ioapic_ack", setup_ioapic_ack);
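/*
 * "ioapic_ack=old|new" is a Xen command-line option. Roughly: with "new"
 * (the default) the local APIC ack and the 82093AA erratum workaround are
 * deferred to the .end handler (end_level_ioapic_irq); with "old" the line
 * is masked (unless directed EOI is available) and the local APIC acked up
 * front in mask_and_ack_level_ioapic_irq, and .end only EOIs or unmasks.
 */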
1457 static bool_t io_apic_level_ack_pending(unsigned int irq)
1459 struct irq_pin_list *entry;
1460 unsigned long flags;
1462 spin_lock_irqsave(&ioapic_lock, flags);
1463 entry = &irq_2_pin[irq];
1464 for (;;) {
1465 unsigned int reg;
1466 int pin;
1468 if (!entry)
1469 break;
1471 pin = entry->pin;
1472 if (pin == -1)
1473 continue;
1474 reg = io_apic_read(entry->apic, 0x10 + pin*2);
1475 /* Is the remote IRR bit set? */
1476 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
1477 spin_unlock_irqrestore(&ioapic_lock, flags);
1478 return 1;
1480 if (!entry->next)
1481 break;
1482 entry = irq_2_pin + entry->next;
1484 spin_unlock_irqrestore(&ioapic_lock, flags);
1486 return 0;
1489 static void mask_and_ack_level_ioapic_irq (unsigned int irq)
1491 unsigned long v;
1492 int i;
1493 struct irq_desc *desc = irq_to_desc(irq);
1495 irq_complete_move(&desc);
1497 if ( ioapic_ack_new )
1498 return;
1500 if ( !directed_eoi_enabled )
1501 mask_IO_APIC_irq(irq);
1503 /*
1504 * It appears there is an erratum which affects at least version 0x11
1505 * of I/O APIC (that's the 82093AA and cores integrated into various
1506 * chipsets). Under certain conditions a level-triggered interrupt is
1507 * erroneously delivered as edge-triggered one but the respective IRR
1508 * bit gets set nevertheless. As a result the I/O unit expects an EOI
1509 * message but it will never arrive and further interrupts are blocked
1510 * from the source. The exact reason is so far unknown, but the
1511 * phenomenon was observed when two consecutive interrupt requests
1512 * from a given source get delivered to the same CPU and the source is
1513 * temporarily disabled in between.
1515 * A workaround is to simulate an EOI message manually. We achieve it
1516 * by setting the trigger mode to edge and then to level when the edge
1517 * trigger mode gets detected in the TMR of a local APIC for a
1518 * level-triggered interrupt. We mask the source for the time of the
1519 * operation to prevent an edge-triggered interrupt escaping meanwhile.
1520 * The idea is from Manfred Spraul. --macro
1521 */
1522 i = IO_APIC_VECTOR(irq);
1524 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1526 ack_APIC_irq();
1528 if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
1529 !io_apic_level_ack_pending(irq))
1530 move_native_irq(irq);
1532 if (!directed_eoi_enabled && !(v & (1 << (i & 0x1f)))) {
1533 atomic_inc(&irq_mis_count);
1534 spin_lock(&ioapic_lock);
1535 __edge_IO_APIC_irq(irq);
1536 __level_IO_APIC_irq(irq);
1537 spin_unlock(&ioapic_lock);
1541 static void end_level_ioapic_irq (unsigned int irq)
1543 unsigned long v;
1544 int i;
1546 if ( !ioapic_ack_new )
1548 if ( irq_desc[irq].status & IRQ_DISABLED )
1549 return;
1551 if ( directed_eoi_enabled )
1552 eoi_IO_APIC_irq(irq);
1553 else
1554 unmask_IO_APIC_irq(irq);
1556 return;
1559 /*
1560 * It appears there is an erratum which affects at least version 0x11
1561 * of I/O APIC (that's the 82093AA and cores integrated into various
1562 * chipsets). Under certain conditions a level-triggered interrupt is
1563 * erroneously delivered as edge-triggered one but the respective IRR
1564 * bit gets set nevertheless. As a result the I/O unit expects an EOI
1565 * message but it will never arrive and further interrupts are blocked
1566 * from the source. The exact reason is so far unknown, but the
1567 * phenomenon was observed when two consecutive interrupt requests
1568 * from a given source get delivered to the same CPU and the source is
1569 * temporarily disabled in between.
1571 * A workaround is to simulate an EOI message manually. We achieve it
1572 * by setting the trigger mode to edge and then to level when the edge
1573 * trigger mode gets detected in the TMR of a local APIC for a
1574 * level-triggered interrupt. We mask the source for the time of the
1575 * operation to prevent an edge-triggered interrupt escaping meanwhile.
1576 * The idea is from Manfred Spraul. --macro
1577 */
1578 i = IO_APIC_VECTOR(irq);
1580 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1582 ack_APIC_irq();
1584 if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
1585 !io_apic_level_ack_pending(irq))
1586 move_native_irq(irq);
1588 if (!(v & (1 << (i & 0x1f)))) {
1589 atomic_inc(&irq_mis_count);
1590 spin_lock(&ioapic_lock);
1591 __mask_IO_APIC_irq(irq);
1592 __edge_IO_APIC_irq(irq);
1593 __level_IO_APIC_irq(irq);
1594 if ( !(irq_desc[irq].status & IRQ_DISABLED) )
1595 __unmask_IO_APIC_irq(irq);
1596 spin_unlock(&ioapic_lock);
1600 static void disable_edge_ioapic_irq(unsigned int irq)
1604 static void end_edge_ioapic_irq(unsigned int irq)
1608 /*
1609 * Level and edge triggered IO-APIC interrupts need different handling,
1610 * so we use two separate IRQ descriptors. Edge triggered IRQs can be
1611 * handled with the level-triggered descriptor, but that one has slightly
1612 * more overhead. Level-triggered interrupts cannot be handled with the
1613 * edge-triggered handler, without risking IRQ storms and other ugly
1614 * races.
1615 */
1616 static hw_irq_controller ioapic_edge_type = {
1617 .typename = "IO-APIC-edge",
1618 .startup = startup_edge_ioapic_irq,
1619 .shutdown = disable_edge_ioapic_irq,
1620 .enable = unmask_IO_APIC_irq,
1621 .disable = disable_edge_ioapic_irq,
1622 .ack = ack_edge_ioapic_irq,
1623 .end = end_edge_ioapic_irq,
1624 .set_affinity = set_ioapic_affinity_irq,
1625 };
1627 static hw_irq_controller ioapic_level_type = {
1628 .typename = "IO-APIC-level",
1629 .startup = startup_level_ioapic_irq,
1630 .shutdown = mask_IO_APIC_irq,
1631 .enable = unmask_IO_APIC_irq,
1632 .disable = mask_IO_APIC_irq,
1633 .ack = mask_and_ack_level_ioapic_irq,
1634 .end = end_level_ioapic_irq,
1635 .set_affinity = set_ioapic_affinity_irq,
1636 };
1638 static unsigned int startup_msi_irq(unsigned int irq)
1640 unmask_msi_irq(irq);
1641 return 0;
1644 static void ack_msi_irq(unsigned int irq)
1646 struct irq_desc *desc = irq_to_desc(irq);
1648 irq_complete_move(&desc);
1649 move_native_irq(irq);
1651 if ( msi_maskable_irq(desc->msi_desc) )
1652 ack_APIC_irq(); /* ACKTYPE_NONE */
1655 static void end_msi_irq(unsigned int irq)
1657 if ( !msi_maskable_irq(irq_desc[irq].msi_desc) )
1658 ack_APIC_irq(); /* ACKTYPE_EOI */
1661 static void shutdown_msi_irq(unsigned int irq)
1663 mask_msi_irq(irq);
1666 static void set_msi_affinity_irq(unsigned int irq, cpumask_t cpu_mask)
1668 set_msi_affinity(irq, cpu_mask);
1671 /*
1672 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
1673 * which implement the MSI or MSI-X Capability Structure.
1674 */
1675 hw_irq_controller pci_msi_type = {
1676 .typename = "PCI-MSI",
1677 .startup = startup_msi_irq,
1678 .shutdown = shutdown_msi_irq,
1679 .enable = unmask_msi_irq,
1680 .disable = mask_msi_irq,
1681 .ack = ack_msi_irq,
1682 .end = end_msi_irq,
1683 .set_affinity = set_msi_affinity_irq,
1684 };
1686 static inline void init_IO_APIC_traps(void)
1688 int irq;
1689 /* Xen: This is way simpler than the Linux implementation. */
1690 for (irq = 0; platform_legacy_irq(irq); irq++)
1691 if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq))
1692 make_8259A_irq(irq);
1695 static void enable_lapic_irq(unsigned int irq)
1697 unsigned long v;
1699 v = apic_read(APIC_LVT0);
1700 apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
1703 static void disable_lapic_irq(unsigned int irq)
1705 unsigned long v;
1707 v = apic_read(APIC_LVT0);
1708 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
1711 static void ack_lapic_irq(unsigned int irq)
1713 ack_APIC_irq();
1716 static void end_lapic_irq(unsigned int irq) { /* nothing */ }
1718 static hw_irq_controller lapic_irq_type = {
1719 .typename = "local-APIC-edge",
1720 .startup = NULL, /* startup_irq() not used for IRQ0 */
1721 .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
1722 .enable = enable_lapic_irq,
1723 .disable = disable_lapic_irq,
1724 .ack = ack_lapic_irq,
1725 .end = end_lapic_irq,
1726 };
1728 /*
1729 * This looks a bit hackish but it's about the only way of sending
1730 * a few INTA cycles to 8259As and any associated glue logic. ICR does
1731 * not support the ExtINT mode, unfortunately. We need to send these
1732 * cycles as some i82489DX-based boards have glue logic that keeps the
1733 * 8259A interrupt line asserted until INTA. --macro
1734 */
1735 static inline void unlock_ExtINT_logic(void)
1737 int apic, pin, i;
1738 struct IO_APIC_route_entry entry0, entry1;
1739 unsigned char save_control, save_freq_select;
1740 unsigned long flags;
1742 pin = find_isa_irq_pin(8, mp_INT);
1743 apic = find_isa_irq_apic(8, mp_INT);
1744 if (pin == -1)
1745 return;
1747 spin_lock_irqsave(&ioapic_lock, flags);
1748 *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1749 *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1750 spin_unlock_irqrestore(&ioapic_lock, flags);
1751 clear_IO_APIC_pin(apic, pin);
1753 memset(&entry1, 0, sizeof(entry1));
1755 entry1.dest_mode = 0; /* physical delivery */
1756 entry1.mask = 0; /* unmask IRQ now */
1757 SET_DEST(entry1.dest.dest32, entry1.dest.physical.physical_dest,
1758 hard_smp_processor_id());
1759 entry1.delivery_mode = dest_ExtINT;
1760 entry1.polarity = entry0.polarity;
1761 entry1.trigger = 0;
1762 entry1.vector = 0;
1764 spin_lock_irqsave(&ioapic_lock, flags);
1765 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
1766 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
1767 spin_unlock_irqrestore(&ioapic_lock, flags);
1769 save_control = CMOS_READ(RTC_CONTROL);
1770 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
1771 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
1772 RTC_FREQ_SELECT);
1773 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
1775 i = 100;
1776 while (i-- > 0) {
1777 mdelay(10);
1778 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
1779 i -= 10;
1782 CMOS_WRITE(save_control, RTC_CONTROL);
1783 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
1784 clear_IO_APIC_pin(apic, pin);
1786 spin_lock_irqsave(&ioapic_lock, flags);
1787 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
1788 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
1789 spin_unlock_irqrestore(&ioapic_lock, flags);
1792 int timer_uses_ioapic_pin_0;
1794 /*
1795 * This code may look a bit paranoid, but it's supposed to cooperate with
1796 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1797 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1798 * fanatically on his truly buggy board.
1799 */
1800 static inline void check_timer(void)
1802 int apic1, pin1, apic2, pin2;
1803 int vector, ret;
1804 unsigned long flags;
1806 local_irq_save(flags);
1808 /*
1809 * get/set the timer IRQ vector:
1810 */
1811 disable_8259A_irq(0);
1812 vector = FIRST_HIPRIORITY_VECTOR;
1813 clear_irq_vector(0);
1815 if ((ret = bind_irq_vector(0, vector, (cpumask_t)CPU_MASK_ALL)))
1816 printk(KERN_ERR"..IRQ0 is not set correctly with ioapic!!!, err:%d\n", ret);
1818 irq_desc[0].depth = 0;
1819 irq_desc[0].status &= ~IRQ_DISABLED;
1820 irq_desc[0].handler = &ioapic_edge_type;
1822 /*
1823 * Subtle, code in do_timer_interrupt() expects an AEOI
1824 * mode for the 8259A whenever interrupts are routed
1825 * through I/O APICs. Also IRQ0 has to be enabled in
1826 * the 8259A which implies the virtual wire has to be
1827 * disabled in the local APIC.
1828 */
1829 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1830 init_8259A(1);
1831 /* XEN: Ripped out the legacy missed-tick logic, so below is not needed. */
1832 /*timer_ack = 1;*/
1833 /*enable_8259A_irq(0);*/
1835 pin1 = find_isa_irq_pin(0, mp_INT);
1836 apic1 = find_isa_irq_apic(0, mp_INT);
1837 pin2 = ioapic_i8259.pin;
1838 apic2 = ioapic_i8259.apic;
1840 if (pin1 == 0)
1841 timer_uses_ioapic_pin_0 = 1;
1843 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1844 vector, apic1, pin1, apic2, pin2);
1846 if (pin1 != -1) {
1847 /*
1848 * Ok, does IRQ0 through the IOAPIC work?
1849 */
1850 unmask_IO_APIC_irq(0);
1851 if (timer_irq_works()) {
1852 local_irq_restore(flags);
1853 if (disable_timer_pin_1 > 0)
1854 clear_IO_APIC_pin(apic1, pin1);
1855 return;
1857 clear_IO_APIC_pin(apic1, pin1);
1858 printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
1859 "IO-APIC\n");
1862 printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
1863 if (pin2 != -1) {
1864 printk("\n..... (found pin %d) ...", pin2);
1865 /*
1866 * legacy devices should be connected to IO APIC #0
1867 */
1868 setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1869 if (timer_irq_works()) {
1870 local_irq_restore(flags);
1871 printk("works.\n");
1872 if (pin1 != -1)
1873 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
1874 else
1875 add_pin_to_irq(0, apic2, pin2);
1876 return;
1878 /*
1879 * Cleanup, just in case ...
1880 */
1881 clear_IO_APIC_pin(apic2, pin2);
1883 printk(" failed.\n");
1885 if (nmi_watchdog == NMI_IO_APIC) {
1886 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
1887 nmi_watchdog = 0;
1890 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
1892 disable_8259A_irq(0);
1893 irq_desc[0].handler = &lapic_irq_type;
1894 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
1895 enable_8259A_irq(0);
1897 if (timer_irq_works()) {
1898 local_irq_restore(flags);
1899 printk(" works.\n");
1900 return;
1902 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
1903 printk(" failed.\n");
1905 printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
1907 /*timer_ack = 0;*/
1908 init_8259A(0);
1909 make_8259A_irq(0);
1910 apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
1912 unlock_ExtINT_logic();
1914 local_irq_restore(flags);
1916 if (timer_irq_works()) {
1917 printk(" works.\n");
1918 return;
1920 printk(" failed :(.\n");
1921 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
1922 "report. Then try booting with the 'noapic' option");
1925 /*
1927 * IRQ's that are handled by the PIC in the MPS IOAPIC case.
1928 * - IRQ2 is the cascade IRQ, and cannot be an io-apic IRQ.
1929 * Linux doesn't really care, as it's not actually used
1930 * for any interrupt handling anyway.
1931 */
1932 #define PIC_IRQS (1 << PIC_CASCADE_IR)
1934 static struct IO_APIC_route_entry *ioapic_pm_state;
1936 static void __init ioapic_pm_state_alloc(void)
1938 int i, nr_entry = 0;
1940 for (i = 0; i < nr_ioapics; i++)
1941 nr_entry += nr_ioapic_registers[i];
1943 ioapic_pm_state = _xmalloc(sizeof(struct IO_APIC_route_entry)*nr_entry,
1944 sizeof(struct IO_APIC_route_entry));
1945 BUG_ON(ioapic_pm_state == NULL);
1948 void __init setup_IO_APIC(void)
1950 enable_IO_APIC();
1952 if (acpi_ioapic)
1953 io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
1954 else
1955 io_apic_irqs = ~PIC_IRQS;
1957 printk("ENABLING IO-APIC IRQs\n");
1958 printk(" -> Using %s ACK method\n", ioapic_ack_new ? "new" : "old");
1960 /*
1961 * Set up IO-APIC IRQ routing.
1962 */
1963 if (!acpi_ioapic)
1964 setup_ioapic_ids_from_mpc();
1965 sync_Arb_IDs();
1966 setup_IO_APIC_irqs();
1967 init_IO_APIC_traps();
1968 check_timer();
1969 print_IO_APIC();
1970 ioapic_pm_state_alloc();
1972 register_keyhandler('z', &print_IO_APIC_keyhandler);
1975 void ioapic_suspend(void)
1977 struct IO_APIC_route_entry *entry = ioapic_pm_state;
1978 unsigned long flags;
1979 int apic, i;
1981 spin_lock_irqsave(&ioapic_lock, flags);
1982 for (apic = 0; apic < nr_ioapics; apic++) {
1983 for (i = 0; i < nr_ioapic_registers[apic]; i ++, entry ++ ) {
1984 *(((int *)entry) + 1) = io_apic_read(apic, 0x11 + 2 * i);
1985 *(((int *)entry) + 0) = io_apic_read(apic, 0x10 + 2 * i);
1988 spin_unlock_irqrestore(&ioapic_lock, flags);
1991 void ioapic_resume(void)
1993 struct IO_APIC_route_entry *entry = ioapic_pm_state;
1994 unsigned long flags;
1995 union IO_APIC_reg_00 reg_00;
1996 int i, apic;
1998 spin_lock_irqsave(&ioapic_lock, flags);
1999 for (apic = 0; apic < nr_ioapics; apic++){
2000 reg_00.raw = io_apic_read(apic, 0);
2001 if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) {
2002 reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
2003 io_apic_write(apic, 0, reg_00.raw);
2005 for (i = 0; i < nr_ioapic_registers[apic]; i++, entry++) {
2006 io_apic_write(apic, 0x11+2*i, *(((int *)entry)+1));
2007 io_apic_write(apic, 0x10+2*i, *(((int *)entry)+0));
2010 spin_unlock_irqrestore(&ioapic_lock, flags);
2013 /* --------------------------------------------------------------------------
2014 ACPI-based IOAPIC Configuration
2015 -------------------------------------------------------------------------- */
2017 #ifdef CONFIG_ACPI_BOOT
2019 int __init io_apic_get_unique_id (int ioapic, int apic_id)
2021 union IO_APIC_reg_00 reg_00;
2022 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
2023 physid_mask_t tmp;
2024 unsigned long flags;
2025 int i = 0;
2027 /*
2028 * The P4 platform supports up to 256 APIC IDs on two separate APIC
2029 * buses (one for LAPICs, one for IOAPICs), whereas its predecessors
2030 * only supported up to 16 on one shared APIC bus.
2032 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2033 * advantage of new APIC bus architecture.
2034 */
2036 if (physids_empty(apic_id_map))
2037 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
2039 spin_lock_irqsave(&ioapic_lock, flags);
2040 reg_00.raw = io_apic_read(ioapic, 0);
2041 spin_unlock_irqrestore(&ioapic_lock, flags);
2043 if (apic_id >= get_physical_broadcast()) {
2044 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
2045 "%d\n", ioapic, apic_id, reg_00.bits.ID);
2046 apic_id = reg_00.bits.ID;
2049 /*
2050 * Every APIC in a system must have a unique ID or we get lots of nice
2051 * 'stuck on smp_invalidate_needed IPI wait' messages.
2052 */
2053 if (check_apicid_used(apic_id_map, apic_id)) {
2055 for (i = 0; i < get_physical_broadcast(); i++) {
2056 if (!check_apicid_used(apic_id_map, i))
2057 break;
2060 if (i == get_physical_broadcast())
2061 panic("Max apic_id exceeded!\n");
2063 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
2064 "trying %d\n", ioapic, apic_id, i);
2066 apic_id = i;
2069 tmp = apicid_to_cpu_present(apic_id);
2070 physids_or(apic_id_map, apic_id_map, tmp);
2072 if (reg_00.bits.ID != apic_id) {
2073 reg_00.bits.ID = apic_id;
2075 spin_lock_irqsave(&ioapic_lock, flags);
2076 io_apic_write(ioapic, 0, reg_00.raw);
2077 reg_00.raw = io_apic_read(ioapic, 0);
2078 spin_unlock_irqrestore(&ioapic_lock, flags);
2080 /* Sanity check */
2081 if (reg_00.bits.ID != apic_id) {
2082 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
2083 return -1;
2087 apic_printk(APIC_VERBOSE, KERN_INFO
2088 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
2090 return apic_id;
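/*
 * Worked example (illustrative only): if ACPI asks for apic_id 8 but 8 is
 * already set in apic_id_map, the loop above scans IDs from 0 upwards and
 * picks the first unused one; the chosen ID is marked used and, if it
 * differs from what the IO-APIC currently reports, written back and read
 * again as a sanity check.
 */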
2094 int __init io_apic_get_version (int ioapic)
2096 union IO_APIC_reg_01 reg_01;
2097 unsigned long flags;
2099 spin_lock_irqsave(&ioapic_lock, flags);
2100 reg_01.raw = io_apic_read(ioapic, 1);
2101 spin_unlock_irqrestore(&ioapic_lock, flags);
2103 return reg_01.bits.version;
2107 int __init io_apic_get_redir_entries (int ioapic)
2109 union IO_APIC_reg_01 reg_01;
2110 unsigned long flags;
2112 spin_lock_irqsave(&ioapic_lock, flags);
2113 reg_01.raw = io_apic_read(ioapic, 1);
2114 spin_unlock_irqrestore(&ioapic_lock, flags);
2116 return reg_01.bits.entries;
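/*
 * Note (illustrative, not in the original source): reg_01.bits.entries is
 * the index of the last redirection entry, so the number of pins is
 * entries + 1 -- e.g. a value of 23 means a 24-pin IO-APIC.  This is why
 * init_ioapic_mappings() below adds one when filling nr_ioapic_registers[].
 */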
2120 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
2122 struct IO_APIC_route_entry entry;
2123 unsigned long flags;
2124 int vector;
2126 if (!IO_APIC_IRQ(irq)) {
2127 printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
2128 ioapic, irq);
2129 return -EINVAL;
2132 /*
2133 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
2134 * Note that we mask (disable) IRQs now -- these get enabled when the
2135 * corresponding device driver registers for this IRQ.
2136 */
2138 memset(&entry, 0, sizeof(entry));
2140 entry.delivery_mode = INT_DELIVERY_MODE;
2141 entry.dest_mode = INT_DEST_MODE;
2142 SET_DEST(entry.dest.dest32, entry.dest.logical.logical_dest,
2143 cpu_mask_to_apicid(TARGET_CPUS));
2144 entry.trigger = edge_level;
2145 entry.polarity = active_high_low;
2146 entry.mask = 1;
2148 /*
2149 * IRQs < 16 are already in the irq_2_pin[] map
2150 */
2151 if (!platform_legacy_irq(irq))
2152 add_pin_to_irq(irq, ioapic, pin);
2154 vector = assign_irq_vector(irq);
2155 if (vector < 0)
2156 return vector;
2157 entry.vector = vector;
2159 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
2160 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
2161 mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
2162 edge_level, active_high_low);
2164 ioapic_register_intr(irq, edge_level);
2166 if (!ioapic && platform_legacy_irq(irq))
2167 disable_8259A_irq(irq);
2169 spin_lock_irqsave(&ioapic_lock, flags);
2170 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2171 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2172 set_native_irq_info(irq, TARGET_CPUS);
2173 spin_unlock_irqrestore(&ioapic_lock, flags);
2175 return 0;
2178 #endif /*CONFIG_ACPI_BOOT*/
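/*
 * Illustrative usage sketch (hypothetical caller, not in the original
 * source): PCI interrupts are conventionally level-triggered and
 * active-low, so an ACPI interrupt-source handler would program a PCI GSI
 * roughly as below.  The encoding matches the route-entry fields above:
 * 1 = level / active-low, 0 = edge / active-high; for non-legacy GSIs the
 * irq argument is assumed to equal the GSI, as apic_pin_2_gsi_irq() below
 * suggests.
 */
#if 0
static int example_route_pci_gsi(int ioapic, int pin, int gsi)
{
    return io_apic_set_pci_routing(ioapic, pin, gsi,
                                   1 /* level */, 1 /* active low */);
}
#endif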
2180 static int ioapic_physbase_to_id(unsigned long physbase)
2182 int apic;
2183 for ( apic = 0; apic < nr_ioapics; apic++ )
2184 if ( mp_ioapics[apic].mpc_apicaddr == physbase )
2185 return apic;
2186 return -EINVAL;
2189 unsigned apic_gsi_base(int apic);
2191 static int apic_pin_2_gsi_irq(int apic, int pin)
2193 int idx, irq;
2195 if (apic < 0)
2196 return -EINVAL;
2198 irq = apic_gsi_base(apic) + pin;
2199 if (apic == 0) {
2200 idx = find_irq_entry(apic, pin, mp_INT);
2201 if (idx >= 0)
2202 irq = pin_2_irq(idx, apic, pin);
2204 return irq;
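/*
 * Worked example (illustrative): with apic_gsi_base(1) == 24, pin 3 of the
 * second IO-APIC maps to irq 24 + 3 = 27.  Only for the first IO-APIC is
 * the MP table consulted, since legacy ISA IRQs may sit on different pins
 * due to interrupt source overrides.
 */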
2207 int ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
2209 int apic;
2210 unsigned long flags;
2212 if ( (apic = ioapic_physbase_to_id(physbase)) < 0 )
2213 return apic;
2215 spin_lock_irqsave(&ioapic_lock, flags);
2216 *pval = io_apic_read(apic, reg);
2217 spin_unlock_irqrestore(&ioapic_lock, flags);
2219 return 0;
2222 #define WARN_BOGUS_WRITE(f, a...) \
2223 dprintk(XENLOG_INFO, "\n%s: " \
2224 "apic=%d, pin=%d, irq=%d\n" \
2225 "%s: new_entry=%08x\n" \
2226 "%s: " f, __FUNCTION__, apic, pin, irq, \
2227 __FUNCTION__, *(u32 *)&rte, \
2228 __FUNCTION__ , ##a )
2230 int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val)
2232 int apic, pin, irq, ret, vector, pirq;
2233 struct IO_APIC_route_entry rte = { 0 };
2234 unsigned long flags;
2235 struct irq_cfg *cfg;
2236 struct irq_desc *desc;
2238 if ( (apic = ioapic_physbase_to_id(physbase)) < 0 )
2239 return apic;
2241 /* Only write to the first half of a route entry. */
2242 if ( (reg < 0x10) || (reg & 1) )
2243 return 0;
2245 pin = (reg - 0x10) >> 1;
2247 /* Write first half from guest; second half is target info. */
2248 *(u32 *)&rte = val;
2250 /*
2251 * What about weird delivery modes?
2252 * SMI: Ignore? Ought to be set up by the BIOS.
2253 * NMI: Ignore? Watchdog functionality is Xen's concern.
2254 * INIT: Definitely ignore: probably a guest OS bug.
2255 * ExtINT: Ignore? Linux only asserts this at start of day.
2256 * For now, print a message and return an error. We can fix up on demand.
2257 */
2258 if ( rte.delivery_mode > dest_LowestPrio )
2260 printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
2261 printk(" APIC=%d/%d, lo-reg=%x\n", apic, pin, val);
2262 return -EINVAL;
2265 /*
2266 * The guest does not know physical APIC arrangement (flat vs. cluster).
2267 * Apply genapic conventions for this platform.
2268 */
2269 rte.delivery_mode = INT_DELIVERY_MODE;
2270 rte.dest_mode = INT_DEST_MODE;
2272 irq = apic_pin_2_gsi_irq(apic, pin);
2273 if ( irq < 0 )
2274 return irq;
2276 desc = irq_to_desc(irq);
2277 cfg = desc->chip_data;
2279 /*
2280 * Since PHYSDEVOP_alloc_irq_vector is a no-op, rte.vector holds the pirq
2281 * that corresponds to this ioapic pin; retrieve it to build the
2282 * pirq<->irq mapping. Where the GSI is 256 or greater, we assume
2283 * that dom0's pirq equals the irq.
2284 */
2285 pirq = (irq >= 256) ? irq : rte.vector;
2286 if ( (pirq < 0) || (pirq >= dom0->nr_pirqs) )
2287 return -EINVAL;
2289 if ( desc->action )
2291 WARN_BOGUS_WRITE("Attempt to modify IO-APIC pin for in-use IRQ!\n");
2292 return 0;
2295 if ( cfg->vector <= 0 || cfg->vector > LAST_DYNAMIC_VECTOR ) {
2297 printk("allocated vector for irq:%d\n", irq);
2299 vector = assign_irq_vector(irq);
2300 if ( vector < 0 )
2301 return vector;
2303 add_pin_to_irq(irq, apic, pin);
2305 spin_lock(&pcidevs_lock);
2306 spin_lock(&dom0->event_lock);
2307 ret = map_domain_pirq(dom0, pirq, irq,
2308 MAP_PIRQ_TYPE_GSI, NULL);
2309 spin_unlock(&dom0->event_lock);
2310 spin_unlock(&pcidevs_lock);
2311 if ( ret < 0 )
2312 return ret;
2314 spin_lock_irqsave(&ioapic_lock, flags);
2315 /* Set the correct irq-handling type. */
2316 desc->handler = rte.trigger ?
2317 &ioapic_level_type: &ioapic_edge_type;
2319 /* Mask iff level triggered. */
2320 rte.mask = rte.trigger;
2321 /* Set the vector field to the real vector! */
2322 rte.vector = cfg->vector;
2324 SET_DEST(rte.dest.dest32, rte.dest.logical.logical_dest,
2325 cpu_mask_to_apicid(cfg->domain));
2327 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&rte) + 0));
2328 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&rte) + 1));
2330 spin_unlock_irqrestore(&ioapic_lock, flags);
2332 return 0;
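/*
 * Illustrative sketch of the dom0 side, not part of this file: writes
 * reach ioapic_guest_write() via the PHYSDEVOP_apic_write hypercall, which
 * carries the IO-APIC's physical base, the register index and the new low
 * half of the route entry.  This assumes the struct physdev_apic layout
 * from public/physdev.h; the hypercall wrapper and the local variable
 * names are the guest kernel's and are shown only for orientation.
 */
#if 0
    struct physdev_apic apic_op;

    apic_op.apic_physbase = ioapic_phys_addr;  /* from the MADT/MP table   */
    apic_op.reg = 0x10 + 2 * pin;              /* low half of the entry    */
    apic_op.value = low_word;                  /* vector field = dom0 pirq */
    rc = HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
#endif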
2335 void dump_ioapic_irq_info(void)
2337 struct irq_pin_list *entry;
2338 struct IO_APIC_route_entry rte;
2339 unsigned int irq, pin, printed = 0;
2340 unsigned long flags;
2342 for ( irq = 0; irq < nr_irqs_gsi; irq++ )
2344 entry = &irq_2_pin[irq];
2345 if ( entry->pin == -1 )
2346 continue;
2348 if ( !printed++ )
2349 printk("IO-APIC interrupt information:\n");
2351 printk(" IRQ%3d Vec%3d:\n", irq, irq_to_vector(irq));
2353 for ( ; ; )
2355 pin = entry->pin;
2357 printk(" Apic 0x%02x, Pin %2d: ", entry->apic, pin);
2359 spin_lock_irqsave(&ioapic_lock, flags);
2360 *(((int *)&rte) + 0) = io_apic_read(entry->apic, 0x10 + 2 * pin);
2361 *(((int *)&rte) + 1) = io_apic_read(entry->apic, 0x11 + 2 * pin);
2362 spin_unlock_irqrestore(&ioapic_lock, flags);
2364 printk("vector=%u, delivery_mode=%u, dest_mode=%s, "
2365 "delivery_status=%d, polarity=%d, irr=%d, "
2366 "trigger=%s, mask=%d, dest_id:%d\n",
2367 rte.vector, rte.delivery_mode,
2368 rte.dest_mode ? "logical" : "physical",
2369 rte.delivery_status, rte.polarity, rte.irr,
2370 rte.trigger ? "level" : "edge", rte.mask,
2371 rte.dest.logical.logical_dest);
2373 if ( entry->next == 0 )
2374 break;
2375 entry = &irq_2_pin[entry->next];
2380 unsigned highest_gsi(void);
2382 void __init init_ioapic_mappings(void)
2384 unsigned long ioapic_phys;
2385 unsigned int i, idx = FIX_IO_APIC_BASE_0;
2386 union IO_APIC_reg_01 reg_01;
2388 if ( smp_found_config )
2389 nr_irqs_gsi = 0;
2390 for ( i = 0; i < nr_ioapics; i++ )
2392 if ( smp_found_config )
2394 ioapic_phys = mp_ioapics[i].mpc_apicaddr;
2395 if ( !ioapic_phys )
2397 printk(KERN_ERR "WARNING: bogus zero IO-APIC address "
2398 "found in MPTABLE, disabling IO/APIC support!\n");
2399 smp_found_config = 0;
2400 skip_ioapic_setup = 1;
2401 goto fake_ioapic_page;
2404 else
2406 fake_ioapic_page:
2407 ioapic_phys = __pa(alloc_xenheap_page());
2408 clear_page(__va(ioapic_phys));
2410 set_fixmap_nocache(idx, ioapic_phys);
2411 apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
2412 __fix_to_virt(idx), ioapic_phys);
2413 idx++;
2415 if ( smp_found_config )
2417 /* The number of IO-APIC IRQ registers (== #pins): */
2418 reg_01.raw = io_apic_read(i, 1);
2419 nr_ioapic_registers[i] = reg_01.bits.entries + 1;
2420 nr_irqs_gsi += nr_ioapic_registers[i];
2424 nr_irqs_gsi = max(nr_irqs_gsi, highest_gsi());
2426 if ( !smp_found_config || skip_ioapic_setup || nr_irqs_gsi < 16 )
2427 nr_irqs_gsi = 16;
2428 else if ( nr_irqs_gsi > MAX_GSI_IRQS)
2430 /* for PHYSDEVOP_pirq_eoi_gmfn guest assumptions */
2431 printk(KERN_WARNING "Limiting number of GSI IRQs found (%u) to %lu\n",
2432 nr_irqs_gsi, MAX_GSI_IRQS);
2433 nr_irqs_gsi = MAX_GSI_IRQS;
2436 if (nr_irqs < 2 * nr_irqs_gsi)
2437 nr_irqs = 2 * nr_irqs_gsi;
2439 if (nr_irqs > MAX_NR_IRQS)
2440 nr_irqs = MAX_NR_IRQS;
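/*
 * Worked example (illustrative): two IO-APICs with 24 pins each yield
 * nr_irqs_gsi = 48.  That is already >= 16 and, assuming it is below
 * MAX_GSI_IRQS, neither clamp applies; nr_irqs is then raised to at least
 * 2 * 48 = 96 to leave headroom beyond the GSI range, and finally capped
 * at MAX_NR_IRQS.
 */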