xen-vtx-unstable
changeset 6538:a698bd49931b
Merge.
line diff
--- a/linux-2.4-xen-sparse/mkbuildtree Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.4-xen-sparse/mkbuildtree Mon Aug 29 13:19:24 2005 -0700
@@ -102,9 +102,9 @@ for i in `find . -type l`; do rm -f $i;
 relative_lndir ${RS}
 rm -f mkbuildtree
 
-set ${RS}/../linux-2.6-xen-sparse
-[ "$1" == "${RS}/../linux-2.6-xen-sparse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
-LINUX_26="$1"
+LINUX_26=${RS}/../linux-2.6-xen-sparse
+[ -d $LINUX_26 ] || { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
+
 
 # Create links to the shared definitions of the Xen interfaces.
 rm -rf ${AD}/include/asm-xen/xen-public
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Mon Aug 29 13:19:24 2005 -0700
@@ -807,8 +807,8 @@ choice
     direct access method and falls back to the BIOS if that doesn't
     work. If unsure, go with the default, which is "Any".
 
-config PCI_GOBIOS
-    bool "BIOS"
+#config PCI_GOBIOS
+#    bool "BIOS"
 
 config PCI_GOMMCONFIG
     bool "MMConfig"
@@ -821,10 +821,10 @@ config PCI_GOANY
 
 endchoice
 
-config PCI_BIOS
-    bool
-    depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
-    default y
+#config PCI_BIOS
+#    bool
+#    depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+#    default y
 
 config PCI_DIRECT
     bool
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Mon Aug 29 13:19:24 2005 -0700
@@ -610,7 +610,7 @@ static int __init acpi_parse_fadt(unsign
     acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode;
 #endif
 
-#ifdef CONFIG_X86_PM_TIMER
+#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN)
     /* detect the location of the ACPI PM Timer */
     if (fadt->revision >= FADT2_REVISION_ID) {
         /* FADT rev. 2 */
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Mon Aug 29 13:19:24 2005 -0700
@@ -135,6 +135,10 @@ static inline void play_dead(void)
  * low exit latency (ie sit in a loop waiting for
  * somebody to say that they'd like to reschedule)
  */
+#ifdef CONFIG_SMP
+extern void smp_suspend(void);
+extern void smp_resume(void);
+#endif
 void cpu_idle (void)
 {
     int cpu = _smp_processor_id();
@@ -149,6 +153,9 @@ void cpu_idle (void)
 
         if (cpu_is_offline(cpu)) {
             local_irq_disable();
+#ifdef CONFIG_SMP
+            smp_suspend();
+#endif
 #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
             /* Ack it.  From this point on until
                we get woken up, we're not allowed
@@ -159,6 +166,9 @@ void cpu_idle (void)
             HYPERVISOR_vcpu_down(cpu);
 #endif
             play_dead();
+#ifdef CONFIG_SMP
+            smp_resume();
+#endif
             local_irq_enable();
         }
 
@@ -789,10 +799,3 @@ unsigned long arch_align_stack(unsigned
         sp -= get_random_int() % 8192;
     return sp & ~0xf;
 }
-
-
-#ifndef CONFIG_X86_SMP
-void _restore_vcpu(void)
-{
-}
-#endif
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Mon Aug 29 13:19:24 2005 -0700
@@ -1601,32 +1601,71 @@ extern void local_teardown_timer_irq(voi
 
 void smp_suspend(void)
 {
-    /* XXX todo: take down time and ipi's on all cpus */
     local_teardown_timer_irq();
     smp_intr_exit();
 }
 
 void smp_resume(void)
 {
-    /* XXX todo: restore time and ipi's on all cpus */
     smp_intr_init();
     local_setup_timer_irq();
 }
 
-DECLARE_PER_CPU(int, timer_irq);
-
-void _restore_vcpu(void)
-{
-    int cpu = smp_processor_id();
-    extern atomic_t vcpus_rebooting;
+static atomic_t vcpus_rebooting;
 
-    /* We are the first thing the vcpu runs when it comes back,
-       and we are supposed to restore the IPIs and timer
-       interrupts etc.  When we return, the vcpu's idle loop will
-       start up again. */
-    _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
-    _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
-    _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
-    _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
+static void restore_vcpu_ready(void)
+{
+
     atomic_dec(&vcpus_rebooting);
 }
+
+void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int r;
+    int gdt_pages;
+    r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+    if (r != 0)
+        panic("pickling vcpu %d -> %d!\n", vcpu, r);
+
+    /* Translate from machine to physical addresses where necessary,
+       so that they can be translated to our new machine address space
+       after resume.  libxc is responsible for doing this to vcpu0,
+       but we do it to the others. */
+    gdt_pages = (ctxt->gdt_ents + 511) / 512;
+    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+    for (r = 0; r < gdt_pages; r++)
+        ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+}
+
+int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int r;
+    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+    /* This is kind of a hack, and implicitly relies on the fact that
+       the vcpu stops in a place where all of the call clobbered
+       registers are already dead. */
+    ctxt->user_regs.esp -= 4;
+    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+    ctxt->user_regs.eip = (unsigned long)restore_vcpu_ready;
+
+    /* De-canonicalise.  libxc handles this for vcpu 0, but we need
+       to do it for the other vcpus. */
+    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+    for (r = 0; r < gdt_pages; r++)
+        ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+
+    atomic_set(&vcpus_rebooting, 1);
+    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+    if (r != 0) {
+        printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+        return -1;
+    }
+
+    /* Make sure we wait for the new vcpu to come up before trying to do
+       anything with it or starting the next one. */
+    while (atomic_read(&vcpus_rebooting))
+        barrier();
+
+    return 0;
+}
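
The two routines added above are the core of SMP save/restore: save_vcpu_context() pickles a vcpu via HYPERVISOR_vcpu_pickle() and then rewrites machine frame numbers (mfns) as pseudo-physical frame numbers (pfns), because after an xm save/restore the domain is handed a different set of machine frames; restore_vcpu_context() reverses the translation and points eip at restore_vcpu_ready so the rebooted vcpu can signal completion. The stand-alone sketch below shows only the canonicalisation step; the toy pfn<->mfn tables and the struct are stand-ins for Xen's frame-translation macros and vcpu_guest_context_t, not the real interfaces.

/* Minimal user-space sketch of the GDT-frame canonicalisation idea,
 * assuming a toy 8-frame pseudo-physical <-> machine map.  The struct and
 * the mfn_to_pfn()/pfn_to_mfn() helpers are simplified stand-ins. */
#include <stdio.h>

#define NR_FRAMES 8

/* Toy translation tables: pfn <-> mfn. */
static unsigned long phys_to_machine[NR_FRAMES] = { 5, 2, 7, 0, 3, 6, 1, 4 };
static unsigned long machine_to_phys[NR_FRAMES];

static unsigned long pfn_to_mfn(unsigned long pfn) { return phys_to_machine[pfn]; }
static unsigned long mfn_to_pfn(unsigned long mfn) { return machine_to_phys[mfn]; }

struct toy_vcpu_context {
    unsigned long gdt_ents;        /* number of GDT entries */
    unsigned long gdt_frames[2];   /* frame numbers backing the GDT */
};

/* "Pickle": convert machine frame numbers to pseudo-physical ones so the
 * context survives a move to a different machine address space. */
static void canonicalise(struct toy_vcpu_context *ctxt)
{
    int gdt_pages = (ctxt->gdt_ents + 511) / 512;   /* 512 descriptors per page */
    for (int i = 0; i < gdt_pages; i++)
        ctxt->gdt_frames[i] = mfn_to_pfn(ctxt->gdt_frames[i]);
}

/* "Unpickle": convert back to (possibly new) machine frame numbers. */
static void decanonicalise(struct toy_vcpu_context *ctxt)
{
    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
    for (int i = 0; i < gdt_pages; i++)
        ctxt->gdt_frames[i] = pfn_to_mfn(ctxt->gdt_frames[i]);
}

int main(void)
{
    for (unsigned long pfn = 0; pfn < NR_FRAMES; pfn++)
        machine_to_phys[phys_to_machine[pfn]] = pfn;

    struct toy_vcpu_context ctxt = { .gdt_ents = 1024, .gdt_frames = { 5, 2 } };
    canonicalise(&ctxt);     /* now holds pfns 0 and 1 */
    decanonicalise(&ctxt);   /* back to mfns, possibly under a new mapping */
    printf("gdt_frames: %lu %lu\n", ctxt.gdt_frames[0], ctxt.gdt_frames[1]);
    return 0;
}
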
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Mon Aug 29 13:19:24 2005 -0700
@@ -219,6 +219,8 @@ static void __init kernel_physical_mappi
     }
 }
 
+#ifndef CONFIG_XEN
+
 static inline int page_kills_ppro(unsigned long pagenr)
 {
     if (pagenr >= 0x70000 && pagenr <= 0x7003F)
@@ -266,6 +268,13 @@ static inline int page_is_ram(unsigned l
     return 0;
 }
 
+#else /* CONFIG_XEN */
+
+#define page_kills_ppro(p)  0
+#define page_is_ram(p)      1
+
+#endif
+
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Mon Aug 29 13:19:24 2005 -0700
@@ -4,7 +4,7 @@ CFLAGS += -Iarch/$(XENARCH)/pci
 
 c-obj-y := i386.o
 
-c-obj-$(CONFIG_PCI_BIOS) += pcbios.o
+#c-obj-$(CONFIG_PCI_BIOS) += pcbios.o
 c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
 c-obj-$(CONFIG_PCI_DIRECT) += direct.o
 
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Mon Aug 29 13:19:24 2005 -0700
@@ -245,74 +245,6 @@ void unbind_virq_from_irq(int virq)
     spin_unlock(&irq_mapping_update_lock);
 }
 
-/* This is only used when a vcpu from an xm save.  The ipi is expected
-   to have been bound before we suspended, and so all of the xenolinux
-   state is set up; we only need to restore the Xen side of things.
-   The irq number has to be the same, but the evtchn number can
-   change. */
-void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
-{
-    evtchn_op_t op;
-    int evtchn;
-
-    spin_lock(&irq_mapping_update_lock);
-
-    op.cmd = EVTCHNOP_bind_ipi;
-    if ( HYPERVISOR_event_channel_op(&op) != 0 )
-        panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
-    evtchn = op.u.bind_ipi.port;
-
-    printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
-           ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
-           evtchn);
-
-    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
-    irq_to_evtchn[irq] = -1;
-
-    evtchn_to_irq[evtchn] = irq;
-    irq_to_evtchn[irq] = evtchn;
-
-    printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn,
-           evtchn_to_irq[evtchn]);
-    per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
-
-    bind_evtchn_to_cpu(evtchn, vcpu);
-
-    spin_unlock(&irq_mapping_update_lock);
-
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
-}
-
-void _bind_virq_to_irq(int virq, int cpu, int irq)
-{
-    evtchn_op_t op;
-    int evtchn;
-
-    spin_lock(&irq_mapping_update_lock);
-
-    op.cmd = EVTCHNOP_bind_virq;
-    op.u.bind_virq.virq = virq;
-    if ( HYPERVISOR_event_channel_op(&op) != 0 )
-        panic("Failed to bind virtual IRQ %d\n", virq);
-    evtchn = op.u.bind_virq.port;
-
-    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
-    irq_to_evtchn[irq] = -1;
-
-    evtchn_to_irq[evtchn] = irq;
-    irq_to_evtchn[irq] = evtchn;
-
-    per_cpu(virq_to_irq, cpu)[virq] = irq;
-
-    bind_evtchn_to_cpu(evtchn, cpu);
-
-    spin_unlock(&irq_mapping_update_lock);
-
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
-    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
-}
-
 int bind_ipi_to_irq(int ipi)
 {
     evtchn_op_t op;
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Mon Aug 29 13:19:24 2005 -0700
@@ -65,66 +65,13 @@ static int shutting_down = SHUTDOWN_INVA
 #define cpu_up(x) (-EOPNOTSUPP)
 #endif
 
-static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
-{
-    int r;
-    int gdt_pages;
-    r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
-    if (r != 0)
-        panic("pickling vcpu %d -> %d!\n", vcpu, r);
-
-    /* Translate from machine to physical addresses where necessary,
-       so that they can be translated to our new machine address space
-       after resume.  libxc is responsible for doing this to vcpu0,
-       but we do it to the others. */
-    gdt_pages = (ctxt->gdt_ents + 511) / 512;
-    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
-    for (r = 0; r < gdt_pages; r++)
-        ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
-}
-
-void _restore_vcpu(int cpu);
-
-atomic_t vcpus_rebooting;
-
-static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
-{
-    int r;
-    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
-
-    /* This is kind of a hack, and implicitly relies on the fact that
-       the vcpu stops in a place where all of the call clobbered
-       registers are already dead. */
-    ctxt->user_regs.esp -= 4;
-    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
-    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
-
-    /* De-canonicalise.  libxc handles this for vcpu 0, but we need
-       to do it for the other vcpus. */
-    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
-    for (r = 0; r < gdt_pages; r++)
-        ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
-
-    atomic_set(&vcpus_rebooting, 1);
-    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
-    if (r != 0) {
-        printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
-        return -1;
-    }
-
-    /* Make sure we wait for the new vcpu to come up before trying to do
-       anything with it or starting the next one. */
-    while (atomic_read(&vcpus_rebooting))
-        barrier();
-
-    return 0;
-}
+#ifdef CONFIG_SMP
+#endif
 
 static int __do_suspend(void *ignore)
 {
     int i, j;
     suspend_record_t *suspend_record;
-    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
 
     /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
     /* XXX SMH: yes it would :-( */
@@ -138,16 +85,22 @@ static int __do_suspend(void *ignore)
     extern int gnttab_suspend(void);
     extern int gnttab_resume(void);
 
-#ifdef CONFIG_SMP
-    extern void smp_suspend(void);
-    extern void smp_resume(void);
-#endif
     extern void time_suspend(void);
     extern void time_resume(void);
     extern unsigned long max_pfn;
     extern unsigned int *pfn_to_mfn_frame_list;
 
+#ifdef CONFIG_SMP
+    extern void smp_suspend(void);
+    extern void smp_resume(void);
+
+    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
     cpumask_t prev_online_cpus, prev_present_cpus;
+
+    void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt);
+    int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt);
+#endif
+
     int err = 0;
 
     BUG_ON(smp_processor_id() != 0);
@@ -164,6 +117,8 @@ static int __do_suspend(void *ignore)
     if ( suspend_record == NULL )
         goto out;
 
+    preempt_disable();
+#ifdef CONFIG_SMP
     /* Take all of the other cpus offline.  We need to be careful not
        to get preempted between the final test for num_online_cpus()
        == 1 and disabling interrupts, since otherwise userspace could
@@ -175,7 +130,6 @@ static int __do_suspend(void *ignore)
        since by the time num_online_cpus() == 1, there aren't any
        other cpus) */
     cpus_clear(prev_online_cpus);
-    preempt_disable();
     while (num_online_cpus() > 1) {
         preempt_enable();
         for_each_online_cpu(i) {
@@ -190,6 +144,7 @@ static int __do_suspend(void *ignore)
         }
         preempt_disable();
     }
+#endif
 
     suspend_record->nr_pfns = max_pfn; /* final number of pfns */
 
@@ -197,6 +152,7 @@ static int __do_suspend(void *ignore)
 
     preempt_enable();
 
+#ifdef CONFIG_SMP
     cpus_clear(prev_present_cpus);
     for_each_present_cpu(i) {
         if (i == 0)
@@ -204,6 +160,7 @@ static int __do_suspend(void *ignore)
         save_vcpu_context(i, &suspended_cpu_records[i]);
         cpu_set(i, prev_present_cpus);
     }
+#endif
 
 #ifdef __i386__
     mm_pin_all();
@@ -269,12 +226,14 @@ static int __do_suspend(void *ignore)
 
     usbif_resume();
 
-    for_each_cpu_mask(i, prev_present_cpus) {
+#ifdef CONFIG_SMP
+    for_each_cpu_mask(i, prev_present_cpus)
         restore_vcpu_context(i, &suspended_cpu_records[i]);
-    }
+#endif
 
     __sti();
 
+#ifdef CONFIG_SMP
  out_reenable_cpus:
     for_each_cpu_mask(i, prev_online_cpus) {
         j = cpu_up(i);
@@ -284,6 +243,7 @@ static int __do_suspend(void *ignore)
             err = j;
         }
     }
+#endif
 
  out:
     if ( suspend_record != NULL )
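
The restructured __do_suspend() above preserves the ordering constraint spelled out in its comment: the final num_online_cpus() == 1 check must be made with preemption disabled, because cpu_down() can sleep, and in the window where preemption is re-enabled userspace could bring a CPU back online behind the suspend path's back. The following user-space model shows only the shape of that retry loop; preempt_disable(), preempt_enable() and cpu_down() here are trivial stand-ins, not the kernel primitives.

/* Sketch of the "take every other CPU offline" retry loop used by
 * __do_suspend() above.  All kernel calls are modelled as stubs so the
 * loop structure can be seen in isolation; this is not the real code. */
#include <stdio.h>

static int online_cpus = 4;                 /* stand-in for num_online_cpus() */

static void preempt_disable(void) {}        /* stubs: no-ops in this model */
static void preempt_enable(void)  {}
static int  cpu_down(int cpu) { (void)cpu; online_cpus--; return 0; }

int main(void)
{
    preempt_disable();
    /* Loop until only the boot CPU is left.  The real code re-enables
     * preemption around cpu_down() (which can sleep), then re-checks the
     * count with preemption disabled again, so a CPU that userspace
     * brought back online in the window is caught and taken down too. */
    while (online_cpus > 1) {
        preempt_enable();
        for (int cpu = online_cpus - 1; cpu > 0; cpu--)
            if (cpu_down(cpu) != 0)
                fprintf(stderr, "failed to take cpu %d down\n", cpu);
        preempt_disable();
    }
    printf("online cpus: %d\n", online_cpus);
    return 0;
}
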
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Mon Aug 29 13:19:24 2005 -0700
@@ -40,7 +40,7 @@ obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
 i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o
 i386-obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
+#obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
 
 c-obj-$(CONFIG_MODULES) += module.o
 
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Mon Aug 29 13:19:24 2005 -0700
@@ -1277,21 +1277,23 @@ extern void local_teardown_timer_irq(voi
 
 void smp_suspend(void)
 {
-    /* XXX todo: take down time and ipi's on all cpus */
     local_teardown_timer_irq();
     smp_intr_exit();
 }
 
 void smp_resume(void)
 {
-    /* XXX todo: restore time and ipi's on all cpus */
     smp_intr_init();
     local_setup_timer_irq();
 }
 
-void _restore_vcpu(void)
+void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
 {
-    /* XXX need to write this */
+}
+
+int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    return 0;
 }
 
 #endif
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Mon Aug 29 13:19:24 2005 -0700
@@ -767,9 +767,6 @@ void __init clear_kernel_mapping(unsigne
 
 static inline int page_is_ram (unsigned long pagenr)
 {
-    if (pagenr < start_pfn || pagenr >= end_pfn)
-        return 0;
-
     return 1;
 }
 
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Aug 29 13:19:24 2005 -0700
@@ -295,10 +295,10 @@ static struct xenbus_watch target_watch
 /* React to a change in the target key */
 static void watch_target(struct xenbus_watch *watch, const char *node)
 {
-    unsigned long new_target;
+    unsigned long long new_target;
     int err;
 
-    err = xenbus_scanf("memory", "target", "%lu", &new_target);
+    err = xenbus_scanf("memory", "target", "%llu", &new_target);
     if (err != 1) {
         printk(KERN_ERR "Unable to read memory/target\n");
         return;
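
The balloon change switches memory/target parsing from unsigned long with "%lu" to unsigned long long with "%llu". On a 32-bit guest an unsigned long is only 32 bits, so a target of 4 GiB or more cannot be represented, and in any case the conversion width and the variable width must change together, since a mismatched scanf conversion is undefined behaviour. A small illustration of the range problem is below; plain strtoull stands in for xenbus_scanf, which this sketch does not use.

/* Why the balloon target wants unsigned long long: a 32-bit value silently
 * loses the high bits of any target of 4 GiB or more.  strtoull is a
 * stand-in for the xenstore read; this is an illustration, not driver code. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
    const char *target = "6442450944";              /* 6 GiB, in bytes */

    unsigned long long wide = strtoull(target, NULL, 10);
    uint32_t narrow = (uint32_t)wide;               /* what a 32-bit long keeps */

    printf("64-bit value: %llu\n", wide);           /* 6442450944 */
    printf("truncated to 32 bits: %u\n", (unsigned)narrow);  /* 2147483648 */
    return 0;
}
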
212.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 26 13:47:16 2005 -0700 212.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Aug 29 13:19:24 2005 -0700 212.3 @@ -32,23 +32,15 @@ 212.4 */ 212.5 212.6 #if 1 212.7 -#define ASSERT(_p) \ 212.8 - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ 212.9 - __LINE__, __FILE__); *(int*)0=0; } 212.10 +#define ASSERT(p) \ 212.11 + if (!(p)) { printk("Assertion '%s' failed, line %d, file %s", #p , \ 212.12 + __LINE__, __FILE__); *(int*)0=0; } 212.13 #else 212.14 #define ASSERT(_p) 212.15 #endif 212.16 212.17 #include <linux/version.h> 212.18 - 212.19 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 212.20 #include "block.h" 212.21 -#else 212.22 -#include "common.h" 212.23 -#include <linux/blk.h> 212.24 -#include <linux/tqueue.h> 212.25 -#endif 212.26 - 212.27 #include <linux/cdrom.h> 212.28 #include <linux/sched.h> 212.29 #include <linux/interrupt.h> 212.30 @@ -58,91 +50,57 @@ 212.31 #include <asm-xen/xen-public/grant_table.h> 212.32 #include <asm-xen/gnttab.h> 212.33 212.34 -typedef unsigned char byte; /* from linux/ide.h */ 212.35 - 212.36 -/* Control whether runtime update of vbds is enabled. */ 212.37 -#define ENABLE_VBD_UPDATE 1 212.38 - 212.39 #define BLKIF_STATE_DISCONNECTED 0 212.40 #define BLKIF_STATE_CONNECTED 1 212.41 212.42 static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED; 212.43 212.44 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 212.45 - 212.46 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 212.47 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) 212.48 #define GRANTREF_INVALID (1<<15) 212.49 212.50 -static struct blk_shadow { 212.51 - blkif_request_t req; 212.52 - unsigned long request; 212.53 - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 212.54 -} blk_shadow[BLK_RING_SIZE]; 212.55 -unsigned long blk_shadow_free; 212.56 - 212.57 static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */ 212.58 212.59 static void kick_pending_request_queues(struct blkfront_info *info); 212.60 212.61 -static int __init xlblk_init(void); 212.62 - 212.63 static void blkif_completion(struct blk_shadow *s); 212.64 212.65 -static inline int GET_ID_FROM_FREELIST(void) 212.66 +static inline int GET_ID_FROM_FREELIST( 212.67 + struct blkfront_info *info) 212.68 { 212.69 - unsigned long free = blk_shadow_free; 212.70 - BUG_ON(free > BLK_RING_SIZE); 212.71 - blk_shadow_free = blk_shadow[free].req.id; 212.72 - blk_shadow[free].req.id = 0x0fffffee; /* debug */ 212.73 - return free; 212.74 + unsigned long free = info->shadow_free; 212.75 + BUG_ON(free > BLK_RING_SIZE); 212.76 + info->shadow_free = info->shadow[free].req.id; 212.77 + info->shadow[free].req.id = 0x0fffffee; /* debug */ 212.78 + return free; 212.79 } 212.80 212.81 -static inline void ADD_ID_TO_FREELIST(unsigned long id) 212.82 +static inline void ADD_ID_TO_FREELIST( 212.83 + struct blkfront_info *info, unsigned long id) 212.84 { 212.85 - blk_shadow[id].req.id = blk_shadow_free; 212.86 - blk_shadow[id].request = 0; 212.87 - blk_shadow_free = id; 212.88 + info->shadow[id].req.id = info->shadow_free; 212.89 + info->shadow[id].request = 0; 212.90 + info->shadow_free = id; 212.91 } 212.92 212.93 - 212.94 -/************************ COMMON CODE (inlined) ************************/ 212.95 - 212.96 -/* Kernel-specific definitions used in the common code */ 212.97 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 212.98 -#define DISABLE_SCATTERGATHER() 212.99 -#else 212.100 -static int sg_operation = 
-1; 212.101 -#define DISABLE_SCATTERGATHER() (sg_operation = -1) 212.102 -#endif 212.103 - 212.104 static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r) 212.105 { 212.106 212.107 - s->req = *r; 212.108 + s->req = *r; 212.109 } 212.110 212.111 static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s) 212.112 { 212.113 212.114 - *r = s->req; 212.115 + *r = s->req; 212.116 } 212.117 212.118 - 212.119 static inline void flush_requests(struct blkfront_info *info) 212.120 { 212.121 - DISABLE_SCATTERGATHER(); 212.122 - RING_PUSH_REQUESTS(&info->ring); 212.123 - notify_via_evtchn(info->evtchn); 212.124 + RING_PUSH_REQUESTS(&info->ring); 212.125 + notify_via_evtchn(info->evtchn); 212.126 } 212.127 212.128 - 212.129 -/************************** KERNEL VERSION 2.6 **************************/ 212.130 - 212.131 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 212.132 - 212.133 -module_init(xlblk_init); 212.134 - 212.135 static void kick_pending_request_queues(struct blkfront_info *info) 212.136 { 212.137 if (!RING_FULL(&info->ring)) { 212.138 @@ -169,50 +127,44 @@ static void blkif_restart_queue_callback 212.139 212.140 int blkif_open(struct inode *inode, struct file *filep) 212.141 { 212.142 - // struct gendisk *gd = inode->i_bdev->bd_disk; 212.143 - // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; 212.144 - 212.145 - /* Update of usage count is protected by per-device semaphore. */ 212.146 - // di->mi->usage++; 212.147 - 212.148 return 0; 212.149 } 212.150 212.151 212.152 int blkif_release(struct inode *inode, struct file *filep) 212.153 { 212.154 - /* FIXME: This is where we can actually free up majors, etc. --RR */ 212.155 - return 0; 212.156 + return 0; 212.157 } 212.158 212.159 212.160 int blkif_ioctl(struct inode *inode, struct file *filep, 212.161 unsigned command, unsigned long argument) 212.162 { 212.163 - int i; 212.164 + int i; 212.165 212.166 - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 212.167 - command, (long)argument, inode->i_rdev); 212.168 + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 212.169 + command, (long)argument, inode->i_rdev); 212.170 212.171 - switch ( command ) 212.172 - { 212.173 - case HDIO_GETGEO: 212.174 - /* return ENOSYS to use defaults */ 212.175 - return -ENOSYS; 212.176 + switch ( command ) 212.177 + { 212.178 + case HDIO_GETGEO: 212.179 + /* return ENOSYS to use defaults */ 212.180 + return -ENOSYS; 212.181 212.182 - case CDROMMULTISESSION: 212.183 - DPRINTK("FIXME: support multisession CDs later\n"); 212.184 - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 212.185 - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 212.186 - return 0; 212.187 + case CDROMMULTISESSION: 212.188 + DPRINTK("FIXME: support multisession CDs later\n"); 212.189 + for (i = 0; i < sizeof(struct cdrom_multisession); i++) 212.190 + if (put_user(0, (char *)(argument + i))) 212.191 + return -EFAULT; 212.192 + return 0; 212.193 212.194 - default: 212.195 - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 212.196 - command);*/ 212.197 - return -EINVAL; /* same return as native Linux */ 212.198 - } 212.199 + default: 212.200 + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 212.201 + command);*/ 212.202 + return -EINVAL; /* same return as native Linux */ 212.203 + } 212.204 212.205 - return 0; 212.206 + return 0; 212.207 } 212.208 212.209 212.210 @@ -228,76 +180,77 @@ int blkif_ioctl(struct inode *inode, str 212.211 */ 212.212 static int 
blkif_queue_request(struct request *req) 212.213 { 212.214 - struct blkfront_info *info = req->rq_disk->private_data; 212.215 - unsigned long buffer_ma; 212.216 - blkif_request_t *ring_req; 212.217 - struct bio *bio; 212.218 - struct bio_vec *bvec; 212.219 - int idx; 212.220 - unsigned long id; 212.221 - unsigned int fsect, lsect; 212.222 - int ref; 212.223 - grant_ref_t gref_head; 212.224 + struct blkfront_info *info = req->rq_disk->private_data; 212.225 + unsigned long buffer_ma; 212.226 + blkif_request_t *ring_req; 212.227 + struct bio *bio; 212.228 + struct bio_vec *bvec; 212.229 + int idx; 212.230 + unsigned long id; 212.231 + unsigned int fsect, lsect; 212.232 + int ref; 212.233 + grant_ref_t gref_head; 212.234 212.235 - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 212.236 - return 1; 212.237 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 212.238 + return 1; 212.239 212.240 - if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, 212.241 - &gref_head) < 0) { 212.242 - gnttab_request_free_callback(&info->callback, 212.243 - blkif_restart_queue_callback, info, 212.244 - BLKIF_MAX_SEGMENTS_PER_REQUEST); 212.245 - return 1; 212.246 - } 212.247 + if (gnttab_alloc_grant_references( 212.248 + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 212.249 + gnttab_request_free_callback( 212.250 + &info->callback, 212.251 + blkif_restart_queue_callback, 212.252 + info, 212.253 + BLKIF_MAX_SEGMENTS_PER_REQUEST); 212.254 + return 1; 212.255 + } 212.256 212.257 - /* Fill out a communications ring structure. */ 212.258 - ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 212.259 - id = GET_ID_FROM_FREELIST(); 212.260 - blk_shadow[id].request = (unsigned long)req; 212.261 + /* Fill out a communications ring structure. */ 212.262 + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 212.263 + id = GET_ID_FROM_FREELIST(info); 212.264 + info->shadow[id].request = (unsigned long)req; 212.265 212.266 - ring_req->id = id; 212.267 - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; 212.268 - ring_req->sector_number = (blkif_sector_t)req->sector; 212.269 - ring_req->handle = info->handle; 212.270 + ring_req->id = id; 212.271 + ring_req->operation = rq_data_dir(req) ? 212.272 + BLKIF_OP_WRITE : BLKIF_OP_READ; 212.273 + ring_req->sector_number = (blkif_sector_t)req->sector; 212.274 + ring_req->handle = info->handle; 212.275 212.276 - ring_req->nr_segments = 0; 212.277 - rq_for_each_bio(bio, req) 212.278 - { 212.279 - bio_for_each_segment(bvec, bio, idx) 212.280 - { 212.281 - if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST ) 212.282 - BUG(); 212.283 - buffer_ma = page_to_phys(bvec->bv_page); 212.284 - fsect = bvec->bv_offset >> 9; 212.285 - lsect = fsect + (bvec->bv_len >> 9) - 1; 212.286 - /* install a grant reference. */ 212.287 - ref = gnttab_claim_grant_reference(&gref_head); 212.288 - ASSERT( ref != -ENOSPC ); 212.289 + ring_req->nr_segments = 0; 212.290 + rq_for_each_bio (bio, req) { 212.291 + bio_for_each_segment (bvec, bio, idx) { 212.292 + BUG_ON(ring_req->nr_segments 212.293 + == BLKIF_MAX_SEGMENTS_PER_REQUEST); 212.294 + buffer_ma = page_to_phys(bvec->bv_page); 212.295 + fsect = bvec->bv_offset >> 9; 212.296 + lsect = fsect + (bvec->bv_len >> 9) - 1; 212.297 + /* install a grant reference. 
*/ 212.298 + ref = gnttab_claim_grant_reference(&gref_head); 212.299 + ASSERT(ref != -ENOSPC); 212.300 + 212.301 + gnttab_grant_foreign_access_ref( 212.302 + ref, 212.303 + info->backend_id, 212.304 + buffer_ma >> PAGE_SHIFT, 212.305 + rq_data_dir(req) ); 212.306 212.307 - gnttab_grant_foreign_access_ref( 212.308 - ref, 212.309 - info->backend_id, 212.310 - buffer_ma >> PAGE_SHIFT, 212.311 - rq_data_dir(req) ); 212.312 + info->shadow[id].frame[ring_req->nr_segments] = 212.313 + buffer_ma >> PAGE_SHIFT; 212.314 212.315 - blk_shadow[id].frame[ring_req->nr_segments] = 212.316 - buffer_ma >> PAGE_SHIFT; 212.317 - 212.318 - ring_req->frame_and_sects[ring_req->nr_segments] = 212.319 - blkif_fas_from_gref(ref, fsect, lsect); 212.320 + ring_req->frame_and_sects[ring_req->nr_segments] = 212.321 + blkif_fas_from_gref(ref, fsect, lsect); 212.322 212.323 - ring_req->nr_segments++; 212.324 - } 212.325 - } 212.326 + ring_req->nr_segments++; 212.327 + } 212.328 + } 212.329 212.330 - info->ring.req_prod_pvt++; 212.331 + info->ring.req_prod_pvt++; 212.332 212.333 - /* Keep a private copy so we can reissue requests when recovering. */ 212.334 - pickle_request(&blk_shadow[id], ring_req); 212.335 + /* Keep a private copy so we can reissue requests when recovering. */ 212.336 + pickle_request(&info->shadow[id], ring_req); 212.337 212.338 - gnttab_free_grant_references(gref_head); 212.339 + gnttab_free_grant_references(gref_head); 212.340 212.341 - return 0; 212.342 + return 0; 212.343 } 212.344 212.345 /* 212.346 @@ -306,756 +259,197 @@ static int blkif_queue_request(struct re 212.347 */ 212.348 void do_blkif_request(request_queue_t *rq) 212.349 { 212.350 - struct blkfront_info *info = NULL; 212.351 - struct request *req; 212.352 - int queued; 212.353 + struct blkfront_info *info = NULL; 212.354 + struct request *req; 212.355 + int queued; 212.356 212.357 - DPRINTK("Entered do_blkif_request\n"); 212.358 + DPRINTK("Entered do_blkif_request\n"); 212.359 212.360 - queued = 0; 212.361 + queued = 0; 212.362 212.363 - while ( (req = elv_next_request(rq)) != NULL ) 212.364 - { 212.365 - info = req->rq_disk->private_data; 212.366 + while ((req = elv_next_request(rq)) != NULL) { 212.367 + info = req->rq_disk->private_data; 212.368 212.369 - if ( !blk_fs_request(req) ) 212.370 - { 212.371 - end_request(req, 0); 212.372 - continue; 212.373 - } 212.374 + if (!blk_fs_request(req)) { 212.375 + end_request(req, 0); 212.376 + continue; 212.377 + } 212.378 + 212.379 + if (RING_FULL(&info->ring)) 212.380 + goto wait; 212.381 212.382 - if (RING_FULL(&info->ring)) 212.383 - goto wait; 212.384 - 212.385 - DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", 212.386 - req, req->cmd, req->sector, req->current_nr_sectors, 212.387 - req->nr_sectors, req->buffer, 212.388 - rq_data_dir(req) ? "write" : "read"); 212.389 + DPRINTK("do_blk_req %p: cmd %p, sec %lx, " 212.390 + "(%u/%li) buffer:%p [%s]\n", 212.391 + req, req->cmd, req->sector, req->current_nr_sectors, 212.392 + req->nr_sectors, req->buffer, 212.393 + rq_data_dir(req) ? "write" : "read"); 212.394 212.395 - blkdev_dequeue_request(req); 212.396 - if (blkif_queue_request(req)) { 212.397 - blk_requeue_request(rq, req); 212.398 - wait: 212.399 - /* Avoid pointless unplugs. */ 212.400 - blk_stop_queue(rq); 212.401 - break; 212.402 - } 212.403 + blkdev_dequeue_request(req); 212.404 + if (blkif_queue_request(req)) { 212.405 + blk_requeue_request(rq, req); 212.406 + wait: 212.407 + /* Avoid pointless unplugs. 
*/ 212.408 + blk_stop_queue(rq); 212.409 + break; 212.410 + } 212.411 212.412 - queued++; 212.413 - } 212.414 + queued++; 212.415 + } 212.416 212.417 - if ( queued != 0 ) 212.418 - flush_requests(info); 212.419 + if (queued != 0) 212.420 + flush_requests(info); 212.421 } 212.422 212.423 212.424 static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 212.425 { 212.426 - struct request *req; 212.427 - blkif_response_t *bret; 212.428 - RING_IDX i, rp; 212.429 - unsigned long flags; 212.430 - struct blkfront_info *info = (struct blkfront_info *)dev_id; 212.431 - 212.432 - spin_lock_irqsave(&blkif_io_lock, flags); 212.433 - 212.434 - if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { 212.435 - spin_unlock_irqrestore(&blkif_io_lock, flags); 212.436 - return IRQ_HANDLED; 212.437 - } 212.438 - 212.439 - rp = info->ring.sring->rsp_prod; 212.440 - rmb(); /* Ensure we see queued responses up to 'rp'. */ 212.441 - 212.442 - for ( i = info->ring.rsp_cons; i != rp; i++ ) 212.443 - { 212.444 - unsigned long id; 212.445 - 212.446 - bret = RING_GET_RESPONSE(&info->ring, i); 212.447 - id = bret->id; 212.448 - req = (struct request *)blk_shadow[id].request; 212.449 - 212.450 - blkif_completion(&blk_shadow[id]); 212.451 - 212.452 - ADD_ID_TO_FREELIST(id); 212.453 - 212.454 - switch ( bret->operation ) 212.455 - { 212.456 - case BLKIF_OP_READ: 212.457 - case BLKIF_OP_WRITE: 212.458 - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) 212.459 - DPRINTK("Bad return from blkdev data request: %x\n", 212.460 - bret->status); 212.461 - 212.462 - if ( unlikely(end_that_request_first 212.463 - (req, 212.464 - (bret->status == BLKIF_RSP_OKAY), 212.465 - req->hard_nr_sectors)) ) 212.466 - BUG(); 212.467 - end_that_request_last(req); 212.468 - 212.469 - break; 212.470 - default: 212.471 - BUG(); 212.472 - } 212.473 - } 212.474 - 212.475 - info->ring.rsp_cons = i; 212.476 - 212.477 - kick_pending_request_queues(info); 212.478 - 212.479 - spin_unlock_irqrestore(&blkif_io_lock, flags); 212.480 - 212.481 - return IRQ_HANDLED; 212.482 -} 212.483 - 212.484 -#else 212.485 -/************************** KERNEL VERSION 2.4 **************************/ 212.486 - 212.487 -static kdev_t sg_dev; 212.488 -static unsigned long sg_next_sect; 212.489 - 212.490 -/* 212.491 - * Request queues with outstanding work, but ring is currently full. 212.492 - * We need no special lock here, as we always access this with the 212.493 - * blkif_io_lock held. We only need a small maximum list. 212.494 - */ 212.495 -#define MAX_PENDING 8 212.496 -static request_queue_t *pending_queues[MAX_PENDING]; 212.497 -static int nr_pending; 212.498 + struct request *req; 212.499 + blkif_response_t *bret; 212.500 + RING_IDX i, rp; 212.501 + unsigned long flags; 212.502 + struct blkfront_info *info = (struct blkfront_info *)dev_id; 212.503 212.504 - 212.505 -#define blkif_io_lock io_request_lock 212.506 - 212.507 -/*============================================================================*/ 212.508 -static void kick_pending_request_queues(void) 212.509 -{ 212.510 - /* We kick pending request queues if the ring is reasonably empty. */ 212.511 - if ( (nr_pending != 0) && 212.512 - (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) ) 212.513 - { 212.514 - /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ 212.515 - while ( (nr_pending != 0) && !RING_FULL(&info->ring) ) 212.516 - do_blkif_request(pending_queues[--nr_pending]); 212.517 - } 212.518 -} 212.519 - 212.520 -int blkif_open(struct inode *inode, struct file *filep) 212.521 -{ 212.522 - short xldev = inode->i_rdev; 212.523 - struct gendisk *gd = get_gendisk(xldev); 212.524 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 212.525 - short minor = MINOR(xldev); 212.526 - 212.527 - if ( gd->part[minor].nr_sects == 0 ) 212.528 - { 212.529 - /* 212.530 - * Device either doesn't exist, or has zero capacity; we use a few 212.531 - * cheesy heuristics to return the relevant error code 212.532 - */ 212.533 - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || 212.534 - ((minor & (gd->max_p - 1)) != 0) ) 212.535 - { 212.536 - /* 212.537 - * We have a real device, but no such partition, or we just have a 212.538 - * partition number so guess this is the problem. 212.539 - */ 212.540 - return -ENXIO; /* no such device or address */ 212.541 - } 212.542 - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) 212.543 - { 212.544 - /* This is a removable device => assume that media is missing. */ 212.545 - return -ENOMEDIUM; /* media not present (this is a guess) */ 212.546 - } 212.547 - else 212.548 - { 212.549 - /* Just go for the general 'no such device' error. */ 212.550 - return -ENODEV; /* no such device */ 212.551 - } 212.552 - } 212.553 - 212.554 - /* Update of usage count is protected by per-device semaphore. */ 212.555 - disk->usage++; 212.556 - 212.557 - return 0; 212.558 -} 212.559 - 212.560 - 212.561 -int blkif_release(struct inode *inode, struct file *filep) 212.562 -{ 212.563 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 212.564 - 212.565 - /* 212.566 - * When usage drops to zero it may allow more VBD updates to occur. 212.567 - * Update of usage count is protected by a per-device semaphore. 212.568 - */ 212.569 - if ( --disk->usage == 0 ) { 212.570 - vbd_update(); 212.571 - } 212.572 - 212.573 - return 0; 212.574 -} 212.575 - 212.576 + spin_lock_irqsave(&blkif_io_lock, flags); 212.577 212.578 -int blkif_ioctl(struct inode *inode, struct file *filep, 212.579 - unsigned command, unsigned long argument) 212.580 -{ 212.581 - kdev_t dev = inode->i_rdev; 212.582 - struct hd_geometry *geo = (struct hd_geometry *)argument; 212.583 - struct gendisk *gd; 212.584 - struct hd_struct *part; 212.585 - int i; 212.586 - unsigned short cylinders; 212.587 - byte heads, sectors; 212.588 - 212.589 - /* NB. No need to check permissions. That is done for us. 
*/ 212.590 - 212.591 - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 212.592 - command, (long) argument, dev); 212.593 - 212.594 - gd = get_gendisk(dev); 212.595 - part = &gd->part[MINOR(dev)]; 212.596 - 212.597 - switch ( command ) 212.598 - { 212.599 - case BLKGETSIZE: 212.600 - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 212.601 - return put_user(part->nr_sects, (unsigned long *) argument); 212.602 - 212.603 - case BLKGETSIZE64: 212.604 - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, 212.605 - (u64)part->nr_sects * 512); 212.606 - return put_user((u64)part->nr_sects * 512, (u64 *) argument); 212.607 - 212.608 - case BLKRRPART: /* re-read partition table */ 212.609 - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); 212.610 - return blkif_revalidate(dev); 212.611 - 212.612 - case BLKSSZGET: 212.613 - return hardsect_size[MAJOR(dev)][MINOR(dev)]; 212.614 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { 212.615 + spin_unlock_irqrestore(&blkif_io_lock, flags); 212.616 + return IRQ_HANDLED; 212.617 + } 212.618 212.619 - case BLKBSZGET: /* get block size */ 212.620 - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); 212.621 - break; 212.622 - 212.623 - case BLKBSZSET: /* set block size */ 212.624 - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); 212.625 - break; 212.626 - 212.627 - case BLKRASET: /* set read-ahead */ 212.628 - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); 212.629 - break; 212.630 - 212.631 - case BLKRAGET: /* get read-ahead */ 212.632 - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); 212.633 - break; 212.634 - 212.635 - case HDIO_GETGEO: 212.636 - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); 212.637 - if (!argument) return -EINVAL; 212.638 - 212.639 - /* We don't have real geometry info, but let's at least return 212.640 - values consistent with the size of the device */ 212.641 - 212.642 - heads = 0xff; 212.643 - sectors = 0x3f; 212.644 - cylinders = part->nr_sects / (heads * sectors); 212.645 - 212.646 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 212.647 - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; 212.648 - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; 212.649 - if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT; 212.650 - 212.651 - return 0; 212.652 - 212.653 - case HDIO_GETGEO_BIG: 212.654 - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); 212.655 - if (!argument) return -EINVAL; 212.656 - 212.657 - /* We don't have real geometry info, but let's at least return 212.658 - values consistent with the size of the device */ 212.659 + rp = info->ring.sring->rsp_prod; 212.660 + rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ 212.661 212.662 - heads = 0xff; 212.663 - sectors = 0x3f; 212.664 - cylinders = part->nr_sects / (heads * sectors); 212.665 - 212.666 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 212.667 - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; 212.668 - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; 212.669 - if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT; 212.670 - 212.671 - return 0; 212.672 - 212.673 - case CDROMMULTISESSION: 212.674 - DPRINTK("FIXME: support multisession CDs later\n"); 212.675 - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 212.676 - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 212.677 - return 0; 212.678 - 212.679 - case SCSI_IOCTL_GET_BUS_NUMBER: 212.680 - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); 212.681 - return -ENOSYS; 212.682 - 212.683 - default: 212.684 - WPRINTK("ioctl %08x not supported by XL blkif\n", command); 212.685 - return -ENOSYS; 212.686 - } 212.687 - 212.688 - return 0; 212.689 -} 212.690 - 212.691 - 212.692 - 212.693 -/* check media change: should probably do something here in some cases :-) */ 212.694 -int blkif_check(kdev_t dev) 212.695 -{ 212.696 - DPRINTK("blkif_check\n"); 212.697 - return 0; 212.698 -} 212.699 + for (i = info->ring.rsp_cons; i != rp; i++) { 212.700 + unsigned long id; 212.701 212.702 -int blkif_revalidate(kdev_t dev) 212.703 -{ 212.704 - struct block_device *bd; 212.705 - struct gendisk *gd; 212.706 - xl_disk_t *disk; 212.707 - unsigned long capacity; 212.708 - int i, rc = 0; 212.709 - 212.710 - if ( (bd = bdget(dev)) == NULL ) 212.711 - return -EINVAL; 212.712 - 212.713 - /* 212.714 - * Update of partition info, and check of usage count, is protected 212.715 - * by the per-block-device semaphore. 212.716 - */ 212.717 - down(&bd->bd_sem); 212.718 + bret = RING_GET_RESPONSE(&info->ring, i); 212.719 + id = bret->id; 212.720 + req = (struct request *)info->shadow[id].request; 212.721 212.722 - if ( ((gd = get_gendisk(dev)) == NULL) || 212.723 - ((disk = xldev_to_xldisk(dev)) == NULL) || 212.724 - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 212.725 - { 212.726 - rc = -EINVAL; 212.727 - goto out; 212.728 - } 212.729 - 212.730 - if ( disk->usage > 1 ) 212.731 - { 212.732 - rc = -EBUSY; 212.733 - goto out; 212.734 - } 212.735 - 212.736 - /* Only reread partition table if VBDs aren't mapped to partitions. */ 212.737 - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 212.738 - { 212.739 - for ( i = gd->max_p - 1; i >= 0; i-- ) 212.740 - { 212.741 - invalidate_device(dev+i, 1); 212.742 - gd->part[MINOR(dev+i)].start_sect = 0; 212.743 - gd->part[MINOR(dev+i)].nr_sects = 0; 212.744 - gd->sizes[MINOR(dev+i)] = 0; 212.745 - } 212.746 + blkif_completion(&info->shadow[id]); 212.747 212.748 - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 212.749 - } 212.750 - 212.751 - out: 212.752 - up(&bd->bd_sem); 212.753 - bdput(bd); 212.754 - return rc; 212.755 -} 212.756 - 212.757 - 212.758 -/* 212.759 - * blkif_queue_request 212.760 - * 212.761 - * request block io 212.762 - * 212.763 - * id: for guest use only. 212.764 - * operation: BLKIF_OP_{READ,WRITE,PROBE} 212.765 - * buffer: buffer to read/write into. this should be a 212.766 - * virtual address in the guest os. 
212.767 - */ 212.768 -static int blkif_queue_request(unsigned long id, 212.769 - int operation, 212.770 - char * buffer, 212.771 - unsigned long sector_number, 212.772 - unsigned short nr_sectors, 212.773 - kdev_t device, 212.774 - blkif_vdev_t handle) 212.775 -{ 212.776 - unsigned long buffer_ma = virt_to_bus(buffer); 212.777 - unsigned long xid; 212.778 - struct gendisk *gd; 212.779 - blkif_request_t *req; 212.780 - struct buffer_head *bh; 212.781 - unsigned int fsect, lsect; 212.782 - int ref; 212.783 - 212.784 - fsect = (buffer_ma & ~PAGE_MASK) >> 9; 212.785 - lsect = fsect + nr_sectors - 1; 212.786 - 212.787 - /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */ 212.788 - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) 212.789 - BUG(); 212.790 - if ( lsect > ((PAGE_SIZE/512)-1) ) 212.791 - BUG(); 212.792 - 212.793 - buffer_ma &= PAGE_MASK; 212.794 - 212.795 - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 212.796 - return 1; 212.797 - 212.798 - switch ( operation ) 212.799 - { 212.800 - 212.801 - case BLKIF_OP_READ: 212.802 - case BLKIF_OP_WRITE: 212.803 - gd = get_gendisk(device); 212.804 - 212.805 - /* 212.806 - * Update the sector_number we'll pass down as appropriate; note that 212.807 - * we could sanity check that resulting sector will be in this 212.808 - * partition, but this will happen in driver backend anyhow. 212.809 - */ 212.810 - sector_number += gd->part[MINOR(device)].start_sect; 212.811 - 212.812 - /* 212.813 - * If this unit doesn't consist of virtual partitions then we clear 212.814 - * the partn bits from the device number. 212.815 - */ 212.816 - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 212.817 - GENHD_FL_VIRT_PARTNS) ) 212.818 - device &= ~(gd->max_p - 1); 212.819 + ADD_ID_TO_FREELIST(info, id); 212.820 212.821 - if ( (sg_operation == operation) && 212.822 - (sg_dev == device) && 212.823 - (sg_next_sect == sector_number) ) 212.824 - { 212.825 - req = RING_GET_REQUEST(&info->ring, 212.826 - info->ring.req_prod_pvt - 1); 212.827 - bh = (struct buffer_head *)id; 212.828 - 212.829 - bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; 212.830 - blk_shadow[req->id].request = (unsigned long)id; 212.831 - 212.832 - /* install a grant reference. */ 212.833 - ref = gnttab_claim_grant_reference(&gref_head); 212.834 - ASSERT( ref != -ENOSPC ); 212.835 - 212.836 - gnttab_grant_foreign_access_ref( 212.837 - ref, 212.838 - info->backend_id, 212.839 - buffer_ma >> PAGE_SHIFT, 212.840 - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); 212.841 - 212.842 - blk_shadow[req->id].frame[req->nr_segments] = 212.843 - buffer_ma >> PAGE_SHIFT; 212.844 - 212.845 - req->frame_and_sects[req->nr_segments] = 212.846 - blkif_fas_from_gref(ref, fsect, lsect); 212.847 - if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) 212.848 - sg_next_sect += nr_sectors; 212.849 - else 212.850 - DISABLE_SCATTERGATHER(); 212.851 - 212.852 - /* Update the copy of the request in the recovery ring. */ 212.853 - pickle_request(&blk_shadow[req->id], req ); 212.854 - 212.855 - return 0; 212.856 - } 212.857 - else if ( RING_FULL(&info->ring) ) 212.858 - { 212.859 - return 1; 212.860 - } 212.861 - else 212.862 - { 212.863 - sg_operation = operation; 212.864 - sg_dev = device; 212.865 - sg_next_sect = sector_number + nr_sectors; 212.866 - } 212.867 - break; 212.868 - 212.869 - default: 212.870 - panic("unknown op %d\n", operation); 212.871 - } 212.872 - 212.873 - /* Fill out a communications ring structure. 
*/ 212.874 - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 212.875 - 212.876 - xid = GET_ID_FROM_FREELIST(); 212.877 - blk_shadow[xid].request = (unsigned long)id; 212.878 - 212.879 - req->id = xid; 212.880 - req->operation = operation; 212.881 - req->sector_number = (blkif_sector_t)sector_number; 212.882 - req->handle = handle; 212.883 - req->nr_segments = 1; 212.884 - /* install a grant reference. */ 212.885 - ref = gnttab_claim_grant_reference(&gref_head); 212.886 - ASSERT( ref != -ENOSPC ); 212.887 - 212.888 - gnttab_grant_foreign_access_ref( 212.889 - ref, 212.890 - info->backend_id, 212.891 - buffer_ma >> PAGE_SHIFT, 212.892 - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); 212.893 - 212.894 - blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT; 212.895 - 212.896 - req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); 212.897 - 212.898 - /* Keep a private copy so we can reissue requests when recovering. */ 212.899 - pickle_request(&blk_shadow[xid], req); 212.900 + switch (bret->operation) { 212.901 + case BLKIF_OP_READ: 212.902 + case BLKIF_OP_WRITE: 212.903 + if (unlikely(bret->status != BLKIF_RSP_OKAY)) 212.904 + DPRINTK("Bad return from blkdev data " 212.905 + "request: %x\n", bret->status); 212.906 212.907 - info->ring.req_prod_pvt++; 212.908 - 212.909 - return 0; 212.910 -} 212.911 - 212.912 - 212.913 -/* 212.914 - * do_blkif_request 212.915 - * read a block; request is in a request queue 212.916 - */ 212.917 -void do_blkif_request(request_queue_t *rq) 212.918 -{ 212.919 - struct request *req; 212.920 - struct buffer_head *bh, *next_bh; 212.921 - int rw, nsect, full, queued = 0; 212.922 - 212.923 - DPRINTK("Entered do_blkif_request\n"); 212.924 - 212.925 - while ( !rq->plugged && !list_empty(&rq->queue_head)) 212.926 - { 212.927 - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 212.928 - goto out; 212.929 - 212.930 - DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", 212.931 - req, req->cmd, req->sector, 212.932 - req->current_nr_sectors, req->nr_sectors, req->bh); 212.933 - 212.934 - rw = req->cmd; 212.935 - if ( rw == READA ) 212.936 - rw = READ; 212.937 - if ( unlikely((rw != READ) && (rw != WRITE)) ) 212.938 - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); 212.939 - 212.940 - req->errors = 0; 212.941 - 212.942 - bh = req->bh; 212.943 - while ( bh != NULL ) 212.944 - { 212.945 - next_bh = bh->b_reqnext; 212.946 - bh->b_reqnext = NULL; 212.947 - 212.948 - full = blkif_queue_request( 212.949 - (unsigned long)bh, 212.950 - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 212.951 - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); 212.952 - 212.953 - if ( full ) 212.954 - { 212.955 - bh->b_reqnext = next_bh; 212.956 - pending_queues[nr_pending++] = rq; 212.957 - if ( unlikely(nr_pending >= MAX_PENDING) ) 212.958 - BUG(); 212.959 - goto out; 212.960 - } 212.961 - 212.962 - queued++; 212.963 - 212.964 - /* Dequeue the buffer head from the request. */ 212.965 - nsect = bh->b_size >> 9; 212.966 - bh = req->bh = next_bh; 212.967 + BUG_ON(end_that_request_first( 212.968 + req, (bret->status == BLKIF_RSP_OKAY), 212.969 + req->hard_nr_sectors)); 212.970 + end_that_request_last(req); 212.971 + break; 212.972 + default: 212.973 + BUG(); 212.974 + } 212.975 + } 212.976 212.977 - if ( bh != NULL ) 212.978 - { 212.979 - /* There's another buffer head to do. Update the request. 
*/ 212.980 - req->hard_sector += nsect; 212.981 - req->hard_nr_sectors -= nsect; 212.982 - req->sector = req->hard_sector; 212.983 - req->nr_sectors = req->hard_nr_sectors; 212.984 - req->current_nr_sectors = bh->b_size >> 9; 212.985 - req->buffer = bh->b_data; 212.986 - } 212.987 - else 212.988 - { 212.989 - /* That was the last buffer head. Finalise the request. */ 212.990 - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) 212.991 - BUG(); 212.992 - blkdev_dequeue_request(req); 212.993 - end_that_request_last(req); 212.994 - } 212.995 - } 212.996 - } 212.997 + info->ring.rsp_cons = i; 212.998 212.999 - out: 212.1000 - if ( queued != 0 ) 212.1001 - flush_requests(); 212.1002 -} 212.1003 - 212.1004 - 212.1005 -static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 212.1006 -{ 212.1007 - RING_IDX i, rp; 212.1008 - unsigned long flags; 212.1009 - struct buffer_head *bh, *next_bh; 212.1010 - 212.1011 - spin_lock_irqsave(&io_request_lock, flags); 212.1012 - 212.1013 - if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) ) 212.1014 - { 212.1015 - spin_unlock_irqrestore(&io_request_lock, flags); 212.1016 - return; 212.1017 - } 212.1018 - 212.1019 - rp = info->ring.sring->rsp_prod; 212.1020 - rmb(); /* Ensure we see queued responses up to 'rp'. */ 212.1021 + kick_pending_request_queues(info); 212.1022 212.1023 - for ( i = info->ring.rsp_cons; i != rp; i++ ) 212.1024 - { 212.1025 - unsigned long id; 212.1026 - blkif_response_t *bret; 212.1027 - 212.1028 - bret = RING_GET_RESPONSE(&info->ring, i); 212.1029 - id = bret->id; 212.1030 - bh = (struct buffer_head *)blk_shadow[id].request; 212.1031 - 212.1032 - blkif_completion(&blk_shadow[id]); 212.1033 - 212.1034 - ADD_ID_TO_FREELIST(id); 212.1035 + spin_unlock_irqrestore(&blkif_io_lock, flags); 212.1036 212.1037 - switch ( bret->operation ) 212.1038 - { 212.1039 - case BLKIF_OP_READ: 212.1040 - case BLKIF_OP_WRITE: 212.1041 - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) 212.1042 - DPRINTK("Bad return from blkdev data request: %lx\n", 212.1043 - bret->status); 212.1044 - for ( ; bh != NULL; bh = next_bh ) 212.1045 - { 212.1046 - next_bh = bh->b_reqnext; 212.1047 - bh->b_reqnext = NULL; 212.1048 - bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); 212.1049 - } 212.1050 - 212.1051 - break; 212.1052 - case BLKIF_OP_PROBE: 212.1053 - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); 212.1054 - blkif_control_rsp_valid = 1; 212.1055 - break; 212.1056 - default: 212.1057 - BUG(); 212.1058 - } 212.1059 - 212.1060 - } 212.1061 - info->ring.rsp_cons = i; 212.1062 - 212.1063 - kick_pending_request_queues(); 212.1064 - 212.1065 - spin_unlock_irqrestore(&io_request_lock, flags); 212.1066 + return IRQ_HANDLED; 212.1067 } 212.1068 212.1069 -#endif 212.1070 - 212.1071 -/***************************** COMMON CODE *******************************/ 212.1072 - 212.1073 static void blkif_free(struct blkfront_info *info) 212.1074 { 212.1075 - /* Prevent new requests being issued until we fix things up. */ 212.1076 - spin_lock_irq(&blkif_io_lock); 212.1077 - info->connected = BLKIF_STATE_DISCONNECTED; 212.1078 - spin_unlock_irq(&blkif_io_lock); 212.1079 + /* Prevent new requests being issued until we fix things up. */ 212.1080 + spin_lock_irq(&blkif_io_lock); 212.1081 + info->connected = BLKIF_STATE_DISCONNECTED; 212.1082 + spin_unlock_irq(&blkif_io_lock); 212.1083 212.1084 - /* Free resources associated with old device channel. 
*/ 212.1085 - if ( info->ring.sring != NULL ) 212.1086 - { 212.1087 - free_page((unsigned long)info->ring.sring); 212.1088 - info->ring.sring = NULL; 212.1089 - } 212.1090 - unbind_evtchn_from_irqhandler(info->evtchn, NULL); 212.1091 - info->evtchn = 0; 212.1092 + /* Free resources associated with old device channel. */ 212.1093 + if (info->ring.sring != NULL) { 212.1094 + free_page((unsigned long)info->ring.sring); 212.1095 + info->ring.sring = NULL; 212.1096 + } 212.1097 + unbind_evtchn_from_irqhandler(info->evtchn, NULL); 212.1098 + info->evtchn = 0; 212.1099 } 212.1100 212.1101 static void blkif_recover(struct blkfront_info *info) 212.1102 { 212.1103 - int i; 212.1104 - blkif_request_t *req; 212.1105 - struct blk_shadow *copy; 212.1106 - int j; 212.1107 + int i; 212.1108 + blkif_request_t *req; 212.1109 + struct blk_shadow *copy; 212.1110 + int j; 212.1111 212.1112 - /* Stage 1: Make a safe copy of the shadow state. */ 212.1113 - copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL); 212.1114 - BUG_ON(copy == NULL); 212.1115 - memcpy(copy, blk_shadow, sizeof(blk_shadow)); 212.1116 + /* Stage 1: Make a safe copy of the shadow state. */ 212.1117 + copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL); 212.1118 + BUG_ON(copy == NULL); 212.1119 + memcpy(copy, info->shadow, sizeof(info->shadow)); 212.1120 212.1121 - /* Stage 2: Set up free list. */ 212.1122 - memset(&blk_shadow, 0, sizeof(blk_shadow)); 212.1123 - for ( i = 0; i < BLK_RING_SIZE; i++ ) 212.1124 - blk_shadow[i].req.id = i+1; 212.1125 - blk_shadow_free = info->ring.req_prod_pvt; 212.1126 - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 212.1127 + /* Stage 2: Set up free list. */ 212.1128 + memset(&info->shadow, 0, sizeof(info->shadow)); 212.1129 + for (i = 0; i < BLK_RING_SIZE; i++) 212.1130 + info->shadow[i].req.id = i+1; 212.1131 + info->shadow_free = info->ring.req_prod_pvt; 212.1132 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 212.1133 212.1134 - /* Stage 3: Find pending requests and requeue them. */ 212.1135 - for ( i = 0; i < BLK_RING_SIZE; i++ ) 212.1136 - { 212.1137 - /* Not in use? */ 212.1138 - if ( copy[i].request == 0 ) 212.1139 - continue; 212.1140 + /* Stage 3: Find pending requests and requeue them. */ 212.1141 + for (i = 0; i < BLK_RING_SIZE; i++) { 212.1142 + /* Not in use? */ 212.1143 + if (copy[i].request == 0) 212.1144 + continue; 212.1145 212.1146 - /* Grab a request slot and unpickle shadow state into it. */ 212.1147 - req = RING_GET_REQUEST( 212.1148 - &info->ring, info->ring.req_prod_pvt); 212.1149 - unpickle_request(req, ©[i]); 212.1150 + /* Grab a request slot and unpickle shadow state into it. */ 212.1151 + req = RING_GET_REQUEST( 212.1152 + &info->ring, info->ring.req_prod_pvt); 212.1153 + unpickle_request(req, ©[i]); 212.1154 212.1155 - /* We get a new request id, and must reset the shadow state. */ 212.1156 - req->id = GET_ID_FROM_FREELIST(); 212.1157 - memcpy(&blk_shadow[req->id], ©[i], sizeof(copy[i])); 212.1158 + /* We get a new request id, and must reset the shadow state. */ 212.1159 + req->id = GET_ID_FROM_FREELIST(info); 212.1160 + memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); 212.1161 212.1162 - /* Rewrite any grant references invalidated by suspend/resume. 
*/ 212.1163 - for ( j = 0; j < req->nr_segments; j++ ) 212.1164 - { 212.1165 - if ( req->frame_and_sects[j] & GRANTREF_INVALID ) 212.1166 - gnttab_grant_foreign_access_ref( 212.1167 - blkif_gref_from_fas(req->frame_and_sects[j]), 212.1168 - info->backend_id, 212.1169 - blk_shadow[req->id].frame[j], 212.1170 - rq_data_dir((struct request *) 212.1171 - blk_shadow[req->id].request)); 212.1172 - req->frame_and_sects[j] &= ~GRANTREF_INVALID; 212.1173 - } 212.1174 - blk_shadow[req->id].req = *req; 212.1175 + /* Rewrite any grant references invalidated by susp/resume. */ 212.1176 + for (j = 0; j < req->nr_segments; j++) { 212.1177 + if ( req->frame_and_sects[j] & GRANTREF_INVALID ) 212.1178 + gnttab_grant_foreign_access_ref( 212.1179 + blkif_gref_from_fas( 212.1180 + req->frame_and_sects[j]), 212.1181 + info->backend_id, 212.1182 + info->shadow[req->id].frame[j], 212.1183 + rq_data_dir( 212.1184 + (struct request *) 212.1185 + info->shadow[req->id].request)); 212.1186 + req->frame_and_sects[j] &= ~GRANTREF_INVALID; 212.1187 + } 212.1188 + info->shadow[req->id].req = *req; 212.1189 212.1190 - info->ring.req_prod_pvt++; 212.1191 - } 212.1192 + info->ring.req_prod_pvt++; 212.1193 + } 212.1194 212.1195 - kfree(copy); 212.1196 + kfree(copy); 212.1197 212.1198 - recovery = 0; 212.1199 + recovery = 0; 212.1200 212.1201 - /* info->ring->req_prod will be set when we flush_requests().*/ 212.1202 - wmb(); 212.1203 + /* info->ring->req_prod will be set when we flush_requests().*/ 212.1204 + wmb(); 212.1205 212.1206 - /* Kicks things back into life. */ 212.1207 - flush_requests(info); 212.1208 + /* Kicks things back into life. */ 212.1209 + flush_requests(info); 212.1210 212.1211 - /* Now safe to left other people use the interface. */ 212.1212 - info->connected = BLKIF_STATE_CONNECTED; 212.1213 + /* Now safe to left other people use the interface. */ 212.1214 + info->connected = BLKIF_STATE_CONNECTED; 212.1215 } 212.1216 212.1217 static void blkif_connect(struct blkfront_info *info, u16 evtchn) 212.1218 { 212.1219 - int err = 0; 212.1220 + int err = 0; 212.1221 212.1222 - info->evtchn = evtchn; 212.1223 + info->evtchn = evtchn; 212.1224 212.1225 - err = bind_evtchn_to_irqhandler( 212.1226 - info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); 212.1227 - if ( err != 0 ) 212.1228 - { 212.1229 - WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); 212.1230 - return; 212.1231 - } 212.1232 + err = bind_evtchn_to_irqhandler( 212.1233 + info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); 212.1234 + if (err != 0) { 212.1235 + WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); 212.1236 + return; 212.1237 + } 212.1238 } 212.1239 212.1240 212.1241 @@ -1227,9 +621,8 @@ static int talk_to_backend(struct xenbus 212.1242 static int blkfront_probe(struct xenbus_device *dev, 212.1243 const struct xenbus_device_id *id) 212.1244 { 212.1245 - int err; 212.1246 + int err, vdevice, i; 212.1247 struct blkfront_info *info; 212.1248 - int vdevice; 212.1249 212.1250 /* FIXME: Use dynamic device id if this is not set. 
*/ 212.1251 err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice); 212.1252 @@ -1251,6 +644,12 @@ static int blkfront_probe(struct xenbus_ 212.1253 info->mi = NULL; 212.1254 INIT_WORK(&info->work, blkif_restart_queue, (void *)info); 212.1255 212.1256 + info->shadow_free = 0; 212.1257 + memset(info->shadow, 0, sizeof(info->shadow)); 212.1258 + for (i = 0; i < BLK_RING_SIZE; i++) 212.1259 + info->shadow[i].req.id = i+1; 212.1260 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 212.1261 + 212.1262 /* Front end dir is a number, which is used as the id. */ 212.1263 info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); 212.1264 dev->data = info; 212.1265 @@ -1329,55 +728,57 @@ static void __init init_blk_xenbus(void) 212.1266 212.1267 static int wait_for_blkif(void) 212.1268 { 212.1269 - int err = 0; 212.1270 - int i; 212.1271 + int err = 0; 212.1272 + int i; 212.1273 212.1274 - /* 212.1275 - * We should figure out how many and which devices we need to 212.1276 - * proceed and only wait for those. For now, continue once the 212.1277 - * first device is around. 212.1278 - */ 212.1279 - for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ ) 212.1280 - { 212.1281 - set_current_state(TASK_INTERRUPTIBLE); 212.1282 - schedule_timeout(1); 212.1283 - } 212.1284 + /* 212.1285 + * We should figure out how many and which devices we need to 212.1286 + * proceed and only wait for those. For now, continue once the 212.1287 + * first device is around. 212.1288 + */ 212.1289 + for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) { 212.1290 + set_current_state(TASK_INTERRUPTIBLE); 212.1291 + schedule_timeout(1); 212.1292 + } 212.1293 212.1294 - if ( blkif_state != BLKIF_STATE_CONNECTED ) 212.1295 - { 212.1296 - WPRINTK("Timeout connecting to device!\n"); 212.1297 - err = -ENOSYS; 212.1298 - } 212.1299 - return err; 212.1300 + if (blkif_state != BLKIF_STATE_CONNECTED) { 212.1301 + WPRINTK("Timeout connecting to device!\n"); 212.1302 + err = -ENOSYS; 212.1303 + } 212.1304 + return err; 212.1305 } 212.1306 212.1307 static int __init xlblk_init(void) 212.1308 { 212.1309 - int i; 212.1310 + if ((xen_start_info.flags & SIF_INITDOMAIN) 212.1311 + || (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) 212.1312 + return 0; 212.1313 212.1314 - if ( (xen_start_info.flags & SIF_INITDOMAIN) || 212.1315 - (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) 212.1316 - return 0; 212.1317 - 212.1318 - IPRINTK("Initialising virtual block device driver\n"); 212.1319 + IPRINTK("Initialising virtual block device driver\n"); 212.1320 212.1321 - blk_shadow_free = 0; 212.1322 - memset(blk_shadow, 0, sizeof(blk_shadow)); 212.1323 - for ( i = 0; i < BLK_RING_SIZE; i++ ) 212.1324 - blk_shadow[i].req.id = i+1; 212.1325 - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 212.1326 + init_blk_xenbus(); 212.1327 + 212.1328 + wait_for_blkif(); 212.1329 212.1330 - init_blk_xenbus(); 212.1331 + return 0; 212.1332 +} 212.1333 212.1334 - wait_for_blkif(); 212.1335 - 212.1336 - return 0; 212.1337 -} 212.1338 +module_init(xlblk_init); 212.1339 212.1340 static void blkif_completion(struct blk_shadow *s) 212.1341 { 212.1342 - int i; 212.1343 - for ( i = 0; i < s->req.nr_segments; i++ ) 212.1344 - gnttab_free_grant_reference( 212.1345 - blkif_gref_from_fas(s->req.frame_and_sects[i])); 212.1346 + int i; 212.1347 + for (i = 0; i < s->req.nr_segments; i++) 212.1348 + gnttab_free_grant_reference( 212.1349 + blkif_gref_from_fas(s->req.frame_and_sects[i])); 212.1350 } 212.1351 + 212.1352 +/* 212.1353 + * 
Local variables: 212.1354 + * c-file-style: "linux" 212.1355 + * indent-tabs-mode: t 212.1356 + * c-indent-level: 8 212.1357 + * c-basic-offset: 8 212.1358 + * tab-width: 8 212.1359 + * End: 212.1360 + */
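The retained 2.6 response handler is only visible above by its tail (the rsp_cons update, kick_pending_request_queues(info) and the IRQ_HANDLED return). For orientation, a condensed sketch of what such a handler does with the per-device state introduced by this patch: walk the response ring, look the originating request up in info->shadow[] by id, release its grant references, recycle the shadow slot, and complete the struct request. Helper names follow the rest of the patch (blkif_completion, ADD_ID_TO_FREELIST, whose per-device signature is assumed here); this is an illustrative sketch, not the verbatim driver code.

static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
        /* dev_id is the blkfront_info passed to bind_evtchn_to_irqhandler(). */
        struct blkfront_info *info = (struct blkfront_info *)dev_id;
        unsigned long flags;
        RING_IDX i, rp;

        spin_lock_irqsave(&blkif_io_lock, flags);

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
                spin_unlock_irqrestore(&blkif_io_lock, flags);
                return IRQ_HANDLED;
        }

        rp = info->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        for (i = info->ring.rsp_cons; i != rp; i++) {
                blkif_response_t *bret = RING_GET_RESPONSE(&info->ring, i);
                unsigned long id = bret->id;
                struct request *req =
                        (struct request *)info->shadow[id].request;

                blkif_completion(&info->shadow[id]);    /* free grant refs */
                ADD_ID_TO_FREELIST(info, id);           /* recycle shadow slot */

                if (unlikely(bret->status != BLKIF_RSP_OKAY))
                        DPRINTK("Bad return from blkdev request: %d\n",
                                (int)bret->status);

                /* Hand the completed request back to the block layer. */
                if (unlikely(end_that_request_first(
                                req, bret->status == BLKIF_RSP_OKAY,
                                req->hard_nr_sectors)))
                        BUG();
                end_that_request_last(req);
        }
        info->ring.rsp_cons = i;

        kick_pending_request_queues(info);

        spin_unlock_irqrestore(&blkif_io_lock, flags);

        return IRQ_HANDLED;
}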
213.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Aug 26 13:47:16 2005 -0700 213.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Mon Aug 29 13:19:24 2005 -0700 213.3 @@ -96,6 +96,14 @@ struct xlbd_major_info 213.4 struct xlbd_type_info *type; 213.5 }; 213.6 213.7 +struct blk_shadow { 213.8 + blkif_request_t req; 213.9 + unsigned long request; 213.10 + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 213.11 +}; 213.12 + 213.13 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 213.14 + 213.15 /* 213.16 * We have one of these per vbd, whether ide, scsi or 'other'. They 213.17 * hang in private_data off the gendisk structure. We may end up 213.18 @@ -116,11 +124,11 @@ struct blkfront_info 213.19 blkif_front_ring_t ring; 213.20 unsigned int evtchn; 213.21 struct xlbd_major_info *mi; 213.22 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 213.23 request_queue_t *rq; 213.24 -#endif 213.25 struct work_struct work; 213.26 struct gnttab_free_callback callback; 213.27 + struct blk_shadow shadow[BLK_RING_SIZE]; 213.28 + unsigned long shadow_free; 213.29 }; 213.30 213.31 extern spinlock_t blkif_io_lock;
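The shadow[] array added here doubles as the request-id free list that blkfront_probe now seeds (shadow[i].req.id = i+1, terminated with 0x0fffffff): in an unused slot, req.id holds the index of the next free slot, and shadow_free points at the head of the chain, so allocating and releasing an id is O(1). A minimal sketch of what the GET_ID_FROM_FREELIST/ADD_ID_TO_FREELIST helpers used by blkfront.c amount to, written here as inline functions for clarity; the real definitions are not shown in this changeset and may differ in detail.

static inline unsigned long GET_ID_FROM_FREELIST(struct blkfront_info *info)
{
        unsigned long free = info->shadow_free;

        BUG_ON(free > BLK_RING_SIZE);
        /* The next free slot's index was stored in this slot's req.id. */
        info->shadow_free = info->shadow[free].req.id;
        info->shadow[free].req.id = 0x0fffffee; /* mark slot as in use */
        return free;
}

static inline void ADD_ID_TO_FREELIST(struct blkfront_info *info,
                                      unsigned long id)
{
        /* Chain the released slot onto the head of the free list. */
        info->shadow[id].req.id  = info->shadow_free;
        info->shadow[id].request = 0;
        info->shadow_free = id;
}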
233.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Aug 26 13:47:16 2005 -0700 233.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Aug 29 13:19:24 2005 -0700 233.3 @@ -1272,25 +1272,24 @@ static int netfront_remove(struct xenbus 233.4 233.5 static int netfront_suspend(struct xenbus_device *dev) 233.6 { 233.7 - struct net_private *np = dev->data; 233.8 - /* Avoid having tx/rx stuff happen until we're ready. */ 233.9 - unbind_evtchn_from_irqhandler(np->evtchn, np->netdev); 233.10 - return 0; 233.11 + struct netfront_info *info = dev->data; 233.12 + 233.13 + unregister_xenbus_watch(&info->watch); 233.14 + kfree(info->backend); 233.15 + info->backend = NULL; 233.16 + 233.17 + netif_free(info); 233.18 + 233.19 + return 0; 233.20 } 233.21 233.22 static int netfront_resume(struct xenbus_device *dev) 233.23 { 233.24 - struct net_private *np = dev->data; 233.25 - /* 233.26 - * Connect regardless of whether IFF_UP flag set. 233.27 - * Stop bad things from happening until we're back up. 233.28 - */ 233.29 - np->backend_state = BEST_DISCONNECTED; 233.30 - memset(np->tx, 0, PAGE_SIZE); 233.31 - memset(np->rx, 0, PAGE_SIZE); 233.32 - 233.33 - // send_interface_connect(np); 233.34 - return 0; 233.35 + struct net_private *np = dev->data; 233.36 + int err; 233.37 + 233.38 + err = talk_to_backend(dev, np); 233.39 + return err; 233.40 } 233.41 233.42 static struct xenbus_driver netfront = {
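The new netfront suspend handler tears the device down completely (unregister the xenbus watch, drop the cached backend path, netif_free), and resume renegotiates a fresh ring and event channel through talk_to_backend() instead of patching up the old connection, analogous to blkif_free()/blkif_recover() in the blkfront rewrite above. These handlers are wired up through the xenbus_driver declaration that this hunk truncates; a sketch of that registration follows, with the id table and field names assumed rather than taken from the patch.

static struct xenbus_device_id netfront_ids[] = {
        { "vif" },
        { "" }
};

static struct xenbus_driver netfront = {
        .name    = "vif",
        .ids     = netfront_ids,
        .probe   = netfront_probe,
        .remove  = netfront_remove,
        .suspend = netfront_suspend, /* quiesce: watch gone, ring state freed */
        .resume  = netfront_resume,  /* talk_to_backend() reconnects */
};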
431.1 --- a/tools/libxc/xc_linux_save.c Fri Aug 26 13:47:16 2005 -0700 431.2 +++ b/tools/libxc/xc_linux_save.c Mon Aug 29 13:19:24 2005 -0700 431.3 @@ -763,8 +763,6 @@ int xc_linux_save(int xc_handle, int io_ 431.4 batch++; 431.5 } 431.6 431.7 -// DPRINTF("batch %d:%d (n=%d)\n", iter, batch, n); 431.8 - 431.9 if ( batch == 0 ) 431.10 goto skip; /* vanishingly unlikely... */ 431.11 431.12 @@ -915,7 +913,7 @@ int xc_linux_save(int xc_handle, int io_ 431.13 continue; 431.14 } 431.15 431.16 - if ( last_iter ) break; 431.17 + if ( last_iter ) break; 431.18 431.19 if ( live ) 431.20 {
479.1 --- a/tools/python/xen/xend/XendCheckpoint.py Fri Aug 26 13:47:16 2005 -0700 479.2 +++ b/tools/python/xen/xend/XendCheckpoint.py Mon Aug 29 13:19:24 2005 -0700 479.3 @@ -51,7 +51,7 @@ def save(xd, fd, dominfo): 479.4 p = select.poll() 479.5 p.register(child.fromchild.fileno()) 479.6 p.register(child.childerr.fileno()) 479.7 - while True: 479.8 + while True: 479.9 r = p.poll() 479.10 for (fd, event) in r: 479.11 if not event & select.POLLIN: 479.12 @@ -69,8 +69,9 @@ def save(xd, fd, dominfo): 479.13 try: 479.14 dominfo.db.releaseDomain(dominfo.id) 479.15 except Exception, ex: 479.16 - log.warning("error in domain release on xenstore: %s", 479.17 - ex) 479.18 + log.warning( 479.19 + "error in domain release on xenstore: %s", 479.20 + ex) 479.21 pass 479.22 dominfo.state_wait("suspended") 479.23 log.info("suspend %d done" % dominfo.id)
494.1 --- a/tools/python/xen/xend/server/SrvDaemon.py Fri Aug 26 13:47:16 2005 -0700 494.2 +++ b/tools/python/xen/xend/server/SrvDaemon.py Mon Aug 29 13:19:24 2005 -0700 494.3 @@ -42,7 +42,8 @@ class Daemon: 494.4 self.traceon = 0 494.5 self.tracefile = None 494.6 self.traceindent = 0 494.7 - 494.8 + self.child = 0 494.9 + 494.10 def daemon_pids(self): 494.11 pids = [] 494.12 pidex = '(?P<pid>\d+)' 494.13 @@ -140,15 +141,12 @@ class Daemon: 494.14 else: 494.15 return 0 494.16 494.17 - def install_child_reaper(self): 494.18 - #signal.signal(signal.SIGCHLD, self.onSIGCHLD) 494.19 - # Ensure that zombie children are automatically reaped. 494.20 - xu.autoreap() 494.21 - 494.22 def onSIGCHLD(self, signum, frame): 494.23 - code = 1 494.24 - while code > 0: 494.25 - code = os.waitpid(-1, os.WNOHANG) 494.26 + if self.child > 0: 494.27 + try: 494.28 + pid, sts = os.waitpid(self.child, os.WNOHANG) 494.29 + except os.error, ex: 494.30 + pass 494.31 494.32 def fork_pid(self, pidfile): 494.33 """Fork and write the pid of the child to 'pidfile'. 494.34 @@ -156,13 +154,16 @@ class Daemon: 494.35 @param pidfile: pid file 494.36 @return: pid of child in parent, 0 in child 494.37 """ 494.38 - pid = os.fork() 494.39 - if pid: 494.40 + 494.41 + self.child = os.fork() 494.42 + 494.43 + if self.child: 494.44 # Parent 494.45 pidfile = open(pidfile, 'w') 494.46 - pidfile.write(str(pid)) 494.47 + pidfile.write(str(self.child)) 494.48 pidfile.close() 494.49 - return pid 494.50 + 494.51 + return self.child 494.52 494.53 def daemonize(self): 494.54 if not XEND_DAEMONIZE: return 494.55 @@ -203,8 +204,7 @@ class Daemon: 494.56 # Trying to run an already-running service is a success. 494.57 return 0 494.58 494.59 - self.install_child_reaper() 494.60 - 494.61 + signal.signal(signal.SIGCHLD, self.onSIGCHLD) 494.62 if self.fork_pid(XEND_PID_FILE): 494.63 #Parent. Sleep to give child time to start. 494.64 time.sleep(1) 494.65 @@ -309,7 +309,7 @@ class Daemon: 494.66 print >>sys.stderr, 'Exception starting xend:', ex 494.67 if XEND_DEBUG: 494.68 traceback.print_exc() 494.69 - log.exception("Exception starting xend") 494.70 + log.exception("Exception starting xend (%s)" % ex) 494.71 self.exit(1) 494.72 494.73 def createFactories(self):
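Instead of the blanket reaper that was removed (install_child_reaper(), which relied on xu.autoreap to collect any dead child), the daemon now remembers the single pid returned by fork() in self.child and has its SIGCHLD handler reap exactly that child with a non-blocking waitpid, swallowing the error if it has already been collected. The underlying POSIX idiom, for reference, as a small self-contained C sketch (illustrative only, not xend code):

#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pid_t child;                     /* pid recorded by the parent */

static void on_sigchld(int signum)
{
        int status;

        (void)signum;
        if (child > 0) {
                /* WNOHANG: return at once if the child has not exited yet;
                   errors (e.g. already reaped) are deliberately ignored. */
                (void)waitpid(child, &status, WNOHANG);
        }
}

int main(void)
{
        signal(SIGCHLD, on_sigchld);

        child = fork();
        if (child == 0) {
                /* Child: do the real work, then exit. */
                _exit(0);
        }

        /* Parent: carry on; SIGCHLD reaps the child when it exits. */
        pause();
        return 0;
}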
521.1 --- a/tools/python/xen/xm/main.py Fri Aug 26 13:47:16 2005 -0700 521.2 +++ b/tools/python/xen/xm/main.py Mon Aug 29 13:19:24 2005 -0700 521.3 @@ -715,9 +715,9 @@ def main(argv=sys.argv): 521.4 err("Most commands need root access. Please try again as root") 521.5 sys.exit(1) 521.6 except XendError, ex: 521.7 - if args[0] == "bogus": 521.8 - args.remove("bogus") 521.9 if len(args) > 0: 521.10 + if args[0] == "bogus": 521.11 + args.remove("bogus") 521.12 handle_xend_error(argv[1], args[0], ex) 521.13 else: 521.14 print "Unexpected error:", sys.exc_info()[0]