
xen/arch/ia64/xen/hyperprivop.S @ changeset 16669:213a7029fdbc

[IA64] xenoprof: don't modify mPSR.pp. PV case

Don't change mPSR.pp for xenoprof in the PV domain case.
xenoprof manages mPSR.pp, so it must not be modified elsewhere.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Author: Alex Williamson <alex.williamson@hp.com>
Date:   Mon Dec 17 09:51:06 2007 -0700
1 /*
2 * arch/ia64/kernel/hyperprivop.S
3 *
4 * Copyright (C) 2005 Hewlett-Packard Co
5 * Dan Magenheimer <dan.magenheimer@hp.com>
6 */
8 #include <linux/config.h>
10 #include <asm/asmmacro.h>
11 #include <asm/kregs.h>
12 #include <asm/offsets.h>
13 #include <asm/processor.h>
14 #include <asm/system.h>
15 #include <asm/debugger.h>
16 #include <asm/asm-xsi-offsets.h>
17 #include <asm/pgtable.h>
18 #include <asm/vmmu.h>
19 #include <public/xen.h>
22 #define PAGE_PHYS (__DIRTY_BITS | _PAGE_PL_PRIV | _PAGE_AR_RWX)
24 #if 1 // change to 0 to turn off all fast paths
25 # define FAST_HYPERPRIVOPS
26 # ifdef PERF_COUNTERS
27 # define FAST_HYPERPRIVOP_CNT
28 # define FAST_HYPERPRIVOP_PERFC(N) PERFC(fast_hyperprivop + N)
29 # define FAST_REFLECT_CNT
30 # endif
32 //#define FAST_TICK // mostly working (unat problems) but default off for now
33 //#define FAST_TLB_MISS_REFLECT // mostly working but default off for now
34 # undef FAST_ITC //XXX TODO fast_itc doesn't support dom0 vp yet
35 # define FAST_BREAK
36 # undef FAST_ACCESS_REFLECT //XXX TODO fast_access_reflect
37 // doesn't support dom0 vp yet.
38 # define FAST_RFI
39 // TODO: Since we use callback to deliver interrupt,
40 // FAST_SSM_I needs to be rewritten.
41 # define FAST_SSM_I
42 # define FAST_PTC_GA
43 # undef RFI_TO_INTERRUPT // not working yet
44 # define FAST_SET_RR0_TO_RR4
45 #endif
47 #ifdef CONFIG_SMP
48 //#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
49 #undef FAST_PTC_GA
50 #endif
52 // FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
53 #define HANDLE_AR_UNAT
55 // FIXME: This is defined in include/asm-ia64/hw_irq.h but this
56 // doesn't appear to be include'able from assembly?
57 #define IA64_TIMER_VECTOR 0xef
59 // Note: not hand-scheduled for now
60 // Registers at entry
61 // r16 == cr.isr
62 // r17 == cr.iim
63 // r18 == XSI_PSR_IC_OFS
64 // r19 == ipsr.cpl
65 // r31 == pr
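// Rough C-style sketch of the dispatcher below (SSM_I and RFI are checked
// first; after that, a pending unmasked event forces the slow path):
//   switch (cr.iim) {
//   case HYPERPRIVOP_SSM_I: goto hyper_ssm_i;
//   case HYPERPRIVOP_RFI:   goto hyper_rfi;
//   ...                     /* one compare per remaining fast handler */
//   default:                goto dispatch_break_fault;  /* slow path in C */
//   }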
66 GLOBAL_ENTRY(fast_hyperprivop)
67 // HYPERPRIVOP_SSM_I?
68 // assumes domain interrupts pending, so just do it
69 cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
70 (p7) br.sptk.many hyper_ssm_i;;
72 // Check pending event indication
73 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS, r18;;
74 ld8 r20=[r20]
75 ;;
76 ld1 r22=[r20],-1 // evtchn_upcall_mask
77 ;;
78 ld1 r20=[r20] // evtchn_upcall_pending
79 ;;
81 // HYPERPRIVOP_RFI?
82 cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
83 (p7) br.sptk.many hyper_rfi
84 ;;
85 #ifndef FAST_HYPERPRIVOPS // see beginning of file
86 br.sptk.many dispatch_break_fault ;;
87 #endif
88 // if events are enabled (upcall mask == 0) and there are pending events, go slow
89 cmp.ne p7,p0=r20,r0
90 ;;
91 cmp.eq.and p7,p0=r22,r0
92 (p7) br.spnt.many dispatch_break_fault
93 ;;
95 // HYPERPRIVOP_COVER?
96 cmp.eq p7,p0=HYPERPRIVOP_COVER,r17
97 (p7) br.sptk.many hyper_cover
98 ;;
100 // HYPERPRIVOP_SSM_DT?
101 cmp.eq p7,p0=HYPERPRIVOP_SSM_DT,r17
102 (p7) br.sptk.many hyper_ssm_dt
103 ;;
105 // HYPERPRIVOP_RSM_DT?
106 cmp.eq p7,p0=HYPERPRIVOP_RSM_DT,r17
107 (p7) br.sptk.many hyper_rsm_dt
108 ;;
110 // HYPERPRIVOP_SET_ITM?
111 cmp.eq p7,p0=HYPERPRIVOP_SET_ITM,r17
112 (p7) br.sptk.many hyper_set_itm
113 ;;
115 // HYPERPRIVOP_SET_RR0_TO_RR4?
116 cmp.eq p7,p0=HYPERPRIVOP_SET_RR0_TO_RR4,r17
117 (p7) br.sptk.many hyper_set_rr0_to_rr4
118 ;;
120 // HYPERPRIVOP_SET_RR?
121 cmp.eq p7,p0=HYPERPRIVOP_SET_RR,r17
122 (p7) br.sptk.many hyper_set_rr
123 ;;
125 // HYPERPRIVOP_GET_RR?
126 cmp.eq p7,p0=HYPERPRIVOP_GET_RR,r17
127 (p7) br.sptk.many hyper_get_rr
128 ;;
130 // HYPERPRIVOP_GET_PSR?
131 cmp.eq p7,p0=HYPERPRIVOP_GET_PSR,r17
132 (p7) br.sptk.many hyper_get_psr
133 ;;
135 // HYPERPRIVOP_PTC_GA?
136 cmp.eq p7,p0=HYPERPRIVOP_PTC_GA,r17
137 (p7) br.sptk.many hyper_ptc_ga
138 ;;
140 // HYPERPRIVOP_ITC_D?
141 cmp.eq p7,p0=HYPERPRIVOP_ITC_D,r17
142 (p7) br.sptk.many hyper_itc_d
143 ;;
145 // HYPERPRIVOP_ITC_I?
146 cmp.eq p7,p0=HYPERPRIVOP_ITC_I,r17
147 (p7) br.sptk.many hyper_itc_i
148 ;;
150 // HYPERPRIVOP_THASH?
151 cmp.eq p7,p0=HYPERPRIVOP_THASH,r17
152 (p7) br.sptk.many hyper_thash
153 ;;
155 // HYPERPRIVOP_SET_KR?
156 cmp.eq p7,p0=HYPERPRIVOP_SET_KR,r17
157 (p7) br.sptk.many hyper_set_kr
158 ;;
160 // if not one of the above, give up for now and do it the slow way
161 br.sptk.many dispatch_break_fault
162 ;;
163 END(fast_hyperprivop)
165 // give up for now if: ipsr.be==1, ipsr.pp==1
166 // from reflect_interruption, don't need to:
167 // - printk first extint (debug only)
168 // - check for interrupt collection enabled (routine will force on)
169 // - set ifa (not valid for extint)
170 // - set iha (not valid for extint)
171 // - set itir (not valid for extint)
172 // DO need to
173 // - increment the HYPER_SSM_I fast_hyperprivop counter
174 // - set shared_mem iip to instruction after HYPER_SSM_I
175 // - set cr.iip to guest iva+0x3000
176 // - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
177 // be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
178 // i = shared_mem interrupt_delivery_enabled
179 // ic = shared_mem interrupt_collection_enabled
180 // ri = instruction after HYPER_SSM_I
181 // all other bits unchanged from real cr.ipsr
182 // - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
183 // - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
184 // and isr.ri to cr.isr.ri (all other bits zero)
185 // - cover and set shared_mem precover_ifs to cr.ifs
186 // ^^^ MISSED THIS FOR fast_break??
187 // - set shared_mem interrupt_delivery_enabled to 0
188 // - set shared_mem interrupt_collection_enabled to 0
189 // - set r31 to SHAREDINFO_ADDR
190 // - virtual bank switch 0
191 // maybe implement later
192 // - verify that there really IS a deliverable interrupt pending
193 // - set shared_mem iva
194 // needs to be done but not implemented (in reflect_interruption)
195 // - set shared_mem iipa
196 // don't know for sure
197 // - set shared_mem unat
198 // r16 == cr.isr
199 // r17 == cr.iim
200 // r18 == XSI_PSR_IC
201 // r19 == ipsr.cpl
202 // r31 == pr
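// Condensed view of the fast path below (descriptive sketch, not a spec):
//   shared.iip  = address of the instruction after the break;
//   shared.isr  = isr.ir plus the adjusted ri, all other bits zero;
//   shared.ipsr = ipsr with dt/it/rt/i/ic forced on and bn cleared;
//   cr.iip      = vcpu->event_callback_ip;  cover; precover_ifs = cr.ifs;
//   switch to virtual bank 0, then rfi into the guest callback.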
203 ENTRY(hyper_ssm_i)
204 #ifndef FAST_SSM_I
205 br.spnt.few dispatch_break_fault ;;
206 #endif
207 // give up for now if: ipsr.be==1, ipsr.pp==1
208 mov r30=cr.ipsr
209 mov r29=cr.iip;;
210 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
211 cmp.ne p7,p0=r21,r0
212 (p7) br.sptk.many dispatch_break_fault ;;
213 #ifdef FAST_HYPERPRIVOP_CNT
214 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);;
215 ld4 r21=[r20];;
216 adds r21=1,r21;;
217 st4 [r20]=r21;;
218 #endif
219 // set shared_mem iip to instruction after HYPER_SSM_I
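// (ipsr.ri names the bundle slot: if the break was in slot 2, wrap ri to 0
//  and advance iip by one 16-byte bundle; otherwise just move to the next slot)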
220 extr.u r20=r30,IA64_PSR_RI_BIT,2 ;;
221 cmp.eq p6,p7=2,r20 ;;
222 (p6) mov r20=0
223 (p6) adds r29=16,r29
224 (p7) adds r20=1,r20 ;;
225 dep r30=r20,r30,IA64_PSR_RI_BIT,2 // adjust cr.ipsr.ri but don't save yet
226 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
227 st8 [r21]=r29 ;;
228 // set shared_mem isr
229 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
230 dep r16=r16,r0,IA64_ISR_IR_BIT,1;; // insert into cr.isr (rest of bits zero)
231 dep r16=r20,r16,IA64_PSR_RI_BIT,2 // deposit cr.isr.ri
232 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
233 st8 [r21]=r16
234 // set cr.ipsr
235 mov r29=r30
236 movl r28=DELIVER_PSR_SET
237 movl r27=~(DELIVER_PSR_CLR & (~IA64_PSR_CPL));;
238 and r29=r29,r27;;
239 or r29=r29,r28;;
240 // set hpsr_dfh to ipsr
241 adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
242 ld1 r28=[r28];;
243 dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
244 mov cr.ipsr=r29;;
245 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
246 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
247 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
248 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
249 // FOR SSM_I ONLY, also turn on psr.i and psr.ic
250 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC)
251 // movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
252 movl r27=~IA64_PSR_BN;;
253 or r30=r30,r28;;
254 and r30=r30,r27;;
255 mov r20=1
256 movl r22=THIS_CPU(current_psr_i_addr)
257 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
258 ld8 r22=[r22]
259 adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
260 ld1 r28=[r27];;
261 st1 [r27]=r0
262 dep r30=r28,r30,IA64_PSR_DFH_BIT,1
263 ;;
264 st8 [r21]=r30;;
265 // set shared_mem interrupt_delivery_enabled to 0
266 // set shared_mem interrupt_collection_enabled to 0
267 st1 [r22]=r20
268 st4 [r18]=r0
269 // cover and set shared_mem precover_ifs to cr.ifs
270 // set shared_mem ifs to 0
271 cover ;;
272 mov r20=cr.ifs
273 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
274 st8 [r21]=r0 ;;
275 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
276 st8 [r21]=r20 ;;
277 // leave cr.ifs alone for later rfi
278 // set iip to go to event callback handler
279 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
280 ld8 r22=[r22];;
281 adds r22=IA64_VCPU_EVENT_CALLBACK_IP_OFFSET,r22;;
282 ld8 r24=[r22];;
283 mov cr.iip=r24;;
284 // OK, now all set to go except for switch to virtual bank0
285 mov r30=r2
286 mov r29=r3
287 ;;
288 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
289 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
290 // temporarily save ar.unat
291 mov r28=ar.unat
292 bsw.1;;
293 // FIXME?: ar.unat is not really handled correctly,
294 // but may not matter if the OS is NaT-clean
295 .mem.offset 0,0; st8.spill [r2]=r16,16
296 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
297 .mem.offset 0,0; st8.spill [r2]=r18,16
298 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
299 .mem.offset 0,0; st8.spill [r2]=r20,16
300 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
301 .mem.offset 0,0; st8.spill [r2]=r22,16
302 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
303 .mem.offset 0,0; st8.spill [r2]=r24,16
304 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
305 .mem.offset 0,0; st8.spill [r2]=r26,16
306 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
307 .mem.offset 0,0; st8.spill [r2]=r28,16
308 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
309 .mem.offset 0,0; st8.spill [r2]=r30,16
310 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
311 bsw.0 ;;
312 mov r27=ar.unat
313 adds r26=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;;
314 //save bank1 ar.unat
315 st8 [r26]=r27
316 //restore ar.unat
317 mov ar.unat=r28
318 mov r2=r30
319 mov r3=r29
320 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
321 st4 [r20]=r0
322 mov pr=r31,-1 ;;
323 rfi
324 ;;
325 END(hyper_ssm_i)
327 // reflect domain clock interrupt
328 // r31 == pr
329 // r30 == cr.ivr
330 // r29 == rp
331 GLOBAL_ENTRY(fast_tick_reflect)
332 #ifndef FAST_TICK // see beginning of file
333 br.cond.sptk.many rp;;
334 #endif
335 mov r28=IA64_TIMER_VECTOR;;
336 cmp.ne p6,p0=r28,r30
337 (p6) br.cond.spnt.few rp;;
338 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
339 ld8 r26=[r20]
340 mov r27=ar.itc;;
341 adds r27=200,r27;; // safety margin
342 cmp.ltu p6,p0=r26,r27
343 (p6) br.cond.spnt.few rp;;
344 mov r17=cr.ipsr;;
345 // slow path if: ipsr.pp==1
346 extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
347 cmp.ne p6,p0=r21,r0
348 (p6) br.cond.spnt.few rp;;
349 // definitely have a domain tick
350 mov cr.eoi=r0
351 mov rp=r29
352 mov cr.itm=r26 // ensure next tick
353 #ifdef FAST_REFLECT_CNT
354 movl r20=PERFC(fast_reflect + (0x3000>>8));;
355 ld4 r21=[r20];;
356 adds r21=1,r21;;
357 st4 [r20]=r21;;
358 #endif
359 // vcpu_pend_timer(current)
360 movl r18=THIS_CPU(current_psr_ic_addr)
361 ;;
362 ld8 r18=[r18]
363 ;;
364 adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
365 ld8 r20=[r20];;
366 cmp.eq p6,p0=r20,r0 // if cr.itv==0 done
367 (p6) br.cond.spnt.few fast_tick_reflect_done;;
368 tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit
369 (p6) br.cond.spnt.few fast_tick_reflect_done;;
370 extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
371 extr.u r26=r20,6,2 // r26 has irr index of itv.vector
372 movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
373 ld8 r19=[r19];;
374 adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
375 adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
376 ld8 r24=[r22]
377 ld8 r23=[r23];;
378 cmp.eq p6,p0=r23,r24 // skip if this tick already delivered
379 (p6) br.cond.spnt.few fast_tick_reflect_done;;
380 // set irr bit
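// In C terms: irr[vector >> 6] |= 1UL << (vector & 63); r26 selects the
// 64-bit irr word and r27 the bit within it.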
381 adds r21=IA64_VCPU_IRR0_OFFSET,r19
382 shl r26=r26,3;;
383 add r21=r21,r26
384 mov r25=1;;
385 shl r22=r25,r27
386 ld8 r23=[r21];;
387 or r22=r22,r23;;
388 st8 [r21]=r22
389 // set evtchn_upcall_pending!
390 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18;;
391 ld8 r20=[r20];;
392 adds r20=-1,r20;; // evtchn_upcall_pending
393 st1 [r20]=r25
394 // if interrupted at pl0, we're done
395 extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
396 cmp.eq p6,p0=r16,r0;;
397 (p6) br.cond.spnt.few fast_tick_reflect_done;;
398 // if guest vpsr.i is off, we're done
399 movl r21=THIS_CPU(current_psr_i_addr);;
400 ld8 r21=[r21];;
401 ld1 r21=[r21];;
402 cmp.eq p0,p6=r21,r0
403 (p6) br.cond.spnt.few fast_tick_reflect_done;;
405 // OK, we have a clock tick to deliver to the active domain!
406 // so deliver to iva+0x3000
407 // r17 == cr.ipsr
408 // r18 == XSI_PSR_IC
409 // r19 == IA64_KR(CURRENT)
410 // r31 == pr
411 mov r16=cr.isr
412 mov r29=cr.iip
413 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
414 st8 [r21]=r29
415 // set shared_mem isr
416 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
417 dep r16=r16,r0,IA64_ISR_IR_BIT,1 // insert into cr.isr (rest of bits zero)
418 extr.u r20=r17,IA64_PSR_RI_BIT,2;; // get ipsr.ri
419 dep r16=r20,r16,IA64_PSR_RI_BIT,2 // deposit cr.isr.ei
420 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18;;
421 st8 [r21]=r16
422 // set cr.ipsr (make sure cpl==2!)
423 mov r29=r17
424 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
425 movl r27=~DELIVER_PSR_CLR;;
426 and r29=r29,r27;;
427 or r29=r29,r28;;
428 mov cr.ipsr=r29;;
429 // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
430 extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
431 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
432 (p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
433 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
434 movl r27=~(IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
435 or r17=r17,r28;;
436 and r17=r17,r27
437 ld4 r16=[r18];;
438 cmp.ne p6,p0=r16,r0
439 movl r22=THIS_CPU(current_psr_i_addr);;
440 ld8 r22=[r22]
441 (p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
442 ld1 r16=[r22];;
443 cmp.eq p6,p0=r16,r0;;
444 (p6) dep r17=-1,r17,IA64_PSR_I_BIT,1
445 mov r20=1
446 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18;;
447 st8 [r21]=r17
448 // set shared_mem interrupt_delivery_enabled to 0
449 // set shared_mem interrupt_collection_enabled to 0
450 st1 [r22]=r20
451 st4 [r18]=r0;;
452 // cover and set shared_mem precover_ifs to cr.ifs
453 // set shared_mem ifs to 0
454 cover ;;
455 mov r20=cr.ifs
456 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
457 st8 [r21]=r0 ;;
458 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
459 st8 [r21]=r20
460 // leave cr.ifs alone for later rfi
461 // set iip to go to domain IVA break instruction vector
462 adds r22=IA64_VCPU_IVA_OFFSET,r19;;
463 ld8 r23=[r22]
464 movl r24=0x3000;;
465 add r24=r24,r23;;
466 mov cr.iip=r24
467 // OK, now all set to go except for switch to virtual bank0
468 mov r30=r2
469 mov r29=r3
470 #ifdef HANDLE_AR_UNAT
471 mov r28=ar.unat
472 #endif
473 ;;
474 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
475 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
476 ;;
477 bsw.1;;
478 .mem.offset 0,0; st8.spill [r2]=r16,16
479 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
480 .mem.offset 0,0; st8.spill [r2]=r18,16
481 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
482 .mem.offset 0,0; st8.spill [r2]=r20,16
483 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
484 .mem.offset 0,0; st8.spill [r2]=r22,16
485 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
486 .mem.offset 0,0; st8.spill [r2]=r24,16
487 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
488 .mem.offset 0,0; st8.spill [r2]=r26,16
489 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
490 .mem.offset 0,0; st8.spill [r2]=r28,16
491 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
492 .mem.offset 0,0; st8.spill [r2]=r30,16
493 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
494 #ifdef HANDLE_AR_UNAT
495 // r16~r23 are preserved regs in bank0 regs; we need to restore them.
496 // r24~r31 are scratch regs; we don't need to handle their NaT bits,
497 // because the OS handler must assign them before accessing them
498 ld8 r16=[r2],16
499 ld8 r17=[r3],16;;
500 ld8 r18=[r2],16
501 ld8 r19=[r3],16;;
502 ld8 r20=[r2],16
503 ld8 r21=[r3],16;;
504 ld8 r22=[r2],16
505 ld8 r23=[r3],16;;
506 #endif
507 ;;
508 bsw.0 ;;
509 mov r24=ar.unat
510 mov r2=r30
511 mov r3=r29
512 #ifdef HANDLE_AR_UNAT
513 mov ar.unat=r28
514 #endif
515 ;;
516 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
517 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
518 st8 [r25]=r24
519 st4 [r20]=r0
520 fast_tick_reflect_done:
521 mov pr=r31,-1 ;;
522 rfi
523 END(fast_tick_reflect)
525 // reflect domain breaks directly to domain
526 // r16 == cr.isr
527 // r17 == cr.iim
528 // r18 == XSI_PSR_IC
529 // r19 == ipsr.cpl
530 // r31 == pr
531 GLOBAL_ENTRY(fast_break_reflect)
532 #ifndef FAST_BREAK // see beginning of file
533 br.sptk.many dispatch_break_fault ;;
534 #endif
535 mov r30=cr.ipsr
536 mov r29=cr.iip;;
537 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
538 cmp.ne p7,p0=r21,r0
539 (p7) br.spnt.few dispatch_break_fault ;;
540 movl r20=IA64_PSR_CPL ;;
541 and r22=r20,r30 ;;
542 cmp.ne p7,p0=r22,r0
543 (p7) br.spnt.many 1f ;;
544 cmp.eq p7,p0=r17,r0
545 (p7) br.spnt.few dispatch_break_fault ;;
546 #ifdef CRASH_DEBUG
547 movl r21=CDB_BREAK_NUM ;;
548 cmp.eq p7,p0=r17,r21
549 (p7) br.spnt.few dispatch_break_fault ;;
550 #endif
551 1:
552 #if 1 /* special handling in case running on simulator */
553 movl r20=first_break;;
554 ld4 r23=[r20]
555 movl r21=0x80001
556 movl r22=0x80002;;
557 cmp.ne p7,p0=r23,r0
558 (p7) br.spnt.few dispatch_break_fault ;;
559 cmp.eq p7,p0=r21,r17
560 (p7) br.spnt.few dispatch_break_fault ;;
561 cmp.eq p7,p0=r22,r17
562 (p7) br.spnt.few dispatch_break_fault ;;
563 #endif
564 movl r20=0x2c00
565 // save iim in shared_info
566 adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
567 st8 [r21]=r17;;
568 // fall through
569 END(fast_break_reflect)
571 // reflect to domain ivt+r20
572 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
573 // r16 == cr.isr
574 // r18 == XSI_PSR_IC
575 // r20 == offset into ivt
576 // r29 == iip
577 // r30 == ipsr
578 // r31 == pr
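// Condensed view (sketch): save iip/isr/ipsr/precover_ifs into the shared
// page, force the DELIVER_PSR bits (cpl = CONFIG_CPL0_EMUL) into cr.ipsr,
// point cr.iip at the guest's iva + r20, switch to virtual bank 0, and rfi.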
579 ENTRY(fast_reflect)
580 #ifdef FAST_REFLECT_CNT
581 movl r22=PERFC(fast_reflect)
582 shr r23=r20,8-2;;
583 add r22=r22,r23;;
584 ld4 r21=[r22];;
585 adds r21=1,r21;;
586 st4 [r22]=r21;;
587 #endif
588 // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
589 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
590 st8 [r21]=r29,XSI_ISR_OFS-XSI_IIP_OFS;;
591 // set shared_mem isr
592 st8 [r21]=r16 ;;
593 // set cr.ipsr
594 movl r21=THIS_CPU(current_psr_i_addr)
595 mov r29=r30 ;;
596 ld8 r21=[r21]
597 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
598 movl r27=~DELIVER_PSR_CLR;;
599 and r29=r29,r27;;
600 or r29=r29,r28;;
601 // set hpsr_dfh to ipsr
602 adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
603 ld1 r28=[r28];;
604 dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
605 mov cr.ipsr=r29;;
606 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
607 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
608 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
609 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
610 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
611 movl r27=~(IA64_PSR_PP|IA64_PSR_BN);;
612 or r30=r30,r28;;
613 and r30=r30,r27
614 // also set shared_mem ipsr.i and ipsr.ic appropriately
615 ld1 r22=[r21]
616 ld4 r24=[r18];;
617 cmp4.eq p6,p7=r24,r0;;
618 (p6) dep r30=0,r30,IA64_PSR_IC_BIT,1
619 (p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1
620 mov r24=r21
621 cmp.ne p6,p7=r22,r0;;
622 (p6) dep r30=0,r30,IA64_PSR_I_BIT,1
623 (p7) dep r30=-1,r30,IA64_PSR_I_BIT,1
624 mov r22=1
625 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
626 adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
627 ld1 r28=[r27];;
628 st1 [r27]=r0
629 dep r30=r28,r30,IA64_PSR_DFH_BIT,1
630 ;;
631 st8 [r21]=r30
632 // set shared_mem interrupt_delivery_enabled to 0
633 // set shared_mem interrupt_collection_enabled to 0
634 st1 [r24]=r22
635 st4 [r18]=r0;;
636 // cover and set shared_mem precover_ifs to cr.ifs
637 // set shared_mem ifs to 0
638 cover ;;
639 mov r24=cr.ifs
640 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
641 st8 [r21]=r0 ;;
642 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
643 st8 [r21]=r24
644 // FIXME: need to save iipa and isr to be arch-compliant
645 // set iip to go to domain IVA break instruction vector
646 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
647 ld8 r22=[r22];;
648 adds r22=IA64_VCPU_IVA_OFFSET,r22;;
649 ld8 r23=[r22];;
650 add r20=r20,r23;;
651 mov cr.iip=r20
652 // OK, now all set to go except for switch to virtual bank0
653 mov r30=r2
654 mov r29=r3
655 #ifdef HANDLE_AR_UNAT
656 mov r28=ar.unat
657 #endif
658 ;;
659 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
660 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
661 ;;
662 bsw.1;;
663 .mem.offset 0,0; st8.spill [r2]=r16,16
664 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
665 .mem.offset 0,0; st8.spill [r2]=r18,16
666 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
667 .mem.offset 0,0; st8.spill [r2]=r20,16
668 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
669 .mem.offset 0,0; st8.spill [r2]=r22,16
670 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
671 .mem.offset 0,0; st8.spill [r2]=r24,16
672 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
673 .mem.offset 0,0; st8.spill [r2]=r26,16
674 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
675 .mem.offset 0,0; st8.spill [r2]=r28,16
676 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
677 .mem.offset 0,0; st8.spill [r2]=r30,16
678 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
679 #ifdef HANDLE_AR_UNAT
680 // r16~r23 are preserved regs in bank0 regs; we need to restore them.
681 // r24~r31 are scratch regs; we don't need to handle their NaT bits,
682 // because the OS handler must assign them before accessing them
683 ld8 r16=[r2],16
684 ld8 r17=[r3],16;;
685 ld8 r18=[r2],16
686 ld8 r19=[r3],16;;
687 ld8 r20=[r2],16
688 ld8 r21=[r3],16;;
689 ld8 r22=[r2],16
690 ld8 r23=[r3],16;;
691 #endif
692 ;;
693 bsw.0 ;;
694 mov r24=ar.unat
695 mov r2=r30
696 mov r3=r29
697 #ifdef HANDLE_AR_UNAT
698 mov ar.unat=r28
699 #endif
700 ;;
701 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
702 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
703 st8 [r25]=r24
704 st4 [r20]=r0
705 mov pr=r31,-1 ;;
706 rfi
707 ;;
708 END(fast_reflect)
710 // reflect access faults (0x2400,0x2800,0x5300) directly to domain
711 // r16 == isr
712 // r17 == ifa
713 // r19 == reflect number (only pass-thru to dispatch_reflection)
714 // r20 == offset into ivt
715 // r31 == pr
716 GLOBAL_ENTRY(fast_access_reflect)
717 #ifndef FAST_ACCESS_REFLECT // see beginning of file
718 br.spnt.few dispatch_reflection ;;
719 #endif
720 mov r30=cr.ipsr
721 mov r29=cr.iip;;
722 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
723 cmp.ne p7,p0=r21,r0
724 (p7) br.spnt.few dispatch_reflection ;;
725 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
726 cmp.eq p7,p0=r21,r0
727 (p7) br.spnt.few dispatch_reflection ;;
728 movl r18=THIS_CPU(current_psr_ic_addr);;
729 ld8 r18=[r18];;
730 ld4 r21=[r18];;
731 cmp.eq p7,p0=r0,r21
732 (p7) br.spnt.few dispatch_reflection ;;
733 // set shared_mem ifa, FIXME: should we validate it?
734 mov r17=cr.ifa
735 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
736 st8 [r21]=r17 ;;
737 // get rr[ifa] and save to itir in shared memory (extra bits ignored)
738 shr.u r22=r17,61
739 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
740 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
741 shladd r22=r22,3,r21;;
742 ld8 r22=[r22];;
743 and r22=~3,r22;;
744 st8 [r23]=r22;;
745 br.cond.sptk.many fast_reflect;;
746 END(fast_access_reflect)
748 // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
749 // is as it was at the time of the original miss. We want to preserve that
750 // so that, if we get a nested fault, we can just branch to page_fault
751 GLOBAL_ENTRY(fast_tlb_miss_reflect)
752 #ifndef FAST_TLB_MISS_REFLECT // see beginning of file
753 br.spnt.few page_fault ;;
754 #else
755 mov r31=pr
756 mov r30=cr.ipsr
757 mov r29=cr.iip
758 mov r16=cr.isr
759 mov r17=cr.ifa;;
760 // for now, always take slow path for region 0 (e.g. metaphys mode)
761 extr.u r21=r17,61,3;;
762 cmp.eq p7,p0=r0,r21
763 (p7) br.spnt.few page_fault ;;
764 // always take slow path for PL0 (e.g. __copy_from_user)
765 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
766 cmp.eq p7,p0=r21,r0
767 (p7) br.spnt.few page_fault ;;
768 // slow path if strange ipsr or isr bits set
769 extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
770 cmp.ne p7,p0=r21,r0
771 (p7) br.spnt.few page_fault ;;
772 movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
773 and r21=r16,r21;;
774 cmp.ne p7,p0=r0,r21
775 (p7) br.spnt.few page_fault ;;
776 // also take slow path if virtual psr.ic=0
777 movl r18=XSI_PSR_IC;;
778 ld4 r21=[r18];;
779 cmp.eq p7,p0=r0,r21
780 (p7) br.spnt.few page_fault ;;
781 // OK, if we get to here, we are doing a fast vcpu_translate. Need to:
782 // 1) look in the virtual TR's (pinned), if not there
783 // 2) look in the 1-entry TLB (pinned), if not there
784 // 3) check the domain VHPT (NOT pinned, accesses domain memory!)
785 // If we find it in any of these places, we need to effectively do
786 // a hyper_itc_i/d
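// As implemented below: region 0 and regions 5-7 fall back to the slow path,
// a hit in the 1-entry vTLB also goes slow, and otherwise the guest VHPT is
// walked; a present VHPT entry is handed to fast_insert, a missing one is
// reflected back to the guest as a TLB fault.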
788 // short-term hack for now, if in region 5-7, take slow path
789 // since all Linux TRs are in region 5 or 7, we need not check TRs
790 extr.u r21=r17,61,3;;
791 cmp.le p7,p0=5,r21
792 (p7) br.spnt.few page_fault ;;
793 fast_tlb_no_tr_match:
794 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
795 ld8 r27=[r27]
796 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
797 (p6) adds r25=IA64_VCPU_ITLB_OFFSET,r27
798 (p7) adds r25=IA64_VCPU_DTLB_OFFSET,r27;;
799 ld8 r20=[r25],8;;
800 tbit.z p7,p0=r20,VTLB_PTE_P_BIT // present?
801 (p7) br.cond.spnt.few 1f;;
802 // if ifa is in range of tlb, don't bother to check rid, go slow path
803 ld8 r21=[r25],8;;
804 mov r23=1
805 extr.u r21=r21,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;
806 shl r22=r23,r21
807 ld8 r21=[r25],8;;
808 cmp.ltu p7,p0=r17,r21
809 (p7) br.cond.sptk.many 1f;
810 add r21=r22,r21;;
811 cmp.ltu p7,p0=r17,r21
812 (p7) br.cond.spnt.few page_fault;;
814 1: // check the guest VHPT
815 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
816 ld8 r19=[r19]
817 // if (!rr.ve || !(pta & IA64_PTA_VE)) take slow way for now
818 // FIXME: later, we deliver an alt_d/i vector after thash and itir
819 extr.u r25=r17,61,3
820 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
821 shl r25=r25,3;;
822 add r21=r21,r25;;
823 ld8 r22=[r21];;
824 tbit.z p7,p0=r22,0
825 (p7) br.cond.spnt.few page_fault;;
826 tbit.z p7,p0=r19,IA64_PTA_VE_BIT
827 (p7) br.cond.spnt.few page_fault;;
828 tbit.nz p7,p0=r19,IA64_PTA_VF_BIT // long format VHPT
829 (p7) br.cond.spnt.few page_fault;;
831 // compute and save away itir (r22 & RR_PS_MASK)
832 movl r21=IA64_ITIR_PS_MASK;;
833 and r22=r22,r21;;
834 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
835 st8 [r21]=r22;;
837 // save away ifa
838 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
839 st8 [r21]=r17;;
840 // see vcpu_thash to save away iha
841 shr.u r20 = r17, 61
842 addl r25 = 1, r0
843 movl r30 = 0xe000000000000000
844 ;;
845 and r21 = r30, r17 // VHPT_Addr1
846 ;;
847 shladd r28 = r20, 3, r18
848 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
849 ;;
850 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
851 addl r28 = 32767, r0
852 ld8 r24 = [r19] // pta
853 ;;
854 ld8 r23 = [r27] // rrs[vadr>>61]
855 extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
856 ;;
857 extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
858 shl r30 = r25, r26 // pt size
859 ;;
860 shr.u r19 = r17, r22 // ifa pg number
861 shr.u r29 = r24, IA64_PTA_BASE_BIT
862 adds r30 = -1, r30 // pt size mask
863 ;;
864 shladd r27 = r19, 3, r0 // vhpt offset
865 extr.u r26 = r30, 15, 46
866 ;;
867 andcm r24 = r29, r26
868 and r19 = r28, r27
869 shr.u r25 = r27, 15
870 ;;
871 and r23 = r26, r25
872 ;;
873 or r22 = r24, r23
874 ;;
875 dep.z r20 = r22, 15, 46
876 ;;
877 or r30 = r20, r21
878 ;;
879 //or r8 = r19, r30
880 or r19 = r19, r30
881 ;;
882 adds r23=XSI_IHA_OFS-XSI_PSR_IC_OFS,r18 ;;
883 st8 [r23]=r19
884 // done with thash, check guest VHPT
886 adds r20 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
887 ld8 r24 = [r20];; // pta
888 // avoid recursively walking the VHPT
889 // if (((r17=address ^ r24=pta) & ((itir_mask(pta) << 3) >> 3)) != 0) {
890 mov r20=-8
891 xor r21=r17,r24
892 extr.u r24=r24,IA64_PTA_SIZE_BIT,IA64_PTA_SIZE_LEN;;
893 shl r20=r20,r24;;
894 shr.u r20=r20,3;;
895 and r21=r20,r21;;
896 cmp.eq p7,p0=r21,r0
897 (p7) br.cond.spnt.few 1f;;
898 // __copy_from_user(&pte, r19=(void *)(*iha), sizeof(pte)=8)
899 // prepare for possible nested dtlb fault
900 mov r29=b0
901 movl r30=guest_vhpt_miss
902 // now go fetch the entry from the guest VHPT
903 ld8 r20=[r19];;
904 // if we wind up here, we successfully loaded the VHPT entry
906 // this VHPT walker aborts on non-present pages instead
907 // of inserting a not-present translation; this allows
908 // vectoring directly to the miss handler
909 tbit.z p7,p0=r20,0
910 (p7) br.cond.spnt.few page_not_present;;
912 #ifdef FAST_REFLECT_CNT
913 movl r21=PERFC(fast_vhpt_translate);;
914 ld4 r22=[r21];;
915 adds r22=1,r22;;
916 st4 [r21]=r22;;
917 #endif
919 // prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte)
920 // r16 == pte
921 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
922 // r18 == XSI_PSR_IC_OFS
923 // r24 == ps
924 // r29 == saved value of b0 in case of recovery
925 // r30 == recovery ip if failure occurs
926 // r31 == pr
927 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
928 (p6) mov r17=1
929 (p7) mov r17=0
930 mov r16=r20
931 mov r29=b0
932 movl r30=recover_and_page_fault
933 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
934 ld8 r24=[r21];;
935 extr.u r24=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
936 // IFA already in PSCB
937 br.cond.sptk.many fast_insert;;
938 END(fast_tlb_miss_reflect)
940 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
941 ENTRY(recover_and_page_fault)
942 #ifdef PERF_COUNTERS
943 movl r21=PERFC(recover_to_page_fault);;
944 ld4 r22=[r21];;
945 adds r22=1,r22;;
946 st4 [r21]=r22;;
947 #endif
948 mov b0=r29
949 br.cond.sptk.many page_fault;;
951 // if we wind up here, we missed in guest VHPT so recover
952 // from nested dtlb fault and reflect a tlb fault to the guest
953 guest_vhpt_miss:
954 mov b0=r29
955 // fault = IA64_VHPT_FAULT
956 mov r20=r0
957 br.cond.sptk.many 1f;
959 // if we get to here, we are ready to reflect
960 // need to set up virtual ifa, iha, itir (fast_reflect handles
961 // virtual isr, iip, ipsr, ifs)
962 // see vcpu_get_itir_on_fault: get ps,rid,(FIXME key) from rr[ifa]
963 page_not_present:
964 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
965 (p6) movl r20=0x400
966 (p7) movl r20=0x800
968 1: extr.u r25=r17,61,3;;
969 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
970 shl r25=r25,3;;
971 add r21=r21,r25;;
972 ld8 r22=[r21];;
973 extr.u r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN;;
974 dep.z r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN
975 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
976 st8 [r23]=r22
978 // fast reflect expects
979 // r16 == cr.isr
980 // r18 == XSI_PSR_IC
981 // r20 == offset into ivt
982 // r29 == iip
983 // r30 == ipsr
984 // r31 == pr
985 //mov r16=cr.isr
986 mov r29=cr.iip
987 mov r30=cr.ipsr
988 br.sptk.many fast_reflect;;
989 #endif
990 END(fast_tlb_miss_reflect)
992 ENTRY(slow_vcpu_rfi)
993 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18;;
994 ld8 r22=[r22];;
995 tbit.z p6,p0=r22,63
996 (p6) br.spnt.few dispatch_break_fault ;;
997 // If vifs.v is set, we have two IFS to consider:
998 // * the guest IFS
999 // * the hypervisor IFS (validated by cover)
1000 // Because IFS is copied to CFM and is used to adjust AR.BSP,
1001 // virtualization of rfi is not easy.
1002 // Previously there was a two-step method (a first rfi jumped to
1003 // a stub which performed a second rfi).
1004 // This new method discards the RS before executing the hypervisor
1005 // cover. After cover, IFS.IFM will be zero. This IFS simply
1006 // clears CFM without modifying AR.BSP. Therefore the guest IFS can
1007 // be used instead and there is no need for a second rfi.
1008 // Discarding the RS with the following alloc instruction just clears
1009 // CFM, which is safe because rfi will overwrite it.
1010 // There is a drawback: because the RS must be discarded before
1011 // executing C code, emulation of rfi must go through a hyperprivop
1012 // and not through normal instruction decoding.
1013 alloc r22=ar.pfs,0,0,0,0
1014 br.spnt.few dispatch_break_fault
1015 ;;
1016 END(slow_vcpu_rfi)
1018 // ensure that, if giving up, registers at entry to fast_hyperprivop are unchanged
1019 ENTRY(hyper_rfi)
1020 #ifndef FAST_RFI
1021 br.spnt.few slow_vcpu_rfi ;;
1022 #endif
1023 // if no interrupts pending, proceed
1024 mov r30=r0
1025 cmp.eq p7,p0=r20,r0
1026 (p7) br.sptk.many 1f
1027 ;;
1028 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
1029 ld8 r21=[r20];; // r21 = vcr.ipsr
1030 extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
1031 mov r30=r22;;
1032 // r30 determines whether we might deliver an immediate extint
1033 #ifndef RFI_TO_INTERRUPT // see beginning of file
1034 cmp.ne p6,p0=r30,r0
1035 (p6) br.cond.spnt.few slow_vcpu_rfi ;;
1036 #endif
1037 1:
1038 adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
1039 ld8 r21=[r20];; // r21 = vcr.ipsr
1040 // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
1041 movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
1042 and r22=r20,r21
1043 ;;
1044 cmp.ne p7,p0=r22,r20
1045 (p7) br.spnt.few slow_vcpu_rfi ;;
1046 // if was in metaphys mode, do it the slow way (FIXME later?)
1047 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1048 ld4 r20=[r20];;
1049 cmp.ne p7,p0=r20,r0
1050 (p7) br.spnt.few slow_vcpu_rfi ;;
1051 #if 0
1052 // if domain hasn't already done virtual bank switch
1053 // do it the slow way (FIXME later?)
1054 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1055 ld4 r20=[r20];;
1056 cmp.eq p7,p0=r20,r0
1057 (p7) br.spnt.few slow_vcpu_rfi ;;
1058 #endif
1059 adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
1060 ld8 r22=[r20];;
1061 1: // OK now, let's do an rfi.
1062 #ifdef FAST_HYPERPRIVOP_CNT
1063 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);;
1064 ld4 r23=[r20];;
1065 adds r23=1,r23;;
1066 st4 [r20]=r23;;
1067 #endif
1068 #ifdef RFI_TO_INTERRUPT
1069 // maybe do an immediate interrupt delivery?
1070 cmp.ne p6,p0=r30,r0
1071 (p6) br.cond.spnt.few rfi_check_extint;;
1072 #endif
1074 just_do_rfi:
1075 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1076 mov cr.iip=r22
1077 extr.u r19=r21,IA64_PSR_CPL0_BIT,2
1078 adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1079 cmp.gtu p7,p0=CONFIG_CPL0_EMUL,r19
1080 ld8 r20=[r20];;
1081 (p7) mov r19=CONFIG_CPL0_EMUL
1082 dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
1083 mov cr.ifs=r20 ;;
1084 // ipsr.cpl = max(vcr.ipsr.cpl, CONFIG_CPL0_EMUL);
1085 movl r20=THIS_CPU(current_psr_i_addr)
1086 dep r21=r19,r21,IA64_PSR_CPL0_BIT,2;;
1087 // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
1088 ld8 r20=[r20]
1089 mov r19=1
1090 extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
1091 cmp.ne p7,p6=r23,r0 ;;
1092 // not done yet
1093 (p7) st1 [r20]=r0
1094 (p6) st1 [r20]=r19;;
1095 extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
1096 cmp.ne p7,p6=r23,r0 ;;
1097 (p7) st4 [r18]=r19;;
1098 (p6) st4 [r18]=r0;;
1099 // force on psr.ic, i, dt, rt, it, bn
1100 movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT| \
1101 IA64_PSR_IT|IA64_PSR_BN)
1102 // keep cr.ipsr.pp and set vPSR.pp = vIPSR.pp
1103 mov r22=cr.ipsr
1104 ;;
1105 or r21=r21,r20
1106 tbit.z p10,p11 = r22, IA64_PSR_PP_BIT
1107 ;;
1108 adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1109 tbit.z p8,p9 = r21, IA64_PSR_DFH_BIT
1110 adds r23=XSI_VPSR_PP_OFS-XSI_PSR_IC_OFS,r18
1111 ;;
1112 (p9) mov r27=1;;
1113 (p9) st1 [r20]=r27
1114 dep r21=r22,r21,IA64_PSR_PP_BIT,1
1115 (p10) st1 [r23]=r0
1116 (p11) st1 [r23]=r27
1117 ;;
1118 (p8) st1 [r20]=r0
1119 (p8) adds r20=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1120 (p8) ld1 r27=[r20]
1121 ;;
1122 (p8) dep r21=r27,r21, IA64_PSR_DFH_BIT, 1
1123 ;;
1124 mov cr.ipsr=r21
1125 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1126 ld4 r21=[r20];;
1127 cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?"
1128 (p7) br.cond.spnt.few 1f;
1129 // OK, now all set to go except for switch to virtual bank1
1130 mov r22=1;;
1131 st4 [r20]=r22
1132 mov r30=r2
1133 mov r29=r3
1134 mov r17=ar.unat;;
1135 adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
1136 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
1137 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
1138 ld8 r16=[r16];;
1139 mov ar.unat=r16;;
1140 bsw.1;;
1141 // FIXME?: ar.unat is not really handled correctly,
1142 // but may not matter if the OS is NaT-clean
1143 .mem.offset 0,0; ld8.fill r16=[r2],16
1144 .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
1145 .mem.offset 0,0; ld8.fill r18=[r2],16
1146 .mem.offset 0,0; ld8.fill r19=[r3],16 ;;
1147 .mem.offset 8,0; ld8.fill r20=[r2],16
1148 .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
1149 .mem.offset 8,0; ld8.fill r22=[r2],16
1150 .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
1151 .mem.offset 8,0; ld8.fill r24=[r2],16
1152 .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
1153 .mem.offset 8,0; ld8.fill r26=[r2],16
1154 .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
1155 .mem.offset 8,0; ld8.fill r28=[r2],16
1156 .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
1157 .mem.offset 8,0; ld8.fill r30=[r2],16
1158 .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
1159 bsw.0 ;;
1160 mov ar.unat=r17
1161 mov r2=r30
1162 mov r3=r29
1163 1: mov pr=r31,-1
1164 ;;
1165 rfi
1166 ;;
1167 END(hyper_rfi)
1169 #ifdef RFI_TO_INTERRUPT
1170 ENTRY(rfi_check_extint)
1171 //br.sptk.many dispatch_break_fault ;;
1173 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1174 // make sure none of these get trashed in case we end up at just_do_rfi
1175 movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1176 ld8 r30=[r30];;
1177 adds r24=IA64_VCPU_INSVC3_OFFSET,r30
1178 mov r25=192
1179 adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
1180 ld8 r23=[r16];;
1181 cmp.eq p6,p0=r23,r0;;
1182 (p6) adds r16=-8,r16;;
1183 (p6) adds r24=-8,r24;;
1184 (p6) adds r25=-64,r25;;
1185 (p6) ld8 r23=[r16];;
1186 (p6) cmp.eq p6,p0=r23,r0;;
1187 (p6) adds r16=-8,r16;;
1188 (p6) adds r24=-8,r24;;
1189 (p6) adds r25=-64,r25;;
1190 (p6) ld8 r23=[r16];;
1191 (p6) cmp.eq p6,p0=r23,r0;;
1192 (p6) adds r16=-8,r16;;
1193 (p6) adds r24=-8,r24;;
1194 (p6) adds r25=-64,r25;;
1195 (p6) ld8 r23=[r16];;
1196 cmp.eq p6,p0=r23,r0
1197 (p6) br.cond.spnt.few just_do_rfi; // this is actually an error
1198 // r16 points to non-zero element of irr, r23 has value
1199 // r24 points to corr element of insvc, r25 has elt*64
1200 ld8 r26=[r24];;
1201 cmp.geu p6,p0=r26,r23
1202 (p6) br.cond.spnt.many just_do_rfi;
1204 // not masked by insvc, get vector number
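// (the shift/or cascade below smears the highest set bit of r23 downward;
//  popcnt of the complement then yields 63 - msb, so r26 ends up holding the
//  index of the highest pending bit in this irr word)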
1205 shr.u r26=r23,1;;
1206 or r26=r23,r26;;
1207 shr.u r27=r26,2;;
1208 or r26=r26,r27;;
1209 shr.u r27=r26,4;;
1210 or r26=r26,r27;;
1211 shr.u r27=r26,8;;
1212 or r26=r26,r27;;
1213 shr.u r27=r26,16;;
1214 or r26=r26,r27;;
1215 shr.u r27=r26,32;;
1216 or r26=r26,r27;;
1217 andcm r26=0xffffffffffffffff,r26;;
1218 popcnt r26=r26;;
1219 sub r26=63,r26;;
1220 // r26 now contains the bit index (mod 64)
1221 mov r27=1;;
1222 shl r27=r27,r26;;
1223 // r27 now contains the (within the proper word) bit mask
1224 add r26=r25,r26
1225 // r26 now contains the vector [0..255]
1226 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1227 ld8 r20=[r20] ;;
1228 extr.u r28=r20,16,1
1229 extr.u r29=r20,4,4 ;;
1230 cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi
1231 (p6) br.cond.spnt.few just_do_rfi;;
1232 shl r29=r29,4;;
1233 adds r29=15,r29;;
1234 cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
1235 (p6) br.cond.spnt.few just_do_rfi;;
1236 END(rfi_check_extint)
1238 // this doesn't work yet (dies early after getting to user mode)
1239 // but happens relatively infrequently, so fix it later.
1240 // NOTE that these will be counted incorrectly for now (for privcnt output)
1241 ENTRY(rfi_with_interrupt)
1242 #if 1
1243 br.sptk.many dispatch_break_fault ;;
1244 #endif
1246 // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
1247 // r18 == XSI_PSR_IC
1248 // r21 == vipsr (ipsr in shared_mem)
1249 // r30 == IA64_KR(CURRENT)
1250 // r31 == pr
1251 mov r17=cr.ipsr
1252 mov r16=cr.isr;;
1253 // set shared_mem isr
1254 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
1255 dep r16=r16,r0,IA64_ISR_IR_BIT,1 // insert into cr.isr (rest of bits zero)
1256 extr.u r20=r21,IA64_PSR_RI_BIT,2 ;; // get v(!)psr.ri
1257 dep r16=r20,r16,IA64_PSR_RI_BIT,2 ;; // deposit cr.isr.ei
1258 adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
1259 st8 [r22]=r16;;
1260 movl r22=THIS_CPU(current_psr_i_addr)
1261 // set cr.ipsr (make sure cpl==2!)
1262 mov r29=r17
1263 movl r27=~DELIVER_PSR_CLR
1264 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
1265 mov r20=1;;
1266 ld8 r22=[r22]
1267 and r29=r29,r27;;
1268 or r29=r29,r28;;
1269 mov cr.ipsr=r29
1270 // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
1271 // set shared_mem interrupt_delivery_enabled to 0
1272 // set shared_mem interrupt_collection_enabled to 0
1273 st1 [r22]=r20
1274 st4 [r18]=r0;;
1275 // cover and set shared_mem precover_ifs to cr.ifs
1276 // set shared_mem ifs to 0
1277 #if 0
1278 cover ;;
1279 mov r20=cr.ifs
1280 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1281 st8 [r22]=r0 ;;
1282 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1283 st8 [r22]=r20 ;;
1284 // leave cr.ifs alone for later rfi
1285 #else
1286 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1287 ld8 r20=[r22];;
1288 st8 [r22]=r0 ;;
1289 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1290 st8 [r22]=r20 ;;
1291 #endif
1292 // set iip to go to domain IVA break instruction vector
1293 adds r22=IA64_VCPU_IVA_OFFSET,r30;;
1294 ld8 r23=[r22]
1295 movl r24=0x3000;;
1296 add r24=r24,r23;;
1297 mov cr.iip=r24;;
1298 #if 0
1299 // OK, now all set to go except for switch to virtual bank0
1300 mov r30=r2
1301 mov r29=r3;;
1302 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18
1303 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
1304 bsw.1;;
1305 // FIXME: need to handle ar.unat!
1306 .mem.offset 0,0; st8.spill [r2]=r16,16
1307 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
1308 .mem.offset 0,0; st8.spill [r2]=r18,16
1309 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
1310 .mem.offset 0,0; st8.spill [r2]=r20,16
1311 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
1312 .mem.offset 0,0; st8.spill [r2]=r22,16
1313 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
1314 .mem.offset 0,0; st8.spill [r2]=r24,16
1315 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
1316 .mem.offset 0,0; st8.spill [r2]=r26,16
1317 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
1318 .mem.offset 0,0; st8.spill [r2]=r28,16
1319 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
1320 .mem.offset 0,0; st8.spill [r2]=r30,16
1321 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
1322 bsw.0 ;;
1323 mov r2=r30
1324 mov r3=r29;;
1325 #endif
1326 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1327 st4 [r20]=r0
1328 mov pr=r31,-1 ;;
1329 rfi
1330 END(rfi_with_interrupt)
1331 #endif // RFI_TO_INTERRUPT
1333 ENTRY(hyper_cover)
1334 #ifdef FAST_HYPERPRIVOP_CNT
1335 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_COVER);;
1336 ld4 r21=[r20];;
1337 adds r21=1,r21;;
1338 st4 [r20]=r21;;
1339 #endif
1340 mov r24=cr.ipsr
1341 mov r25=cr.iip;;
1342 // skip test for vpsr.ic.. it's a prerequisite for hyperprivops
1343 cover ;;
1344 mov r30=cr.ifs
1345 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18;;
1346 st8 [r22]=r30
1347 mov cr.ifs=r0
1348 // adjust return address to skip over break instruction
1349 extr.u r26=r24,41,2 ;;
1350 cmp.eq p6,p7=2,r26 ;;
1351 (p6) mov r26=0
1352 (p6) adds r25=16,r25
1353 (p7) adds r26=1,r26
1354 ;;
1355 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1356 ;;
1357 mov cr.ipsr=r24
1358 mov cr.iip=r25
1359 mov pr=r31,-1 ;;
1360 rfi
1361 ;;
1362 END(hyper_cover)
1364 // return from metaphysical mode (meta=1) to virtual mode (meta=0)
1365 ENTRY(hyper_ssm_dt)
1366 #ifdef FAST_HYPERPRIVOP_CNT
1367 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_DT);;
1368 ld4 r21=[r20];;
1369 adds r21=1,r21;;
1370 st4 [r20]=r21;;
1371 #endif
1372 mov r24=cr.ipsr
1373 mov r25=cr.iip
1374 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1375 ld4 r21=[r20];;
1376 cmp.eq p7,p0=r21,r0 // meta==0?
1377 (p7) br.spnt.many 1f ;; // already in virtual mode
1378 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1379 ld8 r22=[r22];;
1380 adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
1381 ld8 r23=[r22];;
1382 mov rr[r0]=r23;;
1383 srlz.i;;
1384 st4 [r20]=r0
1385 // adjust return address to skip over break instruction
1386 1: extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1387 cmp.eq p6,p7=2,r26 ;;
1388 (p6) mov r26=0
1389 (p6) adds r25=16,r25
1390 (p7) adds r26=1,r26
1391 ;;
1392 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1393 ;;
1394 mov cr.ipsr=r24
1395 mov cr.iip=r25
1396 mov pr=r31,-1 ;;
1397 rfi
1398 ;;
1399 END(hyper_ssm_dt)
1401 // go to metaphysical mode (meta=1) from virtual mode (meta=0)
1402 ENTRY(hyper_rsm_dt)
1403 #ifdef FAST_HYPERPRIVOP_CNT
1404 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RSM_DT);;
1405 ld4 r21=[r20];;
1406 adds r21=1,r21;;
1407 st4 [r20]=r21;;
1408 #endif
1409 mov r24=cr.ipsr
1410 mov r25=cr.iip
1411 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1412 ld4 r21=[r20];;
1413 cmp.ne p7,p0=r21,r0 // meta==0?
1414 (p7) br.spnt.many 1f ;; // already in metaphysical mode
1415 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1416 ld8 r22=[r22];;
1417 adds r22=IA64_VCPU_META_RID_DT_OFFSET,r22;;
1418 ld8 r23=[r22];;
1419 mov rr[r0]=r23;;
1420 srlz.i;;
1421 adds r21=1,r0 ;;
1422 st4 [r20]=r21
1423 // adjust return address to skip over break instruction
1424 1: extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1425 cmp.eq p6,p7=2,r26 ;;
1426 (p6) mov r26=0
1427 (p6) adds r25=16,r25
1428 (p7) adds r26=1,r26
1429 ;;
1430 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1431 ;;
1432 mov cr.ipsr=r24
1433 mov cr.iip=r25
1434 mov pr=r31,-1 ;;
1435 rfi
1436 ;;
1437 END(hyper_rsm_dt)
1439 ENTRY(hyper_set_itm)
1440 // when we get here, r20 holds evtchn_upcall_pending (~= interrupts pending)
1441 cmp.ne p7,p0=r20,r0
1442 (p7) br.spnt.many dispatch_break_fault ;;
1443 #ifdef FAST_HYPERPRIVOP_CNT
1444 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_ITM);;
1445 ld4 r21=[r20];;
1446 adds r21=1,r21;;
1447 st4 [r20]=r21;;
1448 #endif
1449 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
1450 ld8 r21=[r20];;
1451 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1452 ld8 r20=[r20];;
1453 adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
1454 st8 [r20]=r8
1455 cmp.geu p6,p0=r21,r8;;
1456 (p6) mov r21=r8
1457 // now "safe set" cr.itm=r21
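// C-style sketch of the retry loop below (descriptive only):
//   for (margin = 100; ; margin <<= 1) {
//       cr.itm = target;
//       if (ar.itc < target) break;   /* deadline still in the future */
//       target += margin;             /* already passed: push it out */
//   }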
1458 mov r23=100;;
1459 2: mov cr.itm=r21;;
1460 srlz.d;;
1461 mov r22=ar.itc ;;
1462 cmp.leu p6,p0=r21,r22;;
1463 add r21=r21,r23;;
1464 shl r23=r23,1
1465 (p6) br.cond.spnt.few 2b;;
1466 1: mov r24=cr.ipsr
1467 mov r25=cr.iip;;
1468 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1469 cmp.eq p6,p7=2,r26 ;;
1470 (p6) mov r26=0
1471 (p6) adds r25=16,r25
1472 (p7) adds r26=1,r26
1473 ;;
1474 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1475 ;;
1476 mov cr.ipsr=r24
1477 mov cr.iip=r25
1478 mov pr=r31,-1 ;;
1479 rfi
1480 ;;
1481 END(hyper_set_itm)
1483 ENTRY(hyper_get_psr)
1484 #ifdef FAST_HYPERPRIVOP_CNT
1485 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_PSR);;
1486 ld4 r21=[r20];;
1487 adds r21=1,r21;;
1488 st4 [r20]=r21;;
1489 #endif
1490 mov r24=cr.ipsr
1491 movl r8=0xffffffff | IA64_PSR_MC | IA64_PSR_IT;;
1492 // only return PSR{36:35,31:0}
1493 and r8=r8,r24
1494 // get vpsr.ic
1495 ld4 r21=[r18];;
1496 dep r8=r21,r8,IA64_PSR_IC_BIT,1
1497 // get vpsr.pp
1498 adds r20=XSI_VPSR_PP_OFS-XSI_PSR_IC_OFS,r18 ;;
1499 ld1 r21=[r20];;
1500 dep r8=r21,r8,IA64_PSR_PP_BIT,1
1501 // get vpsr.dt
1502 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1503 ld4 r21=[r20];;
1504 cmp.ne p6,p0=r21,r0
1505 ;;
1506 (p6) dep.z r8=r8,IA64_PSR_DT_BIT,1
1507 // get vpsr.i
1508 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18 ;;
1509 ld8 r20=[r20];;
1510 ld1 r21=[r20];;
1511 cmp.eq p8,p9=r0,r21
1512 ;;
1513 (p8) dep r8=-1,r8,IA64_PSR_I_BIT,1
1514 (p9) dep r8=0,r8,IA64_PSR_I_BIT,1
1515 // get vpsr.dfh
1516 adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1517 ld1 r21=[r20];;
1518 dep r8=r21,r8,IA64_PSR_DFH_BIT,1
1519 ;;
1520 mov r25=cr.iip
1521 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1522 cmp.eq p6,p7=2,r26 ;;
1523 (p6) mov r26=0
1524 (p6) adds r25=16,r25
1525 (p7) adds r26=1,r26
1526 ;;
1527 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1528 ;;
1529 mov cr.ipsr=r24
1530 mov cr.iip=r25
1531 mov pr=r31,-1 ;;
1532 rfi
1533 ;;
1534 END(hyper_get_psr)
1537 ENTRY(hyper_get_rr)
1538 #ifdef FAST_HYPERPRIVOP_CNT
1539 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_RR);;
1540 ld4 r21=[r20];;
1541 adds r21=1,r21;;
1542 st4 [r20]=r21;;
1543 #endif
1544 extr.u r25=r8,61,3;;
1545 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1546 shl r25=r25,3;;
1547 add r20=r20,r25;;
1548 ld8 r8=[r20]
1549 1: mov r24=cr.ipsr
1550 mov r25=cr.iip;;
1551 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1552 cmp.eq p6,p7=2,r26 ;;
1553 (p6) mov r26=0
1554 (p6) adds r25=16,r25
1555 (p7) adds r26=1,r26
1556 ;;
1557 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1558 ;;
1559 mov cr.ipsr=r24
1560 mov cr.iip=r25
1561 mov pr=r31,-1 ;;
1562 rfi
1563 ;;
1564 END(hyper_get_rr)
1566 ENTRY(hyper_set_rr)
1567 extr.u r25=r8,61,3;;
1568 cmp.leu p7,p0=7,r25 // punt on setting rr7
1569 (p7) br.spnt.many dispatch_break_fault ;;
1570 #ifdef FAST_HYPERPRIVOP_CNT
1571 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR);;
1572 ld4 r21=[r20];;
1573 adds r21=1,r21;;
1574 st4 [r20]=r21;;
1575 #endif
1576 extr.u r26=r9,IA64_RR_RID,IA64_RR_RID_LEN // r26 = r9.rid
1577 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1578 ld8 r20=[r20];;
1579 adds r22=IA64_VCPU_STARTING_RID_OFFSET,r20
1580 adds r23=IA64_VCPU_ENDING_RID_OFFSET,r20
1581 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20
1582 adds r21=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r20;;
1583 ld4 r22=[r22]
1584 ld4 r23=[r23]
1585 ld1 r21=[r21];;
1586 add r22=r26,r22;;
1587 cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid
1588 (p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return
1589 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1590 shl r25=r25,3;;
1591 add r20=r20,r25;;
1592 st8 [r20]=r9;; // store away exactly what was passed
1593 // but adjust value actually placed in rr[r8]
1594 // r22 contains adjusted rid, "mangle" it (see regionreg.c)
1595 // and set ps to v->arch.vhpt_pg_shift and ve to 1
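// (concretely: the adjusted rid's bytes 0/1/2 land in rr bits 24-31/16-23/
//  8-15 -- byte-reversed within the RID field -- with ps and ve filled in below)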
1596 extr.u r27=r22,0,8
1597 extr.u r28=r22,8,8
1598 extr.u r29=r22,16,8
1599 dep.z r23=r21,IA64_RR_PS,IA64_RR_PS_LEN;;
1600 dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3
1601 dep r23=r27,r23,24,8;;
1602 dep r23=r28,r23,16,8;;
1603 dep r23=r29,r23,8,8
1604 cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical
1605 (p6) st8 [r24]=r23
1606 mov rr[r8]=r23;;
1607 // done, mosey on back
1608 1: mov r24=cr.ipsr
1609 mov r25=cr.iip;;
1610 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1611 cmp.eq p6,p7=2,r26 ;;
1612 (p6) mov r26=0
1613 (p6) adds r25=16,r25
1614 (p7) adds r26=1,r26
1615 ;;
1616 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1617 ;;
1618 mov cr.ipsr=r24
1619 mov cr.iip=r25
1620 mov pr=r31,-1 ;;
1621 rfi
1622 ;;
1623 END(hyper_set_rr)
1625 // r8 = val0
1626 // r9 = val1
1627 // r10 = val2
1628 // r11 = val3
1629 // r14 = val4
1630 // mov rr[0x0000000000000000UL] = r8
1631 // mov rr[0x2000000000000000UL] = r9
1632 // mov rr[0x4000000000000000UL] = r10
1633 // mov rr[0x6000000000000000UL] = r11
1634 // mov rr[0x8000000000000000UL] = r14
1635 ENTRY(hyper_set_rr0_to_rr4)
1636 #ifndef FAST_SET_RR0_TO_RR4
1637 br.spnt.few dispatch_break_fault ;;
1638 #endif
1639 #ifdef FAST_HYPERPRIVOP_CNT
1640 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR0_TO_RR4);;
1641 ld4 r21=[r20];;
1642 adds r21=1,r21;;
1643 st4 [r20]=r21;;
1644 #endif
1645 movl r17=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1646 ld8 r17=[r17];;
1648 adds r21=IA64_VCPU_STARTING_RID_OFFSET,r17
1649 adds r22=IA64_VCPU_ENDING_RID_OFFSET,r17
1650 adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r17
1651 ;;
1652 ld4 r21=[r21] // r21 = current->starting_rid
1653 extr.u r26=r8,IA64_RR_RID,IA64_RR_RID_LEN // r26 = r8.rid
1654 extr.u r27=r9,IA64_RR_RID,IA64_RR_RID_LEN // r27 = r9.rid
1655 ld4 r22=[r22] // r22 = current->ending_rid
1656 extr.u r28=r10,IA64_RR_RID,IA64_RR_RID_LEN // r28 = r10.rid
1657 extr.u r29=r11,IA64_RR_RID,IA64_RR_RID_LEN // r29 = r11.rid
1658 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r17
1659 extr.u r30=r14,IA64_RR_RID,IA64_RR_RID_LEN // r30 = r14.rid
1660 ld1 r23=[r23] // r23 = current->vhpt_pg_shift
1661 ;;
1662 add r16=r26,r21
1663 add r17=r27,r21
1664 add r19=r28,r21
1665 add r20=r29,r21
1666 add r21=r30,r21
1667 dep.z r23=r23,IA64_RR_PS,IA64_RR_PS_LEN // r23 = rr.ps
1668 ;;
1669 cmp.geu p6,p0=r16,r22 // if r8.rid + starting_rid >= ending_rid
1670 cmp.geu p7,p0=r17,r22 // if r9.rid + starting_rid >= ending_rid
1671 cmp.geu p8,p0=r19,r22 // if r10.rid + starting_rid >= ending_rid
1672 (p6) br.cond.spnt.few 1f // this is an error, but just ignore/return
1673 (p7) br.cond.spnt.few 1f // this is an error, but just ignore/return
1674 cmp.geu p9,p0=r20,r22 // if r11.rid + starting_rid >= ending_rid
1675 (p8) br.cond.spnt.few 1f // this is an error, but just ignore/return
1676 (p9) br.cond.spnt.few 1f // this is an error, but just ignore/return
1677 cmp.geu p10,p0=r21,r22 // if r14.rid + starting_rid >= ending_rid
1678 (p10) br.cond.spnt.few 1f // this is an error, but just ignore/return
1679 dep r23=-1,r23,0,1 // add rr.ve
1680 ;;
1681 mov r25=1
1682 adds r22=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1683 ;;
1684 shl r30=r25,61 // r30 = 0x2000000000000000
1686 #if 0
1687 // simple plain version
1688 // rr0
1689 st8 [r22]=r8, 8 // current->rrs[0] = r8
1691 mov r26=0 // r26=0x0000000000000000
1692 extr.u r27=r16,0,8
1693 extr.u r28=r16,8,8
1694 extr.u r29=r16,16,8;;
1695 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1696 dep r25=r28,r25,16,8;;
1697 dep r25=r29,r25,8,8;;
1698 st8 [r24]=r25 // save for metaphysical
1699 mov rr[r26]=r25
1700 dv_serialize_data
1702 // rr1
1703 st8 [r22]=r9, 8 // current->rrs[1] = r9
1704 add r26=r26,r30 // r26 = 0x2000000000000000
1705 extr.u r27=r17,0,8
1706 extr.u r28=r17,8,8
1707 extr.u r29=r17,16,8;;
1708 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1709 dep r25=r28,r25,16,8;;
1710 dep r25=r29,r25,8,8;;
1711 mov rr[r26]=r25
1712 dv_serialize_data
1714 // rr2
1715 st8 [r22]=r10, 8 // current->rrs[2] = r10
1716 add r26=r26,r30 // r26 = 0x4000000000000000
1717 extr.u r27=r19,0,8
1718 extr.u r28=r19,8,8
1719 extr.u r29=r19,16,8;;
1720 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1721 dep r25=r28,r25,16,8;;
1722 dep r25=r29,r25,8,8;;
1723 mov rr[r26]=r25
1724 dv_serialize_data
1726 // rr3
1727 st8 [r22]=r11, 8 // current->rrs[3] = r11
1729 add r26=r26,r30 // r26 = 0x6000000000000000
1730 extr.u r27=r20,0,8
1731 extr.u r28=r20,8,8
1732 extr.u r29=r20,16,8;;
1733 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1734 dep r25=r28,r25,16,8;;
1735 dep r25=r29,r25,8,8;;
1736 mov rr[r26]=r25
1737 dv_serialize_data
1739 // rr4
1740 st8 [r22]=r14 // current->rrs[4] = r14
1742 add r26=r26,r30 // r26 = 0x8000000000000000
1743 extr.u r27=r21,0,8
1744 extr.u r28=r21,8,8
1745 extr.u r29=r21,16,8;;
1746 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1747 dep r25=r28,r25,16,8;;
1748 dep r25=r29,r25,8,8;;
1749 mov rr[r26]=r25
1750 dv_serialize_data
1751 #else
1752 // shuffled version
1753 // rr0
1754 // uses r27, r28, r29 for mangling
1755 // r25 for mangled value
1756 st8 [r22]=r8, 8 // current->rrs[0] = r8
1757 mov r26=0 // r26=0x0000000000000000
1758 extr.u r27=r16,0,8
1759 extr.u r28=r16,8,8
1760 extr.u r29=r16,16,8;;
1761 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1762 dep r25=r28,r25,16,8;;
1763 dep r25=r29,r25,8,8;;
1764 st8 [r24]=r25 // save for metaphysical
1765 mov rr[r26]=r25
1766 dv_serialize_data
1768 // r16, r24, r25 is usable.
1769 // rr1
1770 // uses r25, r28, r29 for mangling
1771 // r25 for mangled value
1772 extr.u r25=r17,0,8
1773 extr.u r28=r17,8,8
1774 st8 [r22]=r9, 8 // current->rrs[1] = r9
1775 extr.u r29=r17,16,8 ;;
1776 add r26=r26,r30 // r26 = 0x2000000000000000
1777 extr.u r24=r19,8,8
1778 extr.u r16=r19,0,8
1779 dep r25=r25,r23,24,8;; // mangling is swapping bytes 1 & 3
1780 dep r25=r28,r25,16,8;;
1781 dep r25=r29,r25,8,8;;
1782 mov rr[r26]=r25
1783 dv_serialize_data
1785 // r16, r17, r24, r25 is usable
1786 // rr2
1787 // uses r16, r24, r29 for mangling
1788 // r17 for mangled value
1789 extr.u r29=r19,16,8
1790 extr.u r27=r20,0,8
1791 st8 [r22]=r10, 8 // current->rrs[2] = r10
1792 add r26=r26,r30 // r26 = 0x4000000000000000
1793 dep r17=r16,r23,24,8;; // mangling is swapping bytes 1 & 3
1794 dep r17=r24,r17,16,8;;
1795 dep r17=r29,r17,8,8;;
1796 mov rr[r26]=r17
1797 dv_serialize_data
1799 // r16, r17, r19, r24, r25 is usable
1800 // rr3
1801 // uses r27, r28, r29 for mangling
1802 // r25 for mangled value
1803 extr.u r28=r20,8,8
1804 extr.u r29=r20,16,8
1805 st8 [r22]=r11, 8 // current->rrs[3] = r11
1806 extr.u r16=r21,0,8
1807 add r26=r26,r30 // r26 = 0x6000000000000000
1808 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1809 dep r25=r28,r25,16,8;;
1810 dep r25=r29,r25,8,8;;
1811 mov rr[r26]=r25
1812 dv_serialize_data
1814 // r16, r17, r19, r20, r24, r25 are usable
1815 // rr4
1816 // uses r16, r17, r24 for mangling
1817 // r25 for mangled value
1818 extr.u r17=r21,8,8
1819 extr.u r24=r21,16,8
1820 st8 [r22]=r14 // current->rrs[4] = r14
1821 add r26=r26,r30 // r26 = 0x8000000000000000
1822 dep r25=r16,r23,24,8;; // mangling is swapping bytes 1 & 3
1823 dep r25=r17,r25,16,8;;
1824 dep r25=r24,r25,8,8;;
1825 mov rr[r26]=r25
1826 dv_serialize_data
1827 #endif
1829 // done, mosey on back
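// The slot advance below is, roughly, in C (the same exit sequence is used
// by the other fast hyperprivops in this file):
//	if (ipsr.ri == 2) { ipsr.ri = 0; iip += 16; } else ipsr.ri++;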
1830 1: mov r24=cr.ipsr
1831 mov r25=cr.iip;;
1832 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1833 cmp.eq p6,p7=2,r26 ;;
1834 (p6) mov r26=0
1835 (p6) adds r25=16,r25
1836 (p7) adds r26=1,r26
1837 ;;
1838 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1839 ;;
1840 mov cr.ipsr=r24
1841 mov cr.iip=r25
1842 mov pr=r31,-1 ;;
1843 rfi
1844 ;;
1845 END(hyper_set_rr0_to_rr4)
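// Registers at entry (inferred from the code below)
//	r8 == kr register number (0-7)
//	r9 == value to set
//	r18 == XSI_PSR_IC_OFS
//	r31 == pr
// Rough outline: if the number is > 7 take the slow path; otherwise store
// the value both in the vcpu's kr save area (XSI_KR0..) and in ar.k0-ar.k7.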
1847 ENTRY(hyper_set_kr)
1848 extr.u r25=r8,3,61;;
1849 cmp.ne p7,p0=r0,r25 // if kr# > 7, go slow way
1850 (p7) br.spnt.many dispatch_break_fault ;;
1851 #ifdef FAST_HYPERPRIVOP_CNT
1852 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_KR);;
1853 ld4 r21=[r20];;
1854 adds r21=1,r21;;
1855 st4 [r20]=r21;;
1856 #endif
1857 adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18
1858 shl r20=r8,3;;
1859 add r22=r20,r21;;
1860 st8 [r22]=r9;;
1861 cmp.eq p7,p0=r8,r0
1862 adds r8=-1,r8;;
1863 (p7) mov ar0=r9;;
1864 cmp.eq p7,p0=r8,r0
1865 adds r8=-1,r8;;
1866 (p7) mov ar1=r9;;
1867 cmp.eq p7,p0=r8,r0
1868 adds r8=-1,r8;;
1869 (p7) mov ar2=r9;;
1870 cmp.eq p7,p0=r8,r0
1871 adds r8=-1,r8;;
1872 (p7) mov ar3=r9;;
1873 cmp.eq p7,p0=r8,r0
1874 adds r8=-1,r8;;
1875 (p7) mov ar4=r9;;
1876 cmp.eq p7,p0=r8,r0
1877 adds r8=-1,r8;;
1878 (p7) mov ar5=r9;;
1879 cmp.eq p7,p0=r8,r0
1880 adds r8=-1,r8;;
1881 (p7) mov ar6=r9;;
1882 cmp.eq p7,p0=r8,r0
1883 adds r8=-1,r8;;
1884 (p7) mov ar7=r9;;
1885 // done, mosey on back
1886 1: mov r24=cr.ipsr
1887 mov r25=cr.iip;;
1888 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1889 cmp.eq p6,p7=2,r26 ;;
1890 (p6) mov r26=0
1891 (p6) adds r25=16,r25
1892 (p7) adds r26=1,r26
1893 ;;
1894 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1895 ;;
1896 mov cr.ipsr=r24
1897 mov cr.iip=r25
1898 mov pr=r31,-1 ;;
1899 rfi
1900 ;;
1901 END(hyper_set_kr)
1903 // this routine was derived from optimized assembly output from
1904 // vcpu_thash, so it is dense and difficult to read, but it works
1905 // On entry:
1906 // r18 == XSI_PSR_IC
1907 // r31 == pr
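// r8 == vadr on entry; the hashed VHPT address is returned in r8.
// Rough C sketch of the computation (an approximation for readability):
//	pta_mask = (1UL << pta.size) - 1;
//	vhpt_off = (vadr >> rr[vadr >> 61].ps) << 3;	// 8-byte entries
//	r8 = (vadr & 0xe000000000000000UL)		// region bits of vadr
//	   | (pta.base & ~pta_mask)
//	   | (vhpt_off & pta_mask);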
1908 ENTRY(hyper_thash)
1909 #ifdef FAST_HYPERPRIVOP_CNT
1910 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_THASH);;
1911 ld4 r21=[r20];;
1912 adds r21=1,r21;;
1913 st4 [r20]=r21;;
1914 #endif
1915 shr.u r20 = r8, 61
1916 addl r25 = 1, r0
1917 movl r17 = 0xe000000000000000
1918 ;;
1919 and r21 = r17, r8 // VHPT_Addr1
1920 ;;
1921 shladd r28 = r20, 3, r18
1922 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
1923 ;;
1924 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
1925 addl r28 = 32767, r0
1926 ld8 r24 = [r19] // pta
1927 ;;
1928 ld8 r23 = [r27] // rrs[vadr>>61]
1929 extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
1930 ;;
1931 extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
1932 shl r30 = r25, r26
1933 ;;
1934 shr.u r19 = r8, r22
1935 shr.u r29 = r24, 15
1936 ;;
1937 adds r17 = -1, r30
1938 ;;
1939 shladd r27 = r19, 3, r0
1940 extr.u r26 = r17, 15, 46
1941 ;;
1942 andcm r24 = r29, r26
1943 and r19 = r28, r27
1944 shr.u r25 = r27, 15
1945 ;;
1946 and r23 = r26, r25
1947 ;;
1948 or r22 = r24, r23
1949 ;;
1950 dep.z r20 = r22, 15, 46
1951 ;;
1952 or r16 = r20, r21
1953 ;;
1954 or r8 = r19, r16
1955 // done, update iip/ipsr to next instruction
1956 mov r24=cr.ipsr
1957 mov r25=cr.iip;;
1958 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1959 cmp.eq p6,p7=2,r26 ;;
1960 (p6) mov r26=0
1961 (p6) adds r25=16,r25
1962 (p7) adds r26=1,r26
1963 ;;
1964 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1965 ;;
1966 mov cr.ipsr=r24
1967 mov cr.iip=r25
1968 mov pr=r31,-1 ;;
1969 rfi
1970 ;;
1971 END(hyper_thash)
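// Registers at entry (inferred from the code below)
//	r8 == vadr (start of the range to purge)
//	r9 == size, log2-encoded in bits 7:2 (addr_range = 1 << ((r9 >> 2) & 0x3f))
//	r31 == pr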
1973 ENTRY(hyper_ptc_ga)
1974 #ifndef FAST_PTC_GA
1975 br.spnt.few dispatch_break_fault ;;
1976 #endif
1977 // FIXME: validate not flushing Xen addresses
1978 #ifdef FAST_HYPERPRIVOP_CNT
1979 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_PTC_GA);;
1980 ld4 r21=[r20];;
1981 adds r21=1,r21;;
1982 st4 [r20]=r21;;
1983 #endif
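// Rough C sketch of the flush loop below (a sketch only, not authoritative):
//	size = 1UL << ((r9 >> 2) & 0x3f);
//	npages = size >> vhpt_pg_shift;		// +1 if size is not page-aligned
//	for (addr = r8; npages-- > 0; addr += 1UL << vhpt_pg_shift) {
//		vhpt_entry(addr)->ti_tag |= INVALID_TI_TAG;	// FIXME: no tag check
//		ia64_ptcga(addr, vhpt_pg_shift << 2);
//	}
//	// then mark the vcpu's 1-entry itlb/dtlb not present and advance iip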
1984 movl r21=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1985 ld8 r21=[r21];;
1986 adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r21
1987 mov r28=r8
1988 extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2)
1989 mov r20=1
1990 shr.u r24=r8,61
1991 movl r26=0x8000000000000000 // INVALID_TI_TAG
1992 mov r30=ar.lc
1993 ;;
1994 ld1 r22=[r22] // current->arch.vhpt_pg_shift
1995 shl r19=r20,r19
1996 cmp.eq p7,p0=7,r24
1997 (p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7
1998 ;;
1999 shl r27=r22,2 // vhpt_pg_shift<<2 (for ptc.ga)
2000 shr.u r23=r19,r22 // repeat loop for n pages
2001 cmp.le p7,p0=r19,r0 // skip flush if size<=0
2002 (p7) br.cond.dpnt 2f ;;
2003 shl r24=r23,r22;;
2004 cmp.ne p7,p0=r24,r19 ;;
2005 (p7) adds r23=1,r23 ;; // n_pages<size<n_pages+1? extra iter
2006 mov ar.lc=r23
2007 shl r29=r20,r22;; // page_size
2008 1:
2009 thash r25=r28 ;;
2010 adds r25=16,r25 ;;
2011 ld8 r24=[r25] ;;
2012 // FIXME: should check if tag matches, not just blow it away
2013 or r24=r26,r24 ;; // vhpt_entry->ti_tag |= INVALID_TI_TAG
2014 st8 [r25]=r24
2015 ptc.ga r28,r27 ;;
2016 srlz.i ;;
2017 add r28=r29,r28
2018 br.cloop.sptk.few 1b
2019 ;;
2020 2:
2021 mov ar.lc=r30 ;;
2022 mov r29=cr.ipsr
2023 mov r30=cr.iip;;
2024 adds r25=IA64_VCPU_DTLB_OFFSET,r21
2025 adds r26=IA64_VCPU_ITLB_OFFSET,r21;;
2026 ld8 r24=[r25]
2027 ld8 r27=[r26] ;;
2028 and r24=-2,r24
2029 and r27=-2,r27 ;;
2030 st8 [r25]=r24 // set 1-entry i/dtlb as not present
2031 st8 [r26]=r27 ;;
2032 // increment to point to next instruction
2033 extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
2034 cmp.eq p6,p7=2,r26 ;;
2035 (p6) mov r26=0
2036 (p6) adds r30=16,r30
2037 (p7) adds r26=1,r26
2038 ;;
2039 dep r29=r26,r29,IA64_PSR_RI_BIT,2
2040 ;;
2041 mov cr.ipsr=r29
2042 mov cr.iip=r30
2043 mov pr=r31,-1 ;;
2044 rfi
2045 ;;
2046 END(hyper_ptc_ga)
2048 // recovery block for hyper_itc metaphysical memory lookup
2049 ENTRY(recover_and_dispatch_break_fault)
2050 #ifdef PERF_COUNTERS
2051 movl r21=PERFC(recover_to_break_fault);;
2052 ld4 r22=[r21];;
2053 adds r22=1,r22;;
2054 st4 [r21]=r22;;
2055 #endif
2056 mov b0=r29 ;;
2057 br.sptk.many dispatch_break_fault;;
2058 END(recover_and_dispatch_break_fault)
2060 // Registers at entry
2061 // r17 = break immediate (HYPERPRIVOP_ITC_D or I)
2062 // r18 == XSI_PSR_IC_OFS
2063 // r31 == pr
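// r8 == pte to insert (inferred; it is handed to fast_insert in r16 below)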
2064 ENTRY(hyper_itc)
2065 hyper_itc_i:
2066 // fall through, hyper_itc_d handles both i and d
2067 hyper_itc_d:
2068 #ifndef FAST_ITC
2069 br.sptk.many dispatch_break_fault ;;
2070 #else
2071 // ensure itir.ps >= xen's pagesize
2072 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2073 ld8 r27=[r27];;
2074 adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
2075 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
2076 ld1 r22=[r22]
2077 ld8 r23=[r23];;
2078 extr.u r24=r23,IA64_ITIR_PS,IA64_ITIR_PS_LEN;; // r24==logps
2079 cmp.gt p7,p0=r22,r24
2080 (p7) br.spnt.many dispatch_break_fault ;;
2081 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2082 ld8 r21=[r21];;
2083 // for now, punt on region0 inserts
2084 extr.u r21=r21,61,3;;
2085 cmp.eq p7,p0=r21,r0
2086 (p7) br.spnt.many dispatch_break_fault ;;
2087 adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
2088 ld8 r27=[r27]
2089 // FIXME: is the global var dom0 always pinned? assume so for now
2090 movl r28=dom0;;
2091 ld8 r28=[r28];;
2092 // FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
2093 cmp.ne p7,p0=r27,r28
2094 (p7) br.spnt.many dispatch_break_fault ;;
2095 #ifdef FAST_HYPERPRIVOP_CNT
2096 cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
2097 (p6) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_D)
2098 (p7) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_I);;
2099 ld4 r21=[r20];;
2100 adds r21=1,r21;;
2101 st4 [r20]=r21;;
2102 #endif
2103 (p6) mov r17=2;;
2104 (p7) mov r17=3;;
2105 mov r29=b0 ;;
2106 movl r30=recover_and_dispatch_break_fault ;;
2107 mov r16=r8;;
2108 // fall through
2109 #endif
2110 END(hyper_itc)
2112 #if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT)
2114 // fast_insert(PSCB(ifa),r24=ps,r16=pte)
2115 // r16 == pte
2116 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
2117 // r18 == XSI_PSR_IC_OFS
2118 // r24 == ps
2119 // r29 == saved value of b0 in case of recovery
2120 // r30 == recovery ip if failure occurs
2121 // r31 == pr
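// Rough C outline of this fast path (a sketch; dom0 only, range checks
// skipped as the FIXMEs below note):
//	pte2 = translate_domain_pte(pte, PSCB(ifa), ps);
//	itc.d/itc.i pte2;  vhpt_insert(ifa, pte2, ps << 2);
//	vcpu_set_tr_entry(1-entry d/itlb, pte2 | 1, itir, ifa);
//	PSCBX(vcpu, d/itlb_pte) = pte;		// remember metaphysical pte
//	if (called as hyper_itc) advance iip past the break instruction;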
2122 ENTRY(fast_insert)
2123 // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
2124 mov r19=1
2125 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2126 shl r20=r19,r24
2127 ld8 r27=[r27];;
2128 adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
2129 adds r20=-1,r20 // r20 == mask
2130 movl r19=_PAGE_PPN_MASK;;
2131 ld1 r23=[r23]
2132 mov r25=-1
2133 and r22=r16,r19;; // r22 == pteval & _PAGE_PPN_MASK
2134 andcm r19=r22,r20
2135 shl r25=r25,r23 // -1 << current->arch.vhpt_pg_shift
2136 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2137 ld8 r21=[r21];;
2138 and r20=r21,r20;;
2139 or r19=r19,r20;; // r19 == mpaddr
2140 // FIXME: for now, just do domain0 and skip mpaddr range checks
2141 and r20=r25,r19
2142 movl r21=PAGE_PHYS ;;
2143 or r20=r20,r21 ;; // r20==return value from lookup_domain_mpa
2144 // r16=pteval,r20=pteval2
2145 movl r19=_PAGE_PPN_MASK
2146 movl r21=_PAGE_PL_PRIV;;
2147 andcm r25=r16,r19 // r25==pteval & ~_PAGE_PPN_MASK
2148 and r22=r20,r19;;
2149 or r22=r22,r21;;
2150 or r22=r22,r25;; // r22==return value from translate_domain_pte
2151 // done with translate_domain_pte
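// In C, the value left in r22 above is roughly (dom0 identity map assumed):
//	mpaddr = (pte & _PAGE_PPN_MASK & ~((1UL << ps) - 1)) | (ifa & ((1UL << ps) - 1));
//	pte2   = (mpaddr & (-1UL << vhpt_pg_shift)) | PAGE_PHYS; // lookup_domain_mpa for dom0
//	r22    = (pte & ~_PAGE_PPN_MASK) | (pte2 & _PAGE_PPN_MASK) | _PAGE_PL_PRIV;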
2152 // now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r16=mppte,r24=logps)
2153 // FIXME: for now, just domain0 and skip range check
2154 // psr.ic already cleared
2155 // NOTE: r24 still contains ps (from above)
2156 shladd r24=r24,2,r0;;
2157 mov cr.itir=r24
2158 adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2159 ld8 r23=[r23];;
2160 mov cr.ifa=r23
2161 tbit.z p6,p7=r17,0;;
2162 (p6) itc.d r22
2163 (p7) itc.i r22;;
2164 dv_serialize_data
2165 // vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
2166 thash r28=r23
2167 or r26=1,r22;;
2168 ttag r21=r23
2169 adds r25=8,r28
2170 mov r19=r28;;
2171 st8 [r25]=r24
2172 adds r20=16,r28;;
2173 st8 [r19]=r26
2174 st8 [r20]=r21;;
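// (The three stores above fill the VHPT entry located by thash; the layout
//  assumed from the offsets used here is: pte with the present bit forced
//  on at +0, itir at +8, ttag at +16.)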
2175 // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
2176 // TR_ENTRY = {page_flags,itir,addr,rid}
2177 tbit.z p6,p7=r17,0
2178 adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
2179 (p6) adds r27=IA64_VCPU_DTLB_OFFSET,r27
2180 (p7) adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
2181 st8 [r27]=r22,8;; // page_flags: already has pl >= 2 and p==1
2182 st8 [r27]=r24,8 // itir
2183 mov r19=-4096;;
2184 and r23=r23,r19;;
2185 st8 [r27]=r23,8 // ifa & ~0xfff
2186 adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
2187 extr.u r25=r23,61,3;;
2188 shladd r29=r25,3,r29;;
2189 ld8 r29=[r29]
2190 movl r20=IA64_RR_RID_MASK;;
2191 and r29=r29,r20;;
2192 st8 [r27]=r29,-8;; // rid
2193 // if ps > 12
2194 cmp.eq p7,p0=12<<IA64_ITIR_PS,r24
2195 (p7) br.cond.sptk.many 1f;;
2196 // if (ps > 12) {
2197 // trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
2198 extr.u r29=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
2199 mov r28=1;;
2200 shl r26=r28,r29;;
2201 adds r29=-12,r29;;
2202 shl r25=r28,r29;;
2203 mov r29=-1
2204 adds r26=-1,r26
2205 adds r25=-1,r25;;
2206 andcm r26=r29,r26 // ~((1UL<<ps)-1)
2207 andcm r25=r29,r25;; // ~((1UL<<(ps-12))-1)
2208 ld8 r29=[r27];;
2209 and r29=r29,r26;;
2210 st8 [r27]=r29,-16;;
2211 ld8 r29=[r27];;
2212 extr.u r28=r29,12,38;;
2213 movl r26=0xfffc000000000fff;;
2214 and r29=r29,r26
2215 and r28=r28,r25;;
2216 shl r28=r28,12;;
2217 or r29=r29,r28;;
2218 st8 [r27]=r29;;
2219 1: // done with vcpu_set_tr_entry
2220 //PSCBX(vcpu,i/dtlb_pte) = mp_pte
2221 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2222 ld8 r27=[r27];;
2223 tbit.z p6,p7=r17,0;;
2224 (p6) adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
2225 (p7) adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
2226 st8 [r27]=r16;;
2227 // done with vcpu_itc_no_srlz
2229 // if hyper_itc, increment to point to next instruction
2230 tbit.z p7,p0=r17,1
2231 (p7) br.cond.sptk.few no_inc_iip;;
2233 mov r29=cr.ipsr
2234 mov r30=cr.iip;;
2235 extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
2236 cmp.eq p6,p7=2,r26 ;;
2237 (p6) mov r26=0
2238 (p6) adds r30=16,r30
2239 (p7) adds r26=1,r26
2240 ;;
2241 dep r29=r26,r29,IA64_PSR_RI_BIT,2
2242 ;;
2243 mov cr.ipsr=r29
2244 mov cr.iip=r30;;
2246 no_inc_iip:
2247 mov pr=r31,-1 ;;
2248 rfi
2249 ;;
2250 END(fast_insert)
2251 #endif