debuggers.hg

view xen/arch/ia64/xen/hyperprivop.S @ 0:7d21f7218375

Exact replica of unstable on 051908 + README-this
author Mukesh Rathor
date Mon May 19 15:34:57 2008 -0700 (2008-05-19)
1 /*
2 * arch/ia64/kernel/hyperprivop.S
3 *
4 * Copyright (C) 2005 Hewlett-Packard Co
5 * Dan Magenheimer <dan.magenheimer@hp.com>
6 */
8 #include <linux/config.h>
10 #include <asm/asmmacro.h>
11 #include <asm/kregs.h>
12 #include <asm/offsets.h>
13 #include <asm/processor.h>
14 #include <asm/system.h>
15 #include <asm/debugger.h>
16 #include <asm/asm-xsi-offsets.h>
17 #include <asm/pgtable.h>
18 #include <asm/vmmu.h>
19 #include <public/xen.h>
21 #ifdef PERF_COUNTERS
22 #define PERFC(n) (THIS_CPU(perfcounters) + (IA64_PERFC_ ## n) * 4)
23 #endif
25 #define PAGE_PHYS (__DIRTY_BITS | _PAGE_PL_PRIV | _PAGE_AR_RWX)
27 #if 1 // change to 0 to turn off all fast paths
28 # define FAST_HYPERPRIVOPS
29 # ifdef PERF_COUNTERS
30 # define FAST_HYPERPRIVOP_CNT
31 # define FAST_HYPERPRIVOP_PERFC(N) PERFC(fast_hyperprivop + N)
32 # define FAST_REFLECT_CNT
33 # endif
35 //#define FAST_TICK // mostly working (unat problems) but default off for now
36 //#define FAST_TLB_MISS_REFLECT // mostly working but default off for now
37 # undef FAST_ITC //XXX TODO fast_itc doesn't support dom0 vp yet
38 # define FAST_BREAK
39 # undef FAST_ACCESS_REFLECT //XXX TODO fast_access_reflect
40 // doesn't support dom0 vp yet.
41 # define FAST_RFI
42 // TODO: Since we use callback to deliver interrupt,
43 // FAST_SSM_I needs to be rewritten.
44 # define FAST_SSM_I
45 # define FAST_PTC_GA
46 # undef RFI_TO_INTERRUPT // not working yet
47 # define FAST_SET_RR0_TO_RR4
48 #endif
50 #ifdef CONFIG_SMP
51 //#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
52 #undef FAST_PTC_GA
53 #endif
55 // FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
56 #define HANDLE_AR_UNAT
58 // FIXME: This is defined in include/asm-ia64/hw_irq.h but this
59 // doesn't appear to be include'able from assembly?
60 #define IA64_TIMER_VECTOR 0xef
62 // Note: not hand-scheduled for now
63 // Registers at entry
64 // r16 == cr.isr
65 // r17 == cr.iim
66 // r18 == XSI_PSR_IC_OFS
67 // r19 == ipsr.cpl
68 // r31 == pr
69 GLOBAL_ENTRY(fast_hyperprivop)
70 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18
71 // HYPERPRIVOP_SSM_I?
72 // assumes domain interrupts pending, so just do it
73 cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
74 (p7) br.sptk.many hyper_ssm_i;;
76 // Check pending event indication
77 ld8 r20=[r20] // interrupt_mask_addr
78 ;;
79 ld1 r22=[r20],-1 // evtchn_upcall_mask
80 ;;
81 ld1 r20=[r20] // evtchn_upcall_pending
83 // HYPERPRIVOP_RFI?
84 cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
85 (p7) br.sptk.many hyper_rfi
86 ;;
87 #ifndef FAST_HYPERPRIVOPS // see beginning of file
88 br.sptk.many dispatch_break_fault ;;
89 #endif
90 // if event enabled and there are pending events
91 cmp.ne p7,p0=r20,r0
92 ;;
93 cmp.eq.and p7,p0=r22,r0
94 (p7) br.spnt.many dispatch_break_fault
95 ;;
97 // HYPERPRIVOP_COVER?
98 cmp.eq p7,p0=HYPERPRIVOP_COVER,r17
99 (p7) br.sptk.many hyper_cover
100 ;;
102 // HYPERPRIVOP_SSM_DT?
103 cmp.eq p7,p0=HYPERPRIVOP_SSM_DT,r17
104 (p7) br.sptk.many hyper_ssm_dt
105 ;;
107 // HYPERPRIVOP_RSM_DT?
108 cmp.eq p7,p0=HYPERPRIVOP_RSM_DT,r17
109 (p7) br.sptk.many hyper_rsm_dt
110 ;;
112 // HYPERPRIVOP_SET_ITM?
113 cmp.eq p7,p0=HYPERPRIVOP_SET_ITM,r17
114 (p7) br.sptk.many hyper_set_itm
115 ;;
117 // HYPERPRIVOP_SET_RR0_TO_RR4?
118 cmp.eq p7,p0=HYPERPRIVOP_SET_RR0_TO_RR4,r17
119 (p7) br.sptk.many hyper_set_rr0_to_rr4
120 ;;
122 // HYPERPRIVOP_SET_RR?
123 cmp.eq p7,p0=HYPERPRIVOP_SET_RR,r17
124 (p7) br.sptk.many hyper_set_rr
125 ;;
127 // HYPERPRIVOP_GET_RR?
128 cmp.eq p7,p0=HYPERPRIVOP_GET_RR,r17
129 (p7) br.sptk.many hyper_get_rr
130 ;;
132 // HYPERPRIVOP_GET_PSR?
133 cmp.eq p7,p0=HYPERPRIVOP_GET_PSR,r17
134 (p7) br.sptk.many hyper_get_psr
135 ;;
137 // HYPERPRIVOP_PTC_GA?
138 cmp.eq p7,p0=HYPERPRIVOP_PTC_GA,r17
139 (p7) br.sptk.many hyper_ptc_ga
140 ;;
142 // HYPERPRIVOP_ITC_D?
143 cmp.eq p7,p0=HYPERPRIVOP_ITC_D,r17
144 (p7) br.sptk.many hyper_itc_d
145 ;;
147 // HYPERPRIVOP_ITC_I?
148 cmp.eq p7,p0=HYPERPRIVOP_ITC_I,r17
149 (p7) br.sptk.many hyper_itc_i
150 ;;
152 // HYPERPRIVOP_THASH?
153 cmp.eq p7,p0=HYPERPRIVOP_THASH,r17
154 (p7) br.sptk.many hyper_thash
155 ;;
157 // HYPERPRIVOP_SET_KR?
158 cmp.eq p7,p0=HYPERPRIVOP_SET_KR,r17
159 (p7) br.sptk.many hyper_set_kr
160 ;;
162 // if not one of the above, give up for now and do it the slow way
163 br.sptk.many dispatch_break_fault
164 ;;
165 END(fast_hyperprivop)
167 // give up for now if: ipsr.be==1, ipsr.pp==1
168 // from reflect_interruption, don't need to:
169 // - printk first extint (debug only)
170 // - check for interrupt collection enabled (routine will force on)
171 // - set ifa (not valid for extint)
172 // - set iha (not valid for extint)
173 // - set itir (not valid for extint)
174 // DO need to
175 // - increment the HYPER_SSM_I fast_hyperprivop counter
176 // - set shared_mem iip to instruction after HYPER_SSM_I
177 // - set cr.iip to guest iva+0x3000
178 // - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
179 // be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
180 // i = shared_mem interrupt_delivery_enabled
181 // ic = shared_mem interrupt_collection_enabled
182 // ri = instruction after HYPER_SSM_I
183 // all other bits unchanged from real cr.ipsr
184 // - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
185 // - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
186 // and isr.ri to cr.isr.ri (all other bits zero)
187 // - cover and set shared_mem precover_ifs to cr.ifs
188 // ^^^ MISSED THIS FOR fast_break??
189 // - set shared_mem interrupt_delivery_enabled to 0
190 // - set shared_mem interrupt_collection_enabled to 0
191 // - set r31 to SHAREDINFO_ADDR
192 // - virtual bank switch 0
193 // maybe implement later
194 // - verify that there really IS a deliverable interrupt pending
195 // - set shared_mem iva
196 // needs to be done but not implemented (in reflect_interruption)
197 // - set shared_mem iipa
198 // don't know for sure
199 // - set shared_mem unat
200 // r16 == cr.isr
201 // r17 == cr.iim
202 // r18 == XSI_PSR_IC
203 // r19 == ipsr.cpl
204 // r31 == pr
205 ENTRY(hyper_ssm_i)
206 #ifndef FAST_SSM_I
207 br.spnt.few dispatch_break_fault ;;
208 #endif
209 // give up for now if: ipsr.be==1, ipsr.pp==1
210 mov r30=cr.ipsr
211 mov r29=cr.iip;;
212 tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
213 (p7) br.spnt.many dispatch_break_fault ;;
214 #ifdef FAST_HYPERPRIVOP_CNT
215 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);;
216 ld4 r21=[r20];;
217 adds r21=1,r21;;
218 st4 [r20]=r21;;
219 #endif
220 // set shared_mem iip to instruction after HYPER_SSM_I
221 tbit.nz p6,p7=r30,IA64_PSR_RI_BIT+1 ;; // cr.ipsr.ri >= 2 ?
222 (p6) mov r20=0
223 (p6) adds r29=16,r29
224 (p7) adds r20=1,r20 ;;
225 dep r30=r20,r30,IA64_PSR_RI_BIT,2 // adjust cr.ipsr.ri but don't save yet
226 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
227 st8 [r21]=r29 ;;
228 // set shared_mem isr
229 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
230 dep r16=r16,r0,IA64_ISR_IR_BIT,1;; // insert into cr.isr (rest of bits zero)
231 dep r16=r20,r16,IA64_PSR_RI_BIT,2 // deposit cr.isr.ri
232 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
233 st8 [r21]=r16
234 // set cr.ipsr
235 mov r29=r30
236 movl r28=DELIVER_PSR_SET
237 movl r27=~(DELIVER_PSR_CLR & (~IA64_PSR_CPL));;
238 and r29=r29,r27;;
239 or r29=r29,r28;;
240 // set hpsr_dfh to ipsr
241 adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
242 ld1 r28=[r28];;
243 dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
244 mov cr.ipsr=r29;;
245 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
246 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
247 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
248 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
249 // FOR SSM_I ONLY, also turn on psr.i and psr.ic
250 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC)
251 // movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
252 movl r27=~IA64_PSR_BN;;
253 or r30=r30,r28;;
254 and r30=r30,r27;;
255 mov r20=1
256 movl r22=THIS_CPU(current_psr_i_addr)
257 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
258 ld8 r22=[r22]
259 adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
260 ld1 r28=[r27];;
261 st1 [r27]=r0
262 dep r30=r28,r30,IA64_PSR_DFH_BIT,1
263 ;;
264 st8 [r21]=r30;;
265 // set shared_mem interrupt_delivery_enabled to 0
266 // set shared_mem interrupt_collection_enabled to 0
267 st1 [r22]=r20
268 st4 [r18]=r0
269 // cover and set shared_mem precover_ifs to cr.ifs
270 // set shared_mem ifs to 0
271 cover ;;
272 mov r20=cr.ifs
273 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
274 st8 [r21]=r0 ;;
275 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
276 st8 [r21]=r20 ;;
277 // leave cr.ifs alone for later rfi
278 // set iip to go to event callback handler
279 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
280 ld8 r22=[r22];;
281 adds r22=IA64_VCPU_EVENT_CALLBACK_IP_OFFSET,r22;;
282 ld8 r24=[r22];;
283 mov cr.iip=r24;;
284 // OK, now all set to go except for switch to virtual bank0
285 mov r30=r2
286 mov r29=r3
287 ;;
288 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
289 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
290 // temporarily save ar.unat
291 mov r28=ar.unat
292 bsw.1;;
293 // FIXME?: ar.unat is not really handled correctly,
294 // but may not matter if the OS is NaT-clean
295 .mem.offset 0,0; st8.spill [r2]=r16,16
296 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
297 .mem.offset 0,0; st8.spill [r2]=r18,16
298 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
299 .mem.offset 0,0; st8.spill [r2]=r20,16
300 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
301 .mem.offset 0,0; st8.spill [r2]=r22,16
302 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
303 .mem.offset 0,0; st8.spill [r2]=r24,16
304 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
305 .mem.offset 0,0; st8.spill [r2]=r26,16
306 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
307 .mem.offset 0,0; st8.spill [r2]=r28,16
308 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
309 .mem.offset 0,0; st8.spill [r2]=r30,16
310 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
311 bsw.0 ;;
312 mov r27=ar.unat
313 adds r26=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;;
314 //save bank1 ar.unat
315 st8 [r26]=r27
316 //restore ar.unat
317 mov ar.unat=r28
318 mov r2=r30
319 mov r3=r29
320 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
321 st4 [r20]=r0
322 mov pr=r31,-1 ;;
323 rfi
324 ;;
325 END(hyper_ssm_i)
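The "set shared_mem iip to instruction after HYPER_SSM_I" step near the top of this routine, and the near-identical epilogue every other fast path below runs before its rfi, are the same bundle/slot arithmetic. A minimal C sketch of that step, assuming only the architectural psr.ri layout (this is not lifted from the hypervisor sources):

#include <stdint.h>

/* psr.ri (bits 41-42) selects the slot within the current 16-byte bundle;
 * stepping past the emulated instruction means bumping the slot and, when
 * leaving slot 2, moving iip to the next bundle. */
static void skip_emulated_insn(uint64_t *ipsr, uint64_t *iip)
{
        unsigned ri = (*ipsr >> 41) & 3;

        if (ri == 2) {                  /* last slot: wrap to the next bundle */
                ri = 0;
                *iip += 16;
        } else {
                ri += 1;
        }
        *ipsr = (*ipsr & ~(3UL << 41)) | ((uint64_t)ri << 41);
}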
327 // reflect domain clock interrupt
328 // r31 == pr
329 // r30 == cr.ivr
330 // r29 == rp
331 GLOBAL_ENTRY(fast_tick_reflect)
332 #ifndef FAST_TICK // see beginning of file
333 br.cond.sptk.many rp;;
334 #endif
335 mov r28=IA64_TIMER_VECTOR;;
336 cmp.ne p6,p0=r28,r30
337 (p6) br.cond.spnt.few rp;;
338 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
339 ld8 r26=[r20]
340 mov r27=ar.itc;;
341 adds r27=200,r27;; // safety margin
342 cmp.ltu p6,p0=r26,r27
343 (p6) br.cond.spnt.few rp;;
344 mov r17=cr.ipsr;;
345 // slow path if: ipsr.pp==1
346 tbit.nz p6,p0=r17,IA64_PSR_PP_BIT
347 (p6) br.cond.spnt.few rp;;
348 // definitely have a domain tick
349 mov cr.eoi=r0
350 mov rp=r29
351 mov cr.itm=r26 // ensure next tick
352 #ifdef FAST_REFLECT_CNT
353 movl r20=PERFC(fast_reflect + (0x3000>>8));;
354 ld4 r21=[r20];;
355 adds r21=1,r21;;
356 st4 [r20]=r21;;
357 #endif
358 // vcpu_pend_timer(current)
359 movl r18=THIS_CPU(current_psr_ic_addr)
360 ;;
361 ld8 r18=[r18]
362 ;;
363 adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
364 ld8 r20=[r20];;
365 cmp.eq p6,p0=r20,r0 // if cr.itv==0 done
366 (p6) br.cond.spnt.few fast_tick_reflect_done;;
367 tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit
368 (p6) br.cond.spnt.few fast_tick_reflect_done;;
369 extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
370 extr.u r26=r20,6,2 // r26 has irr index of itv.vector
371 movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
372 ld8 r19=[r19];;
373 adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
374 adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
375 ld8 r24=[r22]
376 ld8 r23=[r23];;
377 cmp.eq p6,p0=r23,r24 // skip if this tick already delivered
378 (p6) br.cond.spnt.few fast_tick_reflect_done;;
379 // set irr bit
380 adds r21=IA64_VCPU_IRR0_OFFSET,r19
381 shl r26=r26,3;;
382 add r21=r21,r26
383 mov r25=1;;
384 shl r22=r25,r27
385 ld8 r23=[r21];;
386 or r22=r22,r23;;
387 st8 [r21]=r22
388 // set evtchn_upcall_pending!
389 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18;;
390 ld8 r20=[r20];;
391 adds r20=-1,r20;; // evtchn_upcall_pending
392 st1 [r20]=r25
393 // if interrupted at pl0, we're done
394 extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
395 cmp.eq p6,p0=r16,r0;;
396 (p6) br.cond.spnt.few fast_tick_reflect_done;;
397 // if guest vpsr.i is off, we're done
398 movl r21=THIS_CPU(current_psr_i_addr);;
399 ld8 r21=[r21];;
400 ld1 r21=[r21];;
401 cmp.eq p0,p6=r21,r0
402 (p6) br.cond.spnt.few fast_tick_reflect_done;;
404 // OK, we have a clock tick to deliver to the active domain!
405 // so deliver to iva+0x3000
406 // r17 == cr.ipsr
407 // r18 == XSI_PSR_IC
408 // r19 == IA64_KR(CURRENT)
409 // r31 == pr
410 mov r16=cr.isr
411 mov r29=cr.iip
412 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
413 st8 [r21]=r29
414 // set shared_mem isr
415 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
416 dep r16=r16,r0,IA64_ISR_IR_BIT,1 // insert into cr.isr (rest of bits zero)
417 extr.u r20=r17,IA64_PSR_RI_BIT,2;; // get ipsr.ri
418 dep r16=r20,r16,IA64_PSR_RI_BIT,2 // deposit cr.isr.ei
419 adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18;;
420 st8 [r21]=r16
421 // set cr.ipsr (make sure cpl==2!)
422 mov r29=r17
423 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
424 movl r27=~DELIVER_PSR_CLR;;
425 and r29=r29,r27;;
426 or r29=r29,r28;;
427 mov cr.ipsr=r29;;
428 // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
429 extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
430 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
431 (p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
432 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
433 movl r27=~(IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
434 or r17=r17,r28;;
435 and r17=r17,r27
436 ld4 r16=[r18];;
437 cmp.ne p6,p0=r16,r0
438 movl r22=THIS_CPU(current_psr_i_addr);;
439 ld8 r22=[r22]
440 (p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
441 ld1 r16=[r22];;
442 cmp.eq p6,p0=r16,r0;;
443 (p6) dep r17=-1,r17,IA64_PSR_I_BIT,1
444 mov r20=1
445 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18;;
446 st8 [r21]=r17
447 // set shared_mem interrupt_delivery_enabled to 0
448 // set shared_mem interrupt_collection_enabled to 0
449 st1 [r22]=r20
450 st4 [r18]=r0;;
451 // cover and set shared_mem precover_ifs to cr.ifs
452 // set shared_mem ifs to 0
453 cover ;;
454 mov r20=cr.ifs
455 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
456 st8 [r21]=r0 ;;
457 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
458 st8 [r21]=r20
459 // leave cr.ifs alone for later rfi
460 // set iip to go to domain IVA break instruction vector
461 adds r22=IA64_VCPU_IVA_OFFSET,r19;;
462 ld8 r23=[r22]
463 movl r24=0x3000;;
464 add r24=r24,r23;;
465 mov cr.iip=r24
466 // OK, now all set to go except for switch to virtual bank0
467 mov r30=r2
468 mov r29=r3
469 #ifdef HANDLE_AR_UNAT
470 mov r28=ar.unat
471 #endif
472 ;;
473 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
474 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
475 ;;
476 bsw.1;;
477 .mem.offset 0,0; st8.spill [r2]=r16,16
478 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
479 .mem.offset 0,0; st8.spill [r2]=r18,16
480 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
481 .mem.offset 0,0; st8.spill [r2]=r20,16
482 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
483 .mem.offset 0,0; st8.spill [r2]=r22,16
484 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
485 .mem.offset 0,0; st8.spill [r2]=r24,16
486 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
487 .mem.offset 0,0; st8.spill [r2]=r26,16
488 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
489 .mem.offset 0,0; st8.spill [r2]=r28,16
490 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
491 .mem.offset 0,0; st8.spill [r2]=r30,16
492 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
493 #ifdef HANDLE_AR_UNAT
494 // r16~r23 are preserved regs in bank0, so we need to restore them;
495 // r24~r31 are scratch regs, so we don't need to handle their NaT bits,
496 // because the OS handler must assign them before accessing them
497 ld8 r16=[r2],16
498 ld8 r17=[r3],16;;
499 ld8 r18=[r2],16
500 ld8 r19=[r3],16;;
501 ld8 r20=[r2],16
502 ld8 r21=[r3],16;;
503 ld8 r22=[r2],16
504 ld8 r23=[r3],16;;
505 #endif
506 ;;
507 bsw.0 ;;
508 mov r24=ar.unat
509 mov r2=r30
510 mov r3=r29
511 #ifdef HANDLE_AR_UNAT
512 mov ar.unat=r28
513 #endif
514 ;;
515 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
516 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
517 st8 [r25]=r24
518 st4 [r20]=r0
519 fast_tick_reflect_done:
520 mov pr=r31,-1 ;;
521 rfi
522 END(fast_tick_reflect)
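Once the tick is known to belong to the current domain, the guest-side filtering above amounts to roughly the following predicate. The struct and field names are illustrative stand-ins for the vcpu state this code reads, not the real Xen definitions; note the irr bit and evtchn_upcall_pending are set as soon as the first three checks pass, even when the reflection itself is then skipped.

#include <stdint.h>

struct tick_state {                      /* illustrative layout only        */
        uint64_t itv;                    /* guest cr.itv                    */
        uint64_t domain_itm;             /* next tick the guest programmed  */
        uint64_t domain_itm_last;        /* last tick actually delivered    */
        int      vpsr_i;                 /* guest virtual psr.i             */
};

/* nonzero => reflect the tick to the guest at iva+0x3000 */
static int should_reflect_tick(const struct tick_state *t, unsigned cpl)
{
        if (t->itv == 0)                         /* no timer vector set up     */
                return 0;
        if (t->itv & (1UL << 16))                /* itv.m: timer masked        */
                return 0;
        if (t->domain_itm == t->domain_itm_last) /* tick already delivered     */
                return 0;
        if (cpl == 0)                            /* interrupted Xen, not guest */
                return 0;
        return t->vpsr_i;                        /* guest must have psr.i on   */
}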
524 // reflect domain breaks directly to domain
525 // r16 == cr.isr
526 // r17 == cr.iim
527 // r18 == XSI_PSR_IC
528 // r19 == ipsr.cpl
529 // r31 == pr
530 GLOBAL_ENTRY(fast_break_reflect)
531 #ifndef FAST_BREAK // see beginning of file
532 br.sptk.many dispatch_break_fault ;;
533 #endif
534 mov r30=cr.ipsr
535 mov r29=cr.iip;;
536 tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
537 (p7) br.spnt.few dispatch_break_fault ;;
538 movl r20=IA64_PSR_CPL ;;
539 and r22=r20,r30 ;;
540 cmp.ne p7,p0=r22,r0
541 (p7) br.spnt.many 1f ;;
542 cmp.eq p7,p0=r17,r0
543 (p7) br.spnt.few dispatch_break_fault ;;
544 #ifdef CRASH_DEBUG
545 movl r21=CDB_BREAK_NUM ;;
546 cmp.eq p7,p0=r17,r21
547 (p7) br.spnt.few dispatch_break_fault ;;
548 #endif
549 1:
550 #if 1 /* special handling in case running on simulator */
551 movl r20=first_break;;
552 ld4 r23=[r20]
553 movl r21=0x80001
554 movl r22=0x80002;;
555 cmp.ne p7,p0=r23,r0
556 (p7) br.spnt.few dispatch_break_fault ;;
557 cmp.eq p7,p0=r21,r17
558 (p7) br.spnt.few dispatch_break_fault ;;
559 cmp.eq p7,p0=r22,r17
560 (p7) br.spnt.few dispatch_break_fault ;;
561 #endif
562 movl r20=0x2c00
563 // save iim in shared_info
564 adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
565 st8 [r21]=r17;;
566 // fall through
567 END(fast_break_reflect)
569 // reflect to domain ivt+r20
570 // sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
571 // r16 == cr.isr
572 // r18 == XSI_PSR_IC
573 // r20 == offset into ivt
574 // r29 == iip
575 // r30 == ipsr
576 // r31 == pr
577 ENTRY(fast_reflect)
578 #ifdef FAST_REFLECT_CNT
579 movl r22=PERFC(fast_reflect)
580 shr r23=r20,8-2;;
581 add r22=r22,r23;;
582 ld4 r21=[r22];;
583 adds r21=1,r21;;
584 st4 [r22]=r21;;
585 #endif
586 // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
587 adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
588 st8 [r21]=r29,XSI_ISR_OFS-XSI_IIP_OFS;;
589 // set shared_mem isr
590 st8 [r21]=r16 ;;
591 // set cr.ipsr
592 movl r21=THIS_CPU(current_psr_i_addr)
593 mov r29=r30 ;;
594 ld8 r21=[r21]
595 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
596 movl r27=~DELIVER_PSR_CLR;;
597 and r29=r29,r27;;
598 or r29=r29,r28;;
599 // set hpsr_dfh to ipsr
600 adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
601 ld1 r28=[r28];;
602 dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
603 mov cr.ipsr=r29;;
604 // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
605 extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
606 cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
607 (p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
608 movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
609 movl r27=~(IA64_PSR_PP|IA64_PSR_BN);;
610 or r30=r30,r28;;
611 and r30=r30,r27
612 // also set shared_mem ipsr.i and ipsr.ic appropriately
613 ld1 r22=[r21]
614 ld4 r24=[r18];;
615 cmp4.eq p6,p7=r24,r0;;
616 (p6) dep r30=0,r30,IA64_PSR_IC_BIT,1
617 (p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1
618 mov r24=r21
619 cmp.ne p6,p7=r22,r0;;
620 (p6) dep r30=0,r30,IA64_PSR_I_BIT,1
621 (p7) dep r30=-1,r30,IA64_PSR_I_BIT,1
622 mov r22=1
623 adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
624 adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
625 ld1 r28=[r27];;
626 st1 [r27]=r0
627 dep r30=r28,r30,IA64_PSR_DFH_BIT,1
628 ;;
629 st8 [r21]=r30
630 // set shared_mem interrupt_delivery_enabled to 0
631 // set shared_mem interrupt_collection_enabled to 0
632 st1 [r24]=r22
633 st4 [r18]=r0;;
634 // cover and set shared_mem precover_ifs to cr.ifs
635 // set shared_mem ifs to 0
636 cover ;;
637 mov r24=cr.ifs
638 adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
639 st8 [r21]=r0 ;;
640 adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
641 st8 [r21]=r24
642 // FIXME: need to save iipa and isr to be arch-compliant
643 // set iip to go to domain IVA break instruction vector
644 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
645 ld8 r22=[r22];;
646 adds r22=IA64_VCPU_IVA_OFFSET,r22;;
647 ld8 r23=[r22];;
648 add r20=r20,r23;;
649 mov cr.iip=r20
650 // OK, now all set to go except for switch to virtual bank0
651 mov r30=r2
652 mov r29=r3
653 #ifdef HANDLE_AR_UNAT
654 mov r28=ar.unat
655 #endif
656 ;;
657 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
658 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
659 ;;
660 bsw.1;;
661 .mem.offset 0,0; st8.spill [r2]=r16,16
662 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
663 .mem.offset 0,0; st8.spill [r2]=r18,16
664 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
665 .mem.offset 0,0; st8.spill [r2]=r20,16
666 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
667 .mem.offset 0,0; st8.spill [r2]=r22,16
668 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
669 .mem.offset 0,0; st8.spill [r2]=r24,16
670 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
671 .mem.offset 0,0; st8.spill [r2]=r26,16
672 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
673 .mem.offset 0,0; st8.spill [r2]=r28,16
674 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
675 .mem.offset 0,0; st8.spill [r2]=r30,16
676 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
677 #ifdef HANDLE_AR_UNAT
678 // r16~r23 are preserved regs in bank0, so we need to restore them;
679 // r24~r31 are scratch regs, so we don't need to handle their NaT bits,
680 // because the OS handler must assign them before accessing them
681 ld8 r16=[r2],16
682 ld8 r17=[r3],16;;
683 ld8 r18=[r2],16
684 ld8 r19=[r3],16;;
685 ld8 r20=[r2],16
686 ld8 r21=[r3],16;;
687 ld8 r22=[r2],16
688 ld8 r23=[r3],16;;
689 #endif
690 ;;
691 bsw.0 ;;
692 mov r24=ar.unat
693 mov r2=r30
694 mov r3=r29
695 #ifdef HANDLE_AR_UNAT
696 mov ar.unat=r28
697 #endif
698 ;;
699 adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
700 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
701 st8 [r25]=r24
702 st4 [r20]=r0
703 mov pr=r31,-1 ;;
704 rfi
705 ;;
706 END(fast_reflect)
708 // reflect access faults (0x2400,0x2800,0x5300) directly to domain
709 // r16 == isr
710 // r17 == ifa
711 // r19 == reflect number (only pass-thru to dispatch_reflection)
712 // r20 == offset into ivt
713 // r31 == pr
714 GLOBAL_ENTRY(fast_access_reflect)
715 #ifndef FAST_ACCESS_REFLECT // see beginning of file
716 br.spnt.few dispatch_reflection ;;
717 #endif
718 mov r30=cr.ipsr
719 mov r29=cr.iip;;
720 tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
721 (p7) br.spnt.few dispatch_reflection ;;
722 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
723 cmp.eq p7,p0=r21,r0
724 (p7) br.spnt.few dispatch_reflection ;;
725 movl r18=THIS_CPU(current_psr_ic_addr);;
726 ld8 r18=[r18];;
727 ld4 r21=[r18];;
728 cmp.eq p7,p0=r0,r21
729 (p7) br.spnt.few dispatch_reflection ;;
730 // set shared_mem ifa, FIXME: should we validate it?
731 mov r17=cr.ifa
732 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
733 st8 [r21]=r17 ;;
734 // get rr[ifa] and save to itir in shared memory (extra bits ignored)
735 shr.u r22=r17,61
736 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
737 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
738 shladd r22=r22,3,r21;;
739 ld8 r22=[r22];;
740 and r22=~3,r22;;
741 st8 [r23]=r22;;
742 br.cond.sptk.many fast_reflect;;
743 END(fast_access_reflect)
745 // when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
746 // is as it was at the time of the original miss. We want to preserve that
747 // so that if we get a nested fault, we can just branch to page_fault
748 GLOBAL_ENTRY(fast_tlb_miss_reflect)
749 #ifndef FAST_TLB_MISS_REFLECT // see beginning of file
750 br.spnt.few page_fault ;;
751 #else
752 mov r31=pr
753 mov r30=cr.ipsr
754 mov r29=cr.iip
755 mov r16=cr.isr
756 mov r17=cr.ifa;;
757 // for now, always take slow path for region 0 (e.g. metaphys mode)
758 extr.u r21=r17,61,3;;
759 cmp.eq p7,p0=r0,r21
760 (p7) br.spnt.few page_fault ;;
761 // always take slow path for PL0 (e.g. __copy_from_user)
762 extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
763 cmp.eq p7,p0=r21,r0
764 (p7) br.spnt.few page_fault ;;
765 // slow path if strange ipsr or isr bits set
766 tbit.nz p7,p0=r30,IA64_PSR_PP_BIT,1
767 (p7) br.spnt.few page_fault ;;
768 movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
769 and r21=r16,r21;;
770 cmp.ne p7,p0=r0,r21
771 (p7) br.spnt.few page_fault ;;
772 // also take slow path if virtual psr.ic=0
773 movl r18=XSI_PSR_IC;;
774 ld4 r21=[r18];;
775 cmp.eq p7,p0=r0,r21
776 (p7) br.spnt.few page_fault ;;
777 // OK, if we get to here, we are doing a fast vcpu_translate. Need to:
778 // 1) look in the virtual TR's (pinned), if not there
779 // 2) look in the 1-entry TLB (pinned), if not there
780 // 3) check the domain VHPT (NOT pinned, accesses domain memory!)
781 // If we find it in any of these places, we need to effectively do
782 // a hyper_itc_i/d
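Step 2, the pinned 1-entry I/D TLB, is checked a few lines below; in C the test is roughly the following. The struct layout and the present-bit position are illustrative stand-ins for the vmmu.h definitions:

#include <stdint.h>

#define VTLB_P  (1UL << 0)               /* stand-in for VTLB_PTE_P_BIT */

struct vtlb_1entry {                     /* illustrative layout only    */
        uint64_t pte, itir, vadr;
};

/* nonzero => the miss address is covered by the 1-entry TLB, so the fast
 * path punts to page_fault rather than re-checking rids; zero => go on and
 * consult the guest VHPT. */
static int covered_by_vtlb(const struct vtlb_1entry *t, uint64_t ifa)
{
        uint64_t ps, size;

        if (!(t->pte & VTLB_P))          /* not present: try the VHPT   */
                return 0;
        ps   = (t->itir >> 2) & 0x3f;    /* itir.ps                     */
        size = 1UL << ps;
        return ifa >= t->vadr && ifa < t->vadr + size;
}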
784 // short-term hack for now, if in region 5-7, take slow path
785 // since all Linux TRs are in region 5 or 7, we need not check TRs
786 extr.u r21=r17,61,3;;
787 cmp.le p7,p0=5,r21
788 (p7) br.spnt.few page_fault ;;
789 fast_tlb_no_tr_match:
790 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
791 ld8 r27=[r27]
792 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
793 (p6) adds r25=IA64_VCPU_ITLB_OFFSET,r27
794 (p7) adds r25=IA64_VCPU_DTLB_OFFSET,r27;;
795 ld8 r20=[r25],8;;
796 tbit.z p7,p0=r20,VTLB_PTE_P_BIT // present?
797 (p7) br.cond.spnt.few 1f;;
798 // if ifa is in range of tlb, don't bother to check rid, go slow path
799 ld8 r21=[r25],8;;
800 mov r23=1
801 extr.u r21=r21,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;
802 shl r22=r23,r21
803 ld8 r21=[r25],8;;
804 cmp.ltu p7,p0=r17,r21
805 (p7) br.cond.sptk.many 1f;
806 add r21=r22,r21;;
807 cmp.ltu p7,p0=r17,r21
808 (p7) br.cond.spnt.few page_fault;;
810 1: // check the guest VHPT
811 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
812 ld8 r19=[r19]
813 // if (!rr.ve || !(pta & IA64_PTA_VE)) take slow way for now
814 // FIXME: later, we deliver an alt_d/i vector after thash and itir
815 extr.u r25=r17,61,3
816 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
817 shl r25=r25,3;;
818 add r21=r21,r25;;
819 ld8 r22=[r21];;
820 tbit.z p7,p0=r22,0
821 (p7) br.cond.spnt.few page_fault;;
822 tbit.z p7,p0=r19,IA64_PTA_VE_BIT
823 (p7) br.cond.spnt.few page_fault;;
824 tbit.nz p7,p0=r19,IA64_PTA_VF_BIT // long format VHPT
825 (p7) br.cond.spnt.few page_fault;;
827 // compute and save away itir (r22 & RR_PS_MASK)
828 movl r21=IA64_ITIR_PS_MASK;;
829 and r22=r22,r21;;
830 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
831 st8 [r21]=r22;;
833 // save away ifa
834 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
835 st8 [r21]=r17;;
836 // see vcpu_thash to save away iha
837 shr.u r20 = r17, 61
838 addl r25 = 1, r0
839 movl r30 = 0xe000000000000000
840 ;;
841 and r21 = r30, r17 // VHPT_Addr1
842 ;;
843 shladd r28 = r20, 3, r18
844 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
845 ;;
846 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
847 addl r28 = 32767, r0
848 ld8 r24 = [r19] // pta
849 ;;
850 ld8 r23 = [r27] // rrs[vadr>>61]
851 extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
852 ;;
853 extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
854 shl r30 = r25, r26 // pt size
855 ;;
856 shr.u r19 = r17, r22 // ifa pg number
857 shr.u r29 = r24, IA64_PTA_BASE_BIT
858 adds r30 = -1, r30 // pt size mask
859 ;;
860 shladd r27 = r19, 3, r0 // vhpt offset
861 extr.u r26 = r30, 15, 46
862 ;;
863 andcm r24 = r29, r26
864 and r19 = r28, r27
865 shr.u r25 = r27, 15
866 ;;
867 and r23 = r26, r25
868 ;;
869 or r22 = r24, r23
870 ;;
871 dep.z r20 = r22, 15, 46
872 ;;
873 or r30 = r20, r21
874 ;;
875 //or r8 = r19, r30
876 or r19 = r19, r30
877 ;;
878 adds r23=XSI_IHA_OFS-XSI_PSR_IC_OFS,r18 ;;
879 st8 [r23]=r19
880 // done with thash, check guest VHPT
882 adds r20 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
883 ld8 r24 = [r20];; // pta
884 // avoid recursively walking the VHPT
885 // if (((r17=address ^ r24=pta) & ((itir_mask(pta) << 3) >> 3)) != 0) {
886 mov r20=-8
887 xor r21=r17,r24
888 extr.u r24=r24,IA64_PTA_SIZE_BIT,IA64_PTA_SIZE_LEN;;
889 shl r20=r20,r24;;
890 shr.u r20=r20,3;;
891 and r21=r20,r21;;
892 cmp.eq p7,p0=r21,r0
893 (p7) br.cond.spnt.few 1f;;
894 // __copy_from_user(&pte, r19=(void *)(*iha), sizeof(pte)=8)
895 // prepare for possible nested dtlb fault
896 mov r29=b0
897 movl r30=guest_vhpt_miss
898 // now go fetch the entry from the guest VHPT
899 ld8 r20=[r19];;
900 // if we wind up here, we successfully loaded the VHPT entry
902 // this VHPT walker aborts on non-present pages instead
903 // of inserting a not-present translation; this allows
904 // vectoring directly to the miss handler
905 tbit.z p7,p0=r20,0
906 (p7) br.cond.spnt.few page_not_present;;
908 #ifdef FAST_REFLECT_CNT
909 movl r21=PERFC(fast_vhpt_translate);;
910 ld4 r22=[r21];;
911 adds r22=1,r22;;
912 st4 [r21]=r22;;
913 #endif
915 // prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte)
916 // r16 == pte
917 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
918 // r18 == XSI_PSR_IC_OFS
919 // r24 == ps
920 // r29 == saved value of b0 in case of recovery
921 // r30 == recovery ip if failure occurs
922 // r31 == pr
923 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
924 (p6) mov r17=1
925 (p7) mov r17=0
926 mov r16=r20
927 mov r29=b0
928 movl r30=recover_and_page_fault
929 adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
930 ld8 r24=[r21];;
931 extr.u r24=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
932 // IFA already in PSCB
933 br.cond.sptk.many fast_insert;;
934 END(fast_tlb_miss_reflect)
936 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
937 ENTRY(recover_and_page_fault)
938 #ifdef PERF_COUNTERS
939 movl r21=PERFC(recover_to_page_fault);;
940 ld4 r22=[r21];;
941 adds r22=1,r22;;
942 st4 [r21]=r22;;
943 #endif
944 mov b0=r29
945 br.cond.sptk.many page_fault;;
947 // if we wind up here, we missed in guest VHPT so recover
948 // from nested dtlb fault and reflect a tlb fault to the guest
949 guest_vhpt_miss:
950 mov b0=r29
951 // fault = IA64_VHPT_FAULT
952 mov r20=r0
953 br.cond.sptk.many 1f;
955 // if we get to here, we are ready to reflect
956 // need to set up virtual ifa, iha, itir (fast_reflect handles
957 // virtual isr, iip, ipsr, ifs)
958 // see vcpu_get_itir_on_fault: get ps,rid,(FIXME key) from rr[ifa]
959 page_not_present:
960 tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
961 (p6) movl r20=0x400
962 (p7) movl r20=0x800
964 1: extr.u r25=r17,61,3;;
965 adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
966 shl r25=r25,3;;
967 add r21=r21,r25;;
968 ld8 r22=[r21];;
969 extr.u r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN;;
970 dep.z r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN
971 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
972 st8 [r23]=r22
974 // fast reflect expects
975 // r16 == cr.isr
976 // r18 == XSI_PSR_IC
977 // r20 == offset into ivt
978 // r29 == iip
979 // r30 == ipsr
980 // r31 == pr
981 //mov r16=cr.isr
982 mov r29=cr.iip
983 mov r30=cr.ipsr
984 br.sptk.many fast_reflect;;
985 #endif
986 END(fast_tlb_miss_reflect)
988 ENTRY(slow_vcpu_rfi)
989 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18;;
990 ld8 r22=[r22];;
991 tbit.z p6,p0=r22,63
992 (p6) br.spnt.few dispatch_break_fault ;;
993 // If vifs.v is set, we have two IFS to consider:
994 // * the guest IFS
995 // * the hypervisor IFS (validated by cover)
996 // Because IFS is copied to CFM and is used to adjust AR.BSP,
997 // virtualization of rfi is not easy.
998 // Previously there was a two-step method (a first rfi jumped to
999 // a stub which performed a second rfi).
1000 // This new method discards the RS before executing the hypervisor
1001 // cover. After cover, IFS.IFM will be zero. Such an IFS would simply
1002 // clear CFM without modifying AR.BSP. Therefore the guest IFS can
1003 // be used instead and there is no need for a second rfi.
1004 // Discarding the RS with the following alloc instruction just clears
1005 // CFM, which is safe because rfi will overwrite it.
1006 // There is a drawback: because the RS must be discarded before
1007 // executing C code, emulation of rfi must go through a hyperprivop
1008 // and not through normal instruction decoding.
1009 alloc r22=ar.pfs,0,0,0,0
1010 br.spnt.few dispatch_break_fault
1011 ;;
1012 END(slow_vcpu_rfi)
1014 // ensure that, if giving up, registers at entry to fast_hyperprivop are unchanged
1015 ENTRY(hyper_rfi)
1016 #ifndef FAST_RFI
1017 br.spnt.few slow_vcpu_rfi ;;
1018 #endif
1019 // if interrupts pending and vcr.ipsr.i=1, do it the slow way
1020 adds r19=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
1021 adds r23=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18
1022 cmp.ne p8,p0=r20,r0;; // evtchn_upcall_pending != 0
1023 // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
1024 ld8 r21=[r19],XSI_IIP_OFS-XSI_IPSR_OFS // r21=vcr.ipsr
1025 movl r20=~(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
1026 or r20=r20,r21
1027 // p8 determines whether we might deliver an immediate extint
1028 (p8) tbit.nz p8,p0=r21,IA64_PSR_I_BIT;;
1029 cmp.ne p7,p0=-1,r20
1030 ld4 r23=[r23] // r23=metaphysical_mode
1031 #ifndef RFI_TO_INTERRUPT // see beginning of file
1032 (p8) br.cond.spnt.few slow_vcpu_rfi
1033 #endif
1034 (p7) br.spnt.few slow_vcpu_rfi;;
1035 // if was in metaphys mode, do it the slow way (FIXME later?)
1036 cmp.ne p7,p0=r23,r0
1037 ld8 r22=[r19] // r22=vcr.iip
1038 (p7) br.spnt.few slow_vcpu_rfi;;
1039 // OK now, let's do an rfi.
1040 #ifdef FAST_HYPERPRIVOP_CNT
1041 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);;
1042 ld4 r23=[r20];;
1043 adds r23=1,r23;;
1044 st4 [r20]=r23;;
1045 #endif
1046 #ifdef RFI_TO_INTERRUPT
1047 // maybe do an immediate interrupt delivery?
1048 (p8) br.cond.spnt.few rfi_check_extint;;
1049 #endif
1051 just_do_rfi:
1052 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1053 mov cr.iip=r22
1054 extr.u r19=r21,IA64_PSR_CPL0_BIT,2
1055 adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1056 cmp.gtu p7,p0=CONFIG_CPL0_EMUL,r19
1057 ld8 r20=[r20];;
1058 (p7) mov r19=CONFIG_CPL0_EMUL
1059 dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
1060 mov cr.ifs=r20 ;;
1061 // ipsr.cpl = max(vcr.ipsr.cpl, IA64_PSR_CPL0_BIT);
1062 movl r20=THIS_CPU(current_psr_i_addr)
1063 dep r21=r19,r21,IA64_PSR_CPL0_BIT,2;;
1064 // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
1065 ld8 r20=[r20]
1066 mov r19=1
1067 tbit.nz p7,p6=r21,IA64_PSR_I_BIT
1068 tbit.nz p9,p8=r21,IA64_PSR_IC_BIT;;
1069 // not done yet
1070 (p7) st1 [r20]=r0
1071 (p6) st1 [r20]=r19
1072 (p9) st4 [r18]=r19
1073 (p8) st4 [r18]=r0
1074 // force on psr.ic, i, dt, rt, it, bn
1075 movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT| \
1076 IA64_PSR_IT|IA64_PSR_BN)
1077 // keep cr.ipsr.pp and set vPSR.pp = vIPSR.pp
1078 mov r22=cr.ipsr
1079 ;;
1080 or r21=r21,r20
1081 tbit.z p10,p11 = r22, IA64_PSR_PP_BIT
1082 ;;
1083 adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1084 tbit.z p8,p9 = r21, IA64_PSR_DFH_BIT
1085 adds r23=XSI_VPSR_PP_OFS-XSI_PSR_IC_OFS,r18
1086 ;;
1087 (p9) mov r27=1;;
1088 (p9) st1 [r20]=r27
1089 dep r21=r22,r21,IA64_PSR_PP_BIT,1
1090 (p10) st1 [r23]=r0
1091 (p11) st1 [r23]=r27
1092 ;;
1093 (p8) st1 [r20]=r0
1094 (p8) adds r20=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1095 (p8) ld1 r27=[r20]
1096 ;;
1097 (p8) dep r21=r27,r21, IA64_PSR_DFH_BIT, 1
1098 ;;
1099 mov cr.ipsr=r21
1100 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1101 ld4 r21=[r20];;
1102 cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?"
1103 (p7) br.cond.spnt.few 1f;
1104 // OK, now all set to go except for switch to virtual bank1
1105 mov r22=1;;
1106 st4 [r20]=r22
1107 mov r30=r2
1108 mov r29=r3
1109 mov r17=ar.unat;;
1110 adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
1111 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
1112 adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
1113 ld8 r16=[r16];;
1114 mov ar.unat=r16;;
1115 bsw.1;;
1116 // FIXME?: ar.unat is not really handled correctly,
1117 // but may not matter if the OS is NaT-clean
1118 .mem.offset 0,0; ld8.fill r16=[r2],16
1119 .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
1120 .mem.offset 0,0; ld8.fill r18=[r2],16
1121 .mem.offset 0,0; ld8.fill r19=[r3],16 ;;
1122 .mem.offset 8,0; ld8.fill r20=[r2],16
1123 .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
1124 .mem.offset 8,0; ld8.fill r22=[r2],16
1125 .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
1126 .mem.offset 8,0; ld8.fill r24=[r2],16
1127 .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
1128 .mem.offset 8,0; ld8.fill r26=[r2],16
1129 .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
1130 .mem.offset 8,0; ld8.fill r28=[r2],16
1131 .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
1132 .mem.offset 8,0; ld8.fill r30=[r2],16
1133 .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
1134 bsw.0 ;;
1135 mov ar.unat=r17
1136 mov r2=r30
1137 mov r3=r29
1138 1: mov pr=r31,-1
1139 ;;
1140 rfi
1141 ;;
1142 END(hyper_rfi)
1144 #ifdef RFI_TO_INTERRUPT
1145 ENTRY(rfi_check_extint)
1146 //br.sptk.many dispatch_break_fault ;;
1148 // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
1149 // make sure none of these get trashed in case going to just_do_rfi
1150 movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1151 ld8 r30=[r30];;
1152 adds r24=IA64_VCPU_INSVC3_OFFSET,r30
1153 mov r25=192
1154 adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
1155 ld8 r23=[r16];;
1156 cmp.eq p6,p0=r23,r0;;
1157 (p6) adds r16=-8,r16;;
1158 (p6) adds r24=-8,r24;;
1159 (p6) adds r25=-64,r25;;
1160 (p6) ld8 r23=[r16];;
1161 (p6) cmp.eq p6,p0=r23,r0;;
1162 (p6) adds r16=-8,r16;;
1163 (p6) adds r24=-8,r24;;
1164 (p6) adds r25=-64,r25;;
1165 (p6) ld8 r23=[r16];;
1166 (p6) cmp.eq p6,p0=r23,r0;;
1167 (p6) adds r16=-8,r16;;
1168 (p6) adds r24=-8,r24;;
1169 (p6) adds r25=-64,r25;;
1170 (p6) ld8 r23=[r16];;
1171 cmp.eq p6,p0=r23,r0
1172 (p6) br.cond.spnt.few just_do_rfi; // this is actually an error
1173 // r16 points to non-zero element of irr, r23 has value
1174 // r24 points to corr element of insvc, r25 has elt*64
1175 ld8 r26=[r24];;
1176 cmp.geu p6,p0=r26,r23
1177 (p6) br.cond.spnt.many just_do_rfi;
1179 // not masked by insvc, get vector number
1180 shr.u r26=r23,1;;
1181 or r26=r23,r26;;
1182 shr.u r27=r26,2;;
1183 or r26=r26,r27;;
1184 shr.u r27=r26,4;;
1185 or r26=r26,r27;;
1186 shr.u r27=r26,8;;
1187 or r26=r26,r27;;
1188 shr.u r27=r26,16;;
1189 or r26=r26,r27;;
1190 shr.u r27=r26,32;;
1191 or r26=r26,r27;;
1192 andcm r26=0xffffffffffffffff,r26;;
1193 popcnt r26=r26;;
1194 sub r26=63,r26;;
1195 // r26 now contains the bit index (mod 64)
1196 mov r27=1;;
1197 shl r27=r27,r26;;
1198 // r27 now contains the (within the proper word) bit mask
1199 add r26=r25,r26
1200 // r26 now contains the vector [0..255]
1201 adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
1202 ld8 r20=[r20] ;;
1203 extr.u r29=r20,4,4
1204 tbit.nz p6,p0=r20,16 // if tpr.mmi is set, just rfi
1205 (p6) br.cond.spnt.few just_do_rfi;;
1206 shl r29=r29,4;;
1207 adds r29=15,r29;;
1208 cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
1209 (p6) br.cond.spnt.few just_do_rfi;;
1210 END(rfi_check_extint)
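The shift-and-or sequence above is the standard branch-free highest-set-bit computation, done via popcnt since that is the bit-counting primitive available here. A C equivalent of what it does to one 64-bit irr word (a sketch; the GCC builtin stands in for the popcnt instruction):

#include <stdint.h>

/* Smear the highest set bit of w downward, then count the zero bits that
 * remain above it: the result is the index of the highest pending bit in
 * this irr word (w must be nonzero). */
static int highest_set_bit(uint64_t w)
{
        w |= w >> 1;  w |= w >> 2;  w |= w >> 4;
        w |= w >> 8;  w |= w >> 16; w |= w >> 32;
        return 63 - __builtin_popcountll(~w);
}

Adding the word's base (r25, a multiple of 64) to this index then yields the vector number in 0..255.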
1212 // this doesn't work yet (dies early after getting to user mode)
1213 // but happens relatively infrequently, so fix it later.
1214 // NOTE that these will be counted incorrectly for now (for privcnt output)
1215 ENTRY(rfi_with_interrupt)
1216 #if 1
1217 br.sptk.many dispatch_break_fault ;;
1218 #endif
1220 // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
1221 // r18 == XSI_PSR_IC
1222 // r21 == vipsr (ipsr in shared_mem)
1223 // r30 == IA64_KR(CURRENT)
1224 // r31 == pr
1225 mov r17=cr.ipsr
1226 mov r16=cr.isr;;
1227 // set shared_mem isr
1228 extr.u r16=r16,IA64_ISR_IR_BIT,1;; // grab cr.isr.ir bit
1229 dep r16=r16,r0,IA64_ISR_IR_BIT,1 // insert into cr.isr (rest of bits zero)
1230 extr.u r20=r21,IA64_PSR_RI_BIT,2 ;; // get v(!)psr.ri
1231 dep r16=r20,r16,IA64_PSR_RI_BIT,2 ;; // deposit cr.isr.ei
1232 adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
1233 st8 [r22]=r16;;
1234 movl r22=THIS_CPU(current_psr_i_addr)
1235 // set cr.ipsr (make sure cpl==2!)
1236 mov r29=r17
1237 movl r27=~DELIVER_PSR_CLR
1238 movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
1239 mov r20=1;;
1240 ld8 r22=[r22]
1241 and r29=r29,r27;;
1242 or r29=r29,r28;;
1243 mov cr.ipsr=r29
1244 // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
1245 // set shared_mem interrupt_delivery_enabled to 0
1246 // set shared_mem interrupt_collection_enabled to 0
1247 st1 [r22]=r20
1248 st4 [r18]=r0;;
1249 // cover and set shared_mem precover_ifs to cr.ifs
1250 // set shared_mem ifs to 0
1251 #if 0
1252 cover ;;
1253 mov r20=cr.ifs
1254 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1255 st8 [r22]=r0 ;;
1256 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1257 st8 [r22]=r20 ;;
1258 // leave cr.ifs alone for later rfi
1259 #else
1260 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1261 ld8 r20=[r22];;
1262 st8 [r22]=r0 ;;
1263 adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
1264 st8 [r22]=r20 ;;
1265 #endif
1266 // set iip to go to domain IVA break instruction vector
1267 adds r22=IA64_VCPU_IVA_OFFSET,r30;;
1268 ld8 r23=[r22]
1269 movl r24=0x3000;;
1270 add r24=r24,r23;;
1271 mov cr.iip=r24;;
1272 #if 0
1273 // OK, now all set to go except for switch to virtual bank0
1274 mov r30=r2
1275 mov r29=r3;;
1276 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18
1277 adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
1278 bsw.1;;
1279 // FIXME: need to handle ar.unat!
1280 .mem.offset 0,0; st8.spill [r2]=r16,16
1281 .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
1282 .mem.offset 0,0; st8.spill [r2]=r18,16
1283 .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
1284 .mem.offset 0,0; st8.spill [r2]=r20,16
1285 .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
1286 .mem.offset 0,0; st8.spill [r2]=r22,16
1287 .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
1288 .mem.offset 0,0; st8.spill [r2]=r24,16
1289 .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
1290 .mem.offset 0,0; st8.spill [r2]=r26,16
1291 .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
1292 .mem.offset 0,0; st8.spill [r2]=r28,16
1293 .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
1294 .mem.offset 0,0; st8.spill [r2]=r30,16
1295 .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
1296 bsw.0 ;;
1297 mov r2=r30
1298 mov r3=r29;;
1299 #endif
1300 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
1301 st4 [r20]=r0
1302 mov pr=r31,-1 ;;
1303 rfi
1304 END(rfi_with_interrupt)
1305 #endif // RFI_TO_INTERRUPT
1307 ENTRY(hyper_cover)
1308 #ifdef FAST_HYPERPRIVOP_CNT
1309 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_COVER);;
1310 ld4 r21=[r20];;
1311 adds r21=1,r21;;
1312 st4 [r20]=r21;;
1313 #endif
1314 mov r24=cr.ipsr
1315 mov r25=cr.iip;;
1316 // skip test for vpsr.ic.. it's a prerequisite for hyperprivops
1317 cover ;;
1318 mov r30=cr.ifs
1319 adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18;;
1320 st8 [r22]=r30
1321 mov cr.ifs=r0
1322 // adjust return address to skip over break instruction
1323 extr.u r26=r24,41,2 ;;
1324 cmp.eq p6,p7=2,r26 ;;
1325 (p6) mov r26=0
1326 (p6) adds r25=16,r25
1327 (p7) adds r26=1,r26
1328 ;;
1329 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1330 ;;
1331 mov cr.ipsr=r24
1332 mov cr.iip=r25
1333 mov pr=r31,-1 ;;
1334 rfi
1335 ;;
1336 END(hyper_cover)
1338 // return from metaphysical mode (meta=1) to virtual mode (meta=0)
1339 ENTRY(hyper_ssm_dt)
1340 #ifdef FAST_HYPERPRIVOP_CNT
1341 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_DT);;
1342 ld4 r21=[r20];;
1343 adds r21=1,r21;;
1344 st4 [r20]=r21;;
1345 #endif
1346 mov r24=cr.ipsr
1347 mov r25=cr.iip
1348 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1349 ld4 r21=[r20];;
1350 cmp.eq p7,p0=r21,r0 // meta==0?
1351 (p7) br.spnt.many 1f ;; // already in virtual mode
1352 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1353 ld8 r22=[r22];;
1354 adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
1355 ld8 r23=[r22];;
1356 mov rr[r0]=r23;;
1357 srlz.i;;
1358 st4 [r20]=r0
1359 // adjust return address to skip over break instruction
1360 1: extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1361 cmp.eq p6,p7=2,r26 ;;
1362 (p6) mov r26=0
1363 (p6) adds r25=16,r25
1364 (p7) adds r26=1,r26
1365 ;;
1366 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1367 ;;
1368 mov cr.ipsr=r24
1369 mov cr.iip=r25
1370 mov pr=r31,-1 ;;
1371 rfi
1372 ;;
1373 END(hyper_ssm_dt)
1375 // go to metaphysical mode (meta=1) from virtual mode (meta=0)
1376 ENTRY(hyper_rsm_dt)
1377 #ifdef FAST_HYPERPRIVOP_CNT
1378 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RSM_DT);;
1379 ld4 r21=[r20];;
1380 adds r21=1,r21;;
1381 st4 [r20]=r21;;
1382 #endif
1383 mov r24=cr.ipsr
1384 mov r25=cr.iip
1385 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1386 ld4 r21=[r20];;
1387 cmp.ne p7,p0=r21,r0 // meta==0?
1388 (p7) br.spnt.many 1f ;; // already in metaphysical mode
1389 movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1390 ld8 r22=[r22];;
1391 adds r22=IA64_VCPU_META_RID_DT_OFFSET,r22;;
1392 ld8 r23=[r22];;
1393 mov rr[r0]=r23;;
1394 srlz.i;;
1395 adds r21=1,r0 ;;
1396 st4 [r20]=r21
1397 // adjust return address to skip over break instruction
1398 1: extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1399 cmp.eq p6,p7=2,r26 ;;
1400 (p6) mov r26=0
1401 (p6) adds r25=16,r25
1402 (p7) adds r26=1,r26
1403 ;;
1404 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1405 ;;
1406 mov cr.ipsr=r24
1407 mov cr.iip=r25
1408 mov pr=r31,-1 ;;
1409 rfi
1410 ;;
1411 END(hyper_rsm_dt)
1413 ENTRY(hyper_set_itm)
1414 // when we get here, r20 != 0 means an event/interrupt is pending
1415 cmp.ne p7,p0=r20,r0
1416 (p7) br.spnt.many dispatch_break_fault ;;
1417 #ifdef FAST_HYPERPRIVOP_CNT
1418 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_ITM);;
1419 ld4 r21=[r20];;
1420 adds r21=1,r21;;
1421 st4 [r20]=r21;;
1422 #endif
1423 movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
1424 ld8 r21=[r20];;
1425 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1426 ld8 r20=[r20];;
1427 adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
1428 st8 [r20]=r8
1429 cmp.geu p6,p0=r21,r8;;
1430 (p6) mov r21=r8
1431 // now "safe set" cr.itm=r21
1432 mov r23=100;;
1433 2: mov cr.itm=r21;;
1434 srlz.d;;
1435 mov r22=ar.itc ;;
1436 cmp.leu p6,p0=r21,r22;;
1437 add r21=r21,r23;;
1438 shl r23=r23,1
1439 (p6) br.cond.spnt.few 2b;;
1440 1: mov r24=cr.ipsr
1441 mov r25=cr.iip;;
1442 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1443 cmp.eq p6,p7=2,r26 ;;
1444 (p6) mov r26=0
1445 (p6) adds r25=16,r25
1446 (p7) adds r26=1,r26
1447 ;;
1448 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1449 ;;
1450 mov cr.ipsr=r24
1451 mov cr.iip=r25
1452 mov pr=r31,-1 ;;
1453 rfi
1454 ;;
1455 END(hyper_set_itm)
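The "safe set" loop above, rendered as C: program cr.itm, and if the deadline has already passed by the time the write lands, push it out by an exponentially growing margin and retry, so a timer interrupt is guaranteed to fire. The accessor names below are placeholders, not real interfaces:

#include <stdint.h>

extern void     write_cr_itm(uint64_t val);     /* placeholder accessors */
extern uint64_t read_ar_itc(void);

static void safe_set_itm(uint64_t deadline)
{
        uint64_t margin = 100;                  /* same initial margin as above */

        for (;;) {
                write_cr_itm(deadline);
                if (read_ar_itc() < deadline)   /* still in the future: done   */
                        break;
                deadline += margin;             /* missed it: push further out */
                margin <<= 1;
        }
}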
1457 ENTRY(hyper_get_psr)
1458 #ifdef FAST_HYPERPRIVOP_CNT
1459 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_PSR);;
1460 ld4 r21=[r20];;
1461 adds r21=1,r21;;
1462 st4 [r20]=r21;;
1463 #endif
1464 mov r24=cr.ipsr
1465 movl r8=0xffffffff | IA64_PSR_MC | IA64_PSR_IT;;
1466 // only return PSR{36:35,31:0}
1467 and r8=r8,r24
1468 // get vpsr.ic
1469 ld4 r21=[r18];;
1470 dep r8=r21,r8,IA64_PSR_IC_BIT,1
1471 // get vpsr.pp
1472 adds r20=XSI_VPSR_PP_OFS-XSI_PSR_IC_OFS,r18 ;;
1473 ld1 r21=[r20];;
1474 dep r8=r21,r8,IA64_PSR_PP_BIT,1
1475 // get vpsr.dt
1476 adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
1477 ld4 r21=[r20];;
1478 cmp.ne p6,p0=r21,r0
1479 ;;
1480 (p6) dep.z r8=r8,IA64_PSR_DT_BIT,1
1481 // get vpsr.i
1482 adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18 ;;
1483 ld8 r20=[r20];;
1484 ld1 r21=[r20];;
1485 cmp.eq p8,p9=r0,r21
1486 ;;
1487 (p8) dep r8=-1,r8,IA64_PSR_I_BIT,1
1488 (p9) dep r8=0,r8,IA64_PSR_I_BIT,1
1489 // get vpsr.dfh
1490 adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
1491 ld1 r21=[r20];;
1492 dep r8=r21,r8,IA64_PSR_DFH_BIT,1
1493 ;;
1494 mov r25=cr.iip
1495 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1496 cmp.eq p6,p7=2,r26 ;;
1497 (p6) mov r26=0
1498 (p6) adds r25=16,r25
1499 (p7) adds r26=1,r26
1500 ;;
1501 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1502 ;;
1503 mov cr.ipsr=r24
1504 mov cr.iip=r25
1505 mov pr=r31,-1 ;;
1506 rfi
1507 ;;
1508 END(hyper_get_psr)
1511 ENTRY(hyper_get_rr)
1512 #ifdef FAST_HYPERPRIVOP_CNT
1513 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_RR);;
1514 ld4 r21=[r20];;
1515 adds r21=1,r21;;
1516 st4 [r20]=r21;;
1517 #endif
1518 extr.u r25=r8,61,3;;
1519 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1520 shl r25=r25,3;;
1521 add r20=r20,r25;;
1522 ld8 r8=[r20]
1523 1: mov r24=cr.ipsr
1524 mov r25=cr.iip;;
1525 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1526 cmp.eq p6,p7=2,r26 ;;
1527 (p6) mov r26=0
1528 (p6) adds r25=16,r25
1529 (p7) adds r26=1,r26
1530 ;;
1531 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1532 ;;
1533 mov cr.ipsr=r24
1534 mov cr.iip=r25
1535 mov pr=r31,-1 ;;
1536 rfi
1537 ;;
1538 END(hyper_get_rr)
1540 ENTRY(hyper_set_rr)
1541 extr.u r25=r8,61,3;;
1542 cmp.leu p7,p0=7,r25 // punt on setting rr7
1543 (p7) br.spnt.many dispatch_break_fault ;;
1544 #ifdef FAST_HYPERPRIVOP_CNT
1545 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR);;
1546 ld4 r21=[r20];;
1547 adds r21=1,r21;;
1548 st4 [r20]=r21;;
1549 #endif
1550 extr.u r26=r9,IA64_RR_RID,IA64_RR_RID_LEN // r26 = r9.rid
1551 movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1552 ld8 r20=[r20];;
1553 adds r22=IA64_VCPU_STARTING_RID_OFFSET,r20
1554 adds r23=IA64_VCPU_ENDING_RID_OFFSET,r20
1555 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20
1556 adds r21=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r20;;
1557 ld4 r22=[r22]
1558 ld4 r23=[r23]
1559 ld1 r21=[r21];;
1560 add r22=r26,r22;;
1561 cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid
1562 (p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return
1563 adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1564 shl r25=r25,3;;
1565 add r20=r20,r25;;
1566 st8 [r20]=r9;; // store away exactly what was passed
1567 // but adjust value actually placed in rr[r8]
1568 // r22 contains adjusted rid, "mangle" it (see regionreg.c)
1569 // and set ps to v->arch.vhpt_pg_shift and ve to 1
1570 extr.u r27=r22,0,8
1571 extr.u r28=r22,8,8
1572 extr.u r29=r22,16,8
1573 dep.z r23=r21,IA64_RR_PS,IA64_RR_PS_LEN;;
1574 dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3
1575 dep r23=r27,r23,24,8;;
1576 dep r23=r28,r23,16,8;;
1577 dep r23=r29,r23,8,8
1578 cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical
1579 (p6) st8 [r24]=r23
1580 mov rr[r8]=r23;;
1581 // done, mosey on back
1582 1: mov r24=cr.ipsr
1583 mov r25=cr.iip;;
1584 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1585 cmp.eq p6,p7=2,r26 ;;
1586 (p6) mov r26=0
1587 (p6) adds r25=16,r25
1588 (p7) adds r26=1,r26
1589 ;;
1590 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1591 ;;
1592 mov cr.ipsr=r24
1593 mov cr.iip=r25
1594 mov pr=r31,-1 ;;
1595 rfi
1596 ;;
1597 END(hyper_set_rr)
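The "mangling" applied above (and described in regionreg.c) byte-swaps the adjusted rid within the region register's rid field, takes rr.ps from the vcpu's VHPT page shift, and forces rr.ve on; hyper_set_rr0_to_rr4 below repeats the same construction for rr0-rr4. A hedged C sketch of the value that ends up in the machine region register:

#include <stdint.h>

/* rid occupies rr bits 8-31; the mangled form places rid byte 0 in rr byte 3
 * and rid byte 2 in rr byte 1 (i.e. swaps rr bytes 1 and 3).  vhpt_pg_shift
 * fills rr.ps (bits 2-7) and rr.ve (bit 0) is set. */
static uint64_t mangled_rr(uint32_t rid, unsigned vhpt_pg_shift)
{
        uint64_t rr = ((uint64_t)vhpt_pg_shift << 2) | 1;

        rr |= (uint64_t)( rid        & 0xff) << 24;   /* rid byte 0 -> rr byte 3 */
        rr |= (uint64_t)((rid >>  8) & 0xff) << 16;   /* rid byte 1 -> rr byte 2 */
        rr |= (uint64_t)((rid >> 16) & 0xff) <<  8;   /* rid byte 2 -> rr byte 1 */
        return rr;
}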
1599 // r8 = val0
1600 // r9 = val1
1601 // r10 = val2
1602 // r11 = val3
1603 // r14 = val4
1604 // mov rr[0x0000000000000000UL] = r8
1605 // mov rr[0x2000000000000000UL] = r9
1606 // mov rr[0x4000000000000000UL] = r10
1607 // mov rr[0x6000000000000000UL] = r11
1608 // mov rr[0x8000000000000000UL] = r14
1609 ENTRY(hyper_set_rr0_to_rr4)
1610 #ifndef FAST_SET_RR0_TO_RR4
1611 br.spnt.few dispatch_break_fault ;;
1612 #endif
1613 #ifdef FAST_HYPERPRIVOP_CNT
1614 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR0_TO_RR4);;
1615 ld4 r21=[r20];;
1616 adds r21=1,r21;;
1617 st4 [r20]=r21;;
1618 #endif
1619 movl r17=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1620 ld8 r17=[r17];;
1622 adds r21=IA64_VCPU_STARTING_RID_OFFSET,r17
1623 adds r22=IA64_VCPU_ENDING_RID_OFFSET,r17
1624 adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r17
1625 ;;
1626 ld4 r21=[r21] // r21 = current->starting_rid
1627 extr.u r26=r8,IA64_RR_RID,IA64_RR_RID_LEN // r26 = r8.rid
1628 extr.u r27=r9,IA64_RR_RID,IA64_RR_RID_LEN // r27 = r9.rid
1629 ld4 r22=[r22] // r22 = current->ending_rid
1630 extr.u r28=r10,IA64_RR_RID,IA64_RR_RID_LEN // r28 = r10.rid
1631 extr.u r29=r11,IA64_RR_RID,IA64_RR_RID_LEN // r29 = r11.rid
1632 adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r17
1633 extr.u r30=r14,IA64_RR_RID,IA64_RR_RID_LEN // r30 = r14.rid
1634 ld1 r23=[r23] // r23 = current->vhpt_pg_shift
1635 ;;
1636 add r16=r26,r21
1637 add r17=r27,r21
1638 add r19=r28,r21
1639 add r20=r29,r21
1640 add r21=r30,r21
1641 dep.z r23=r23,IA64_RR_PS,IA64_RR_PS_LEN // r23 = rr.ps
1642 ;;
1643 cmp.geu p6,p0=r16,r22 // if r8.rid + starting_rid >= ending_rid
1644 cmp.geu p7,p0=r17,r22 // if r9.rid + starting_rid >= ending_rid
1645 cmp.geu p8,p0=r19,r22 // if r10.rid + starting_rid >= ending_rid
1646 (p6) br.cond.spnt.few 1f // this is an error, but just ignore/return
1647 (p7) br.cond.spnt.few 1f // this is an error, but just ignore/return
1648 cmp.geu p9,p0=r20,r22 // if r11.rid + starting_rid >= ending_rid
1649 (p8) br.cond.spnt.few 1f // this is an error, but just ignore/return
1650 (p9) br.cond.spnt.few 1f // this is an error, but just ignore/return
1651 cmp.geu p10,p0=r21,r22 // if r14.rid + starting_rid >= ending_rid
1652 (p10) br.cond.spnt.few 1f // this is an error, but just ignore/return
1653 dep r23=-1,r23,0,1 // add rr.ve
1654 ;;
1655 mov r25=1
1656 adds r22=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
1657 ;;
1658 shl r30=r25,61 // r30 = 0x2000000000000000
1660 #if 0
1661 // simple plain version
1662 // rr0
1663 st8 [r22]=r8, 8 // current->rrs[0] = r8
1665 mov r26=0 // r26=0x0000000000000000
1666 extr.u r27=r16,0,8
1667 extr.u r28=r16,8,8
1668 extr.u r29=r16,16,8;;
1669 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1670 dep r25=r28,r25,16,8;;
1671 dep r25=r29,r25,8,8;;
1672 st8 [r24]=r25 // save for metaphysical
1673 mov rr[r26]=r25
1674 dv_serialize_data
1676 // rr1
1677 st8 [r22]=r9, 8 // current->rrs[1] = r9
1678 add r26=r26,r30 // r26 = 0x2000000000000000
1679 extr.u r27=r17,0,8
1680 extr.u r28=r17,8,8
1681 extr.u r29=r17,16,8;;
1682 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1683 dep r25=r28,r25,16,8;;
1684 dep r25=r29,r25,8,8;;
1685 mov rr[r26]=r25
1686 dv_serialize_data
1688 // rr2
1689 st8 [r22]=r10, 8 // current->rrs[2] = r10
1690 add r26=r26,r30 // r26 = 0x4000000000000000
1691 extr.u r27=r19,0,8
1692 extr.u r28=r19,8,8
1693 extr.u r29=r19,16,8;;
1694 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1695 dep r25=r28,r25,16,8;;
1696 dep r25=r29,r25,8,8;;
1697 mov rr[r26]=r25
1698 dv_serialize_data
1700 // rr3
1701 st8 [r22]=r11, 8 // current->rrs[3] = r11
1703 add r26=r26,r30 // r26 = 0x6000000000000000
1704 extr.u r27=r20,0,8
1705 extr.u r28=r20,8,8
1706 extr.u r29=r20,16,8;;
1707 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1708 dep r25=r28,r25,16,8;;
1709 dep r25=r29,r25,8,8;;
1710 mov rr[r26]=r25
1711 dv_serialize_data
1713 // rr4
1714 st8 [r22]=r14 // current->rrs[4] = r14
1716 add r26=r26,r30 // r26 = 0x8000000000000000
1717 extr.u r27=r21,0,8
1718 extr.u r28=r21,8,8
1719 extr.u r29=r21,16,8;;
1720 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1721 dep r25=r28,r25,16,8;;
1722 dep r25=r29,r25,8,8;;
1723 mov rr[r26]=r25
1724 dv_serialize_data
1725 #else
1726 // shuffled version
1727 // rr0
1728 // uses r27, r28, r29 for mangling
1729 // r25 for mangled value
1730 st8 [r22]=r8, 8 // current->rrs[0] = r8
1731 mov r26=0 // r26=0x0000000000000000
1732 extr.u r27=r16,0,8
1733 extr.u r28=r16,8,8
1734 extr.u r29=r16,16,8;;
1735 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1736 dep r25=r28,r25,16,8;;
1737 dep r25=r29,r25,8,8;;
1738 st8 [r24]=r25 // save for metaphysical
1739 mov rr[r26]=r25
1740 dv_serialize_data
1742 // r16, r24, r25 are usable.
1743 // rr1
1744 // uses r25, r28, r29 for mangling
1745 // r25 for mangled value
1746 extr.u r25=r17,0,8
1747 extr.u r28=r17,8,8
1748 st8 [r22]=r9, 8 // current->rrs[1] = r9
1749 extr.u r29=r17,16,8 ;;
1750 add r26=r26,r30 // r26 = 0x2000000000000000
1751 extr.u r24=r19,8,8
1752 extr.u r16=r19,0,8
1753 dep r25=r25,r23,24,8;; // mangling is swapping bytes 1 & 3
1754 dep r25=r28,r25,16,8;;
1755 dep r25=r29,r25,8,8;;
1756 mov rr[r26]=r25
1757 dv_serialize_data
1759 // r16, r17, r24, r25 are usable
1760 // rr2
1761 // uses r16, r24, r29 for mangling
1762 // r17 for mangled value
1763 extr.u r29=r19,16,8
1764 extr.u r27=r20,0,8
1765 st8 [r22]=r10, 8 // current->rrs[2] = r10
1766 add r26=r26,r30 // r26 = 0x4000000000000000
1767 dep r17=r16,r23,24,8;; // mangling is swapping bytes 1 & 3
1768 dep r17=r24,r17,16,8;;
1769 dep r17=r29,r17,8,8;;
1770 mov rr[r26]=r17
1771 dv_serialize_data
1773 // r16, r17, r19, r24, r25 are usable
1774 // rr3
1775 // uses r27, r28, r29 for mangling
1776 // r25 for mangled value
1777 extr.u r28=r20,8,8
1778 extr.u r29=r20,16,8
1779 st8 [r22]=r11, 8 // current->rrs[3] = r11
1780 extr.u r16=r21,0,8
1781 add r26=r26,r30 // r26 = 0x6000000000000000
1782 dep r25=r27,r23,24,8;; // mangling is swapping bytes 1 & 3
1783 dep r25=r28,r25,16,8;;
1784 dep r25=r29,r25,8,8;;
1785 mov rr[r26]=r25
1786 dv_serialize_data
1788 // r16, r17, r19, r20, r24, r25 are usable
1789 // rr4
1790 // uses r16, r17, r24 for mangling
1791 // r25 for mangled value
1792 extr.u r17=r21,8,8
1793 extr.u r24=r21,16,8
1794 st8 [r22]=r14 // current->rrs[4] = r14
1795 add r26=r26,r30 // r26 = 0x8000000000000000
1796 dep r25=r16,r23,24,8;; // mangling is swapping bytes 1 & 3
1797 dep r25=r17,r25,16,8;;
1798 dep r25=r24,r25,8,8;;
1799 mov rr[r26]=r25
1800 dv_serialize_data
1801 #endif
1803 // done, mosey on back
1804 1: mov r24=cr.ipsr
1805 mov r25=cr.iip;;
1806 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1807 cmp.eq p6,p7=2,r26 ;;
1808 (p6) mov r26=0
1809 (p6) adds r25=16,r25
1810 (p7) adds r26=1,r26
1811 ;;
1812 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1813 ;;
1814 mov cr.ipsr=r24
1815 mov cr.iip=r25
1816 mov pr=r31,-1 ;;
1817 rfi
1818 ;;
1819 END(hyper_set_rr0_to_rr4)
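// The "done, mosey on back" epilogue above (and the identical ones in the
// routines below) steps the guest past the break instruction that raised the
// hyperprivop: bump ipsr.ri to the next slot, or wrap to slot 0 of the next
// 16-byte bundle.  A minimal C sketch, assuming only that psr.ri occupies the
// two bits at IA64_PSR_RI_BIT (41):
//
//	#include <stdint.h>
//	#define PSR_RI_BIT	41
//
//	static void skip_emulated_instruction(uint64_t *ipsr, uint64_t *iip)
//	{
//		uint64_t ri = (*ipsr >> PSR_RI_BIT) & 3;	/* slot in bundle */
//		if (ri == 2) {
//			ri = 0;			/* last slot: move on to ... */
//			*iip += 16;		/* ... start of next bundle  */
//		} else {
//			ri++;
//		}
//		*ipsr = (*ipsr & ~(3UL << PSR_RI_BIT)) | (ri << PSR_RI_BIT);
//	}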
1821 ENTRY(hyper_set_kr)
1822 extr.u r25=r8,3,61;;
1823 cmp.ne p7,p0=r0,r25 // if kr# > 7, go slow way
1824 (p7) br.spnt.many dispatch_break_fault ;;
1825 #ifdef FAST_HYPERPRIVOP_CNT
1826 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_KR);;
1827 ld4 r21=[r20];;
1828 adds r21=1,r21;;
1829 st4 [r20]=r21;;
1830 #endif
1831 adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18
1832 shl r20=r8,3;;
1833 add r22=r20,r21;;
1834 st8 [r22]=r9;;
1835 cmp.eq p7,p0=r8,r0
1836 adds r8=-1,r8;;
1837 (p7) mov ar0=r9;;
1838 cmp.eq p7,p0=r8,r0
1839 adds r8=-1,r8;;
1840 (p7) mov ar1=r9;;
1841 cmp.eq p7,p0=r8,r0
1842 adds r8=-1,r8;;
1843 (p7) mov ar2=r9;;
1844 cmp.eq p7,p0=r8,r0
1845 adds r8=-1,r8;;
1846 (p7) mov ar3=r9;;
1847 cmp.eq p7,p0=r8,r0
1848 adds r8=-1,r8;;
1849 (p7) mov ar4=r9;;
1850 cmp.eq p7,p0=r8,r0
1851 adds r8=-1,r8;;
1852 (p7) mov ar5=r9;;
1853 cmp.eq p7,p0=r8,r0
1854 adds r8=-1,r8;;
1855 (p7) mov ar6=r9;;
1856 cmp.eq p7,p0=r8,r0
1857 adds r8=-1,r8;;
1858 (p7) mov ar7=r9;;
1859 // done, mosey on back
1860 1: mov r24=cr.ipsr
1861 mov r25=cr.iip;;
1862 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1863 cmp.eq p6,p7=2,r26 ;;
1864 (p6) mov r26=0
1865 (p6) adds r25=16,r25
1866 (p7) adds r26=1,r26
1867 ;;
1868 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1869 ;;
1870 mov cr.ipsr=r24
1871 mov cr.iip=r25
1872 mov pr=r31,-1 ;;
1873 rfi
1874 ;;
1875 END(hyper_set_kr)
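// What the unrolled cmp/adds chain above amounts to, as a C sketch (names
// here are illustrative; ia64_set_kr() is the usual intrinsic for writing
// ar.k0-ar.k7):
//
//	static void set_kr_sketch(uint64_t *xsi_krs, uint64_t regnum, uint64_t val)
//	{
//		xsi_krs[regnum] = val;		/* shadow copy in the XSI area */
//		switch (regnum) {		/* and the real ar.kN register */
//		case 0: ia64_set_kr(0, val); break;
//		case 1: ia64_set_kr(1, val); break;
//		case 2: ia64_set_kr(2, val); break;
//		case 3: ia64_set_kr(3, val); break;
//		case 4: ia64_set_kr(4, val); break;
//		case 5: ia64_set_kr(5, val); break;
//		case 6: ia64_set_kr(6, val); break;
//		case 7: ia64_set_kr(7, val); break;
//		}
//	}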
1877 // This routine was derived from the optimized assembly output of
1878 // vcpu_thash, so it is dense and difficult to read, but it works.
1879 // On entry:
1880 // r18 == XSI_PSR_IC
1881 // r31 == pr
1882 ENTRY(hyper_thash)
1883 #ifdef FAST_HYPERPRIVOP_CNT
1884 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_THASH);;
1885 ld4 r21=[r20];;
1886 adds r21=1,r21;;
1887 st4 [r20]=r21;;
1888 #endif
1889 shr.u r20 = r8, 61
1890 addl r25 = 1, r0
1891 movl r17 = 0xe000000000000000
1892 ;;
1893 and r21 = r17, r8 // VHPT_Addr1
1894 ;;
1895 shladd r28 = r20, 3, r18
1896 adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
1897 ;;
1898 adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
1899 addl r28 = 32767, r0
1900 ld8 r24 = [r19] // pta
1901 ;;
1902 ld8 r23 = [r27] // rrs[vadr>>61]
1903 extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
1904 ;;
1905 extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
1906 shl r30 = r25, r26
1907 ;;
1908 shr.u r19 = r8, r22
1909 shr.u r29 = r24, 15
1910 ;;
1911 adds r17 = -1, r30
1912 ;;
1913 shladd r27 = r19, 3, r0
1914 extr.u r26 = r17, 15, 46
1915 ;;
1916 andcm r24 = r29, r26
1917 and r19 = r28, r27
1918 shr.u r25 = r27, 15
1919 ;;
1920 and r23 = r26, r25
1921 ;;
1922 or r22 = r24, r23
1923 ;;
1924 dep.z r20 = r22, 15, 46
1925 ;;
1926 or r16 = r20, r21
1927 ;;
1928 or r8 = r19, r16
1929 // done, update iip/ipsr to next instruction
1930 mov r24=cr.ipsr
1931 mov r25=cr.iip;;
1932 extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
1933 cmp.eq p6,p7=2,r26 ;;
1934 (p6) mov r26=0
1935 (p6) adds r25=16,r25
1936 (p7) adds r26=1,r26
1937 ;;
1938 dep r24=r26,r24,IA64_PSR_RI_BIT,2
1939 ;;
1940 mov cr.ipsr=r24
1941 mov cr.iip=r25
1942 mov pr=r31,-1 ;;
1943 rfi
1944 ;;
1945 END(hyper_thash)
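// For readability, the hash computed above restated as a hedged C sketch.
// Field offsets follow the architected rr/pta layouts, and pta.size is
// assumed >= 15 (the 32KB architectural minimum); this is a paraphrase of
// the assembly, not a copy of vcpu_thash():
//
//	#include <stdint.h>
//
//	static uint64_t thash_sketch(uint64_t vadr, uint64_t pta, uint64_t rrval)
//	{
//		uint64_t ps     = (rrval >> 2) & 0x3f;		/* rr.ps          */
//		uint64_t size   = (pta   >> 2) & 0x3f;		/* pta.size       */
//		uint64_t mask   = (1UL << size) - 1;		/* VHPT bytes - 1 */
//		uint64_t off    = (vadr >> ps) << 3;		/* 8-byte entries */
//		uint64_t region = vadr & (7UL << 61);		/* keep vrn bits  */
//		uint64_t base   = pta & ~0x7fffUL & ((1UL << 61) - 1);
//		return region | (base & ~mask) | (off & mask);
//	}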
1947 ENTRY(hyper_ptc_ga)
1948 #ifndef FAST_PTC_GA
1949 br.spnt.few dispatch_break_fault ;;
1950 #endif
1951 // FIXME: validate not flushing Xen addresses
1952 #ifdef FAST_HYPERPRIVOP_CNT
1953 movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_PTC_GA);;
1954 ld4 r21=[r20];;
1955 adds r21=1,r21;;
1956 st4 [r20]=r21;;
1957 #endif
1958 movl r21=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
1959 ld8 r21=[r21];;
1960 adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r21
1961 mov r28=r8 // r28 = vadr (flush start address)
1962 extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2)
1963 mov r20=1
1964 shr.u r24=r8,61
1965 movl r26=0x8000000000000000 // INVALID_TI_TAG
1966 mov r30=ar.lc
1967 ;;
1968 ld1 r22=[r22] // current->arch.vhpt_pg_shift
1969 shl r19=r20,r19
1970 cmp.eq p7,p0=7,r24
1971 (p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7
1972 ;;
1973 shl r27=r22,2 // vhpt_pg_shift<<2 (for ptc.ga)
1974 shr.u r23=r19,r22 // repeat loop for n pages
1975 cmp.le p7,p0=r19,r0 // skip flush if size<=0
1976 (p7) br.cond.dpnt 2f ;;
1977 shl r24=r23,r22;;
1978 cmp.ne p7,p0=r24,r23 ;;
1979 (p7) adds r23=1,r23 ;; // size not a whole number of pages? one extra iteration
1980 mov ar.lc=r23
1981 shl r29=r20,r22;; // page_size
1982 1:
1983 thash r25=r28 ;;
1984 adds r25=16,r25 ;;
1985 ld8 r24=[r25] ;;
1986 // FIXME: should check if tag matches, not just blow it away
1987 or r24=r26,r24 ;; // mark vhpt_entry invalid (set ti bit in ti_tag)
1988 st8 [r25]=r24
1989 ptc.ga r28,r27 ;;
1990 srlz.i ;;
1991 add r28=r29,r28
1992 br.cloop.sptk.few 1b
1993 ;;
1994 2:
1995 mov ar.lc=r30 ;;
1996 mov r29=cr.ipsr
1997 mov r30=cr.iip;;
1998 adds r25=IA64_VCPU_DTLB_OFFSET,r21
1999 adds r26=IA64_VCPU_ITLB_OFFSET,r21;;
2000 ld8 r24=[r25]
2001 ld8 r27=[r26] ;;
2002 and r24=-2,r24
2003 and r27=-2,r27 ;;
2004 st8 [r25]=r24 // set 1-entry i/dtlb as not present
2005 st8 [r26]=r27 ;;
2006 // increment to point to next instruction
2007 extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
2008 cmp.eq p6,p7=2,r26 ;;
2009 (p6) mov r26=0
2010 (p6) adds r30=16,r30
2011 (p7) adds r26=1,r26
2012 ;;
2013 dep r29=r26,r29,IA64_PSR_RI_BIT,2
2014 ;;
2015 mov cr.ipsr=r29
2016 mov cr.iip=r30
2017 mov pr=r31,-1 ;;
2018 rfi
2019 ;;
2020 END(hyper_ptc_ga)
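// Shape of the flush loop above as a C sketch (illustrative only; the
// intrinsic names are the usual ia64 ones, INVALID_TI_TAG is the bit loaded
// into r26 above, and the tag sits at offset 16 of the long-format VHPT
// entry):
//
//	#include <stdint.h>
//	#define INVALID_TI_TAG	0x8000000000000000UL
//
//	static void ptc_ga_sketch(uint64_t vadr, uint64_t addr_range,
//				  int vhpt_pg_shift)
//	{
//		uint64_t page_size = 1UL << vhpt_pg_shift;
//		uint64_t n = (addr_range + page_size - 1) >> vhpt_pg_shift;
//		for (uint64_t i = 0; i < n; i++, vadr += page_size) {
//			uint64_t *tag = (uint64_t *)(ia64_thash(vadr) + 16);
//			*tag |= INVALID_TI_TAG;	/* FIXME above: should match tag */
//			ia64_ptcga(vadr, vhpt_pg_shift << 2);
//			ia64_srlz_i();
//		}
//		/* then the 1-entry itlb/dtlb shadows are marked not-present */
//	}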
2022 // recovery block for hyper_itc metaphysical memory lookup
2023 ENTRY(recover_and_dispatch_break_fault)
2024 #ifdef PERF_COUNTERS
2025 movl r21=PERFC(recover_to_break_fault);;
2026 ld4 r22=[r21];;
2027 adds r22=1,r22;;
2028 st4 [r21]=r22;;
2029 #endif
2030 mov b0=r29 ;;
2031 br.sptk.many dispatch_break_fault;;
2032 END(recover_and_dispatch_break_fault)
2034 // Registers at entry
2035 // r17 = break immediate (HYPERPRIVOP_ITC_D or I)
2036 // r18 == XSI_PSR_IC_OFS
2037 // r31 == pr
2038 ENTRY(hyper_itc)
2039 hyper_itc_i:
2040 // fall through, hyper_itc_d handles both i and d
2041 hyper_itc_d:
2042 #ifndef FAST_ITC
2043 br.sptk.many dispatch_break_fault ;;
2044 #else
2045 // ensure itir.ps >= xen's pagesize
2046 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2047 ld8 r27=[r27];;
2048 adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
2049 adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
2050 ld1 r22=[r22]
2051 ld8 r23=[r23];;
2052 extr.u r24=r23,IA64_ITIR_PS,IA64_ITIR_PS_LEN;; // r24==logps
2053 cmp.gt p7,p0=r22,r24
2054 (p7) br.spnt.many dispatch_break_fault ;;
2055 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2056 ld8 r21=[r21];;
2057 // for now, punt on region0 inserts
2058 extr.u r21=r21,61,3;;
2059 cmp.eq p7,p0=r21,r0
2060 (p7) br.spnt.many dispatch_break_fault ;;
2061 adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
2062 ld8 r27=[r27]
2063 // FIXME: is the global var dom0 always pinned? assume so for now
2064 movl r28=dom0;;
2065 ld8 r28=[r28];;
2066 // FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
2067 cmp.ne p7,p0=r27,r28
2068 (p7) br.spnt.many dispatch_break_fault ;;
2069 #ifdef FAST_HYPERPRIVOP_CNT
2070 cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
2071 (p6) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_D)
2072 (p7) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_I);;
2073 ld4 r21=[r20];;
2074 adds r21=1,r21;;
2075 st4 [r20]=r21;;
2076 #endif
2077 (p6) mov r17=2;; // ITC_D: bit1=1 (itc), bit0=0 (data)
2078 (p7) mov r17=3;; // ITC_I: bit1=1 (itc), bit0=1 (inst)
2079 mov r29=b0 ;;
2080 movl r30=recover_and_dispatch_break_fault ;;
2081 mov r16=r8;;
2082 // fall through
2083 #endif
2084 END(hyper_itc)
2086 #if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT)
2088 // fast_insert(PSCB(ifa),r24=ps,r16=pte)
2089 // r16 == pte
2090 // r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
2091 // r18 == XSI_PSR_IC_OFS
2092 // r24 == ps
2093 // r29 == saved value of b0 in case of recovery
2094 // r30 == recovery ip if failure occurs
2095 // r31 == pr
2096 ENTRY(fast_insert)
2097 // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
2098 mov r19=1
2099 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2100 shl r20=r19,r24
2101 ld8 r27=[r27];;
2102 adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
2103 adds r20=-1,r20 // r20 == mask
2104 movl r19=_PAGE_PPN_MASK;;
2105 ld1 r23=[r23]
2106 mov r25=-1
2107 and r22=r16,r19;; // r22 == pteval & _PAGE_PPN_MASK
2108 andcm r19=r22,r20
2109 shl r25=r25,r23 // -1 << current->arch.vhpt_pg_shift
2110 adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2111 ld8 r21=[r21];;
2112 and r20=r21,r20;;
2113 or r19=r19,r20;; // r19 == mpaddr
2114 // FIXME: for now, just do domain0 and skip mpaddr range checks
2115 and r20=r25,r19
2116 movl r21=PAGE_PHYS ;;
2117 or r20=r20,r21 ;; // r20==return value from lookup_domain_mpa
2118 // r16 = pteval (original), r20 = pteval2 (from the dom0 lookup above)
2119 movl r19=_PAGE_PPN_MASK
2120 movl r21=_PAGE_PL_PRIV;;
2121 andcm r25=r16,r19 // r25==pteval & ~_PAGE_PPN_MASK
2122 and r22=r20,r19;;
2123 or r22=r22,r21;;
2124 or r22=r22,r25;; // r22==return value from translate_domain_pte
2125 // done with translate_domain_pte
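// The translate_domain_pte() work just finished, restated as a C sketch under
// the same dom0-only shortcut the FIXMEs above mention (the macros are the
// ones from asm/pgtable.h already used by this file):
//
//	static uint64_t xlate_pte_dom0_sketch(uint64_t pteval, uint64_t ifa,
//					      uint64_t logps, uint64_t vhpt_pg_shift)
//	{
//		uint64_t mask    = (1UL << logps) - 1;
//		uint64_t mpaddr  = (pteval & _PAGE_PPN_MASK & ~mask) | (ifa & mask);
//		/* dom0 is assumed mapped 1:1, so lookup_domain_mpa() reduces to: */
//		uint64_t pteval2 = (mpaddr & (~0UL << vhpt_pg_shift)) | PAGE_PHYS;
//		return (pteval & ~_PAGE_PPN_MASK)
//		     | (pteval2 & _PAGE_PPN_MASK)
//		     | _PAGE_PL_PRIV;
//	}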
2126 // now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r16=mppte,r24=logps)
2127 // FIXME: for now, just domain0 and skip range check
2128 // psr.ic already cleared
2129 // NOTE: r24 still contains ps (from above)
2130 shladd r24=r24,2,r0;;
2131 mov cr.itir=r24
2132 adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
2133 ld8 r23=[r23];;
2134 mov cr.ifa=r23
2135 tbit.z p6,p7=r17,0;;
2136 (p6) itc.d r22
2137 (p7) itc.i r22;;
2138 dv_serialize_data
2139 // vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
2140 thash r28=r23
2141 or r26=1,r22;;
2142 ttag r21=r23
2143 adds r25=8,r28
2144 mov r19=r28;;
2145 st8 [r25]=r24
2146 adds r20=16,r28;;
2147 st8 [r19]=r26
2148 st8 [r20]=r21;;
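// In C terms, the three stores above fill a long-format VHPT entry (pte at
// +0, itir at +8, tag at +16). A sketch, not the actual vhpt_insert() code:
//
//	static void vhpt_insert_sketch(uint64_t vaddr, uint64_t pte, uint64_t itir)
//	{
//		uint64_t *e = (uint64_t *)ia64_thash(vaddr);
//		e[1] = itir;			/* st8 [r25]=r24 */
//		e[0] = pte | 1;			/* st8 [r19]=r26, present bit set */
//		e[2] = ia64_ttag(vaddr);	/* st8 [r20]=r21 */
//	}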
2149 // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
2150 // TR_ENTRY = {page_flags,itir,addr,rid}
2151 tbit.z p6,p7=r17,0
2152 adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
2153 (p6) adds r27=IA64_VCPU_DTLB_OFFSET,r27
2154 (p7) adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
2155 st8 [r27]=r22,8;; // page_flags: already has pl >= 2 and p==1
2156 st8 [r27]=r24,8 // itir
2157 mov r19=-4096;;
2158 and r23=r23,r19;;
2159 st8 [r27]=r23,8 // ifa & ~0xfff
2160 adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
2161 extr.u r25=r23,61,3;;
2162 shladd r29=r25,3,r29;;
2163 ld8 r29=[r29]
2164 movl r20=IA64_RR_RID_MASK;;
2165 and r29=r29,r20;;
2166 st8 [r27]=r29,-8;; // rid
2167 // if ps == 12 there is nothing to trim; skip the ps > 12 case below
2168 cmp.eq p7,p0=12<<IA64_ITIR_PS,r24
2169 (p7) br.cond.sptk.many 1f;;
2170 // if (ps > 12) {
2171 // trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
2172 extr.u r29=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
2173 mov r28=1;;
2174 shl r26=r28,r29;;
2175 adds r29=-12,r29;;
2176 shl r25=r28,r29;;
2177 mov r29=-1
2178 adds r26=-1,r26
2179 adds r25=-1,r25;;
2180 andcm r26=r29,r26 // ~((1UL<<ps)-1)
2181 andcm r25=r29,r25;; // ~((1UL<<(ps-12))-1)
2182 ld8 r29=[r27];;
2183 and r29=r29,r26;;
2184 st8 [r27]=r29,-16;;
2185 ld8 r29=[r27];;
2186 extr.u r28=r29,12,38;;
2187 movl r26=0xfffc000000000fff;;
2188 and r29=r29,r26
2189 and r28=r28,r25;;
2190 shl r28=r28,12;;
2191 or r29=r29,r28;;
2192 st8 [r27]=r29;;
2193 1: // done with vcpu_set_tr_entry
2194 //PSCBX(vcpu,i/dtlb_pte) = mp_pte
2195 movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
2196 ld8 r27=[r27];;
2197 tbit.z p6,p7=r17,0;;
2198 (p6) adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
2199 (p7) adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
2200 st8 [r27]=r16;;
2201 // done with vcpu_itc_no_srlz
2203 // if hyper_itc, increment to point to next instruction
2204 tbit.z p7,p0=r17,1
2205 (p7) br.cond.sptk.few no_inc_iip;;
2207 mov r29=cr.ipsr
2208 mov r30=cr.iip;;
2209 extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
2210 cmp.eq p6,p7=2,r26 ;;
2211 (p6) mov r26=0
2212 (p6) adds r30=16,r30
2213 (p7) adds r26=1,r26
2214 ;;
2215 dep r29=r26,r29,IA64_PSR_RI_BIT,2
2216 ;;
2217 mov cr.ipsr=r29
2218 mov cr.iip=r30;;
2220 no_inc_iip:
2221 mov pr=r31,-1 ;;
2222 rfi
2223 ;;
2224 END(fast_insert)
2225 #endif