debuggers.hg

view xen/crypto/vmac.c @ 22855:1d1eec7e1fb4

xl: Perform minimal validation of virtual disk file while parsing config file

This patch performs some very basic validation of the virtual disk
file passed through the config file. The validation ensures that we
do not go too far with initialization, such as spawning qemu, while
the configuration may still have fundamental problems.

[ Patch fixed up to work with PHYSTYPE_EMPTY 22808:6ec61438713a -iwj ]

Signed-off-by: Kamala Narasimhan <kamala.narasimhan@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
author Kamala Narasimhan <kamala.narasimhan@gmail.com>
date Tue Jan 25 18:09:49 2011 +0000 (2011-01-25)
parents c6b22d0d1e90
children
line source
1 /* --------------------------------------------------------------------------
2 * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
3 * This implementation is hereby placed in the public domain.
4 * The authors offer no warranty. Use at your own risk.
5 * Please send bug reports to the authors.
6 * Last modified: 17 APR 08, 1700 PDT
7 * ----------------------------------------------------------------------- */
9 /* start for Xen */
10 #include <xen/config.h>
11 #include <xen/init.h>
12 #include <xen/types.h>
13 #include <xen/lib.h>
14 #include <crypto/vmac.h>
15 #define UINT64_C(x) x##ULL
16 /* end for Xen */
18 /* Enable code tuned for 64-bit registers; otherwise tuned for 32-bit */
19 #ifndef VMAC_ARCH_64
20 #define VMAC_ARCH_64 (__x86_64__ || __ppc64__ || _M_X64)
21 #endif
23 /* Enable code tuned for Intel SSE2 instruction set */
24 #if ((__SSE2__ || (_M_IX86_FP >= 2)) && ( ! VMAC_ARCH_64))
25 #define VMAC_USE_SSE2 1
26 #include <emmintrin.h>
27 #endif
29 /* Native word reads. Update (or define via compiler) if incorrect */
30 #ifndef VMAC_ARCH_BIG_ENDIAN /* Assume big-endian unless on the list */
31 #define VMAC_ARCH_BIG_ENDIAN \
32 (!(__x86_64__ || __i386__ || _M_IX86 || \
33 _M_X64 || __ARMEL__ || __MIPSEL__))
34 #endif
36 /* ----------------------------------------------------------------------- */
37 /* Constants and masks */
39 const uint64_t p64 = UINT64_C(0xfffffffffffffeff); /* 2^64 - 257 prime */
40 const uint64_t m62 = UINT64_C(0x3fffffffffffffff); /* 62-bit mask */
41 const uint64_t m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */
42 const uint64_t m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */
43 const uint64_t mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */
45 /* ----------------------------------------------------------------------- *
46 * The following routines are used in this implementation. They are
47 * written via macros to simulate zero-overhead call-by-reference.
48 * All have default implementations for when they are not defined in an
49 * architecture-specific manner.
50 *
51 * MUL64: 64x64->128-bit multiplication
52 * PMUL64: assumes top bits cleared on inputs
53 * ADD128: 128x128->128-bit addition
54 * GET_REVERSED_64: load and byte-reverse 64-bit word
55 * ----------------------------------------------------------------------- */
57 /* ----------------------------------------------------------------------- */
58 #if (__GNUC__ && (__x86_64__ || __amd64__))
59 /* ----------------------------------------------------------------------- */
61 #define ADD128(rh,rl,ih,il) \
62 asm ("addq %3, %1 \n\t" \
63 "adcq %2, %0" \
64 : "+r"(rh),"+r"(rl) \
65 : "r"(ih),"r"(il) : "cc");
67 #define MUL64(rh,rl,i1,i2) \
68 asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "r"(i2) : "cc")
70 #define PMUL64 MUL64
72 #define GET_REVERSED_64(p) \
73 ({uint64_t x; \
74 asm ("bswapq %0" : "=r" (x) : "0"(*(uint64_t *)(p))); x;})
76 /* ----------------------------------------------------------------------- */
77 #elif (__GNUC__ && __i386__)
78 /* ----------------------------------------------------------------------- */
80 #define GET_REVERSED_64(p) \
81 ({ uint64_t x; \
82 uint32_t *tp = (uint32_t *)(p); \
83 asm ("bswap %%edx\n\t" \
84 "bswap %%eax" \
85 : "=A"(x) \
86 : "a"(tp[1]), "d"(tp[0])); \
87 x; })
89 /* ----------------------------------------------------------------------- */
90 #elif (__GNUC__ && __ppc64__)
91 /* ----------------------------------------------------------------------- */
93 #define ADD128(rh,rl,ih,il) \
94 asm volatile ( "addc %1, %1, %3 \n\t" \
95 "adde %0, %0, %2" \
96 : "+r"(rh),"+r"(rl) \
97 : "r"(ih),"r"(il));
99 #define MUL64(rh,rl,i1,i2) \
100 { uint64_t _i1 = (i1), _i2 = (i2); \
101 rl = _i1 * _i2; \
102 asm volatile ("mulhdu %0, %1, %2" : "=r" (rh) : "r" (_i1), "r" (_i2));\
103 }
105 #define PMUL64 MUL64
107 #define GET_REVERSED_64(p) \
108 ({ uint32_t hi, lo, *_p = (uint32_t *)(p); \
109 asm volatile ("lwbrx %0, %1, %2" : "=r"(lo) : "b%"(0), "r"(_p) ); \
110 asm volatile ("lwbrx %0, %1, %2" : "=r"(hi) : "b%"(4), "r"(_p) ); \
111 ((uint64_t)hi << 32) | (uint64_t)lo; } )
113 /* ----------------------------------------------------------------------- */
114 #elif (__GNUC__ && (__ppc__ || __PPC__))
115 /* ----------------------------------------------------------------------- */
117 #define GET_REVERSED_64(p) \
118 ({ uint32_t hi, lo, *_p = (uint32_t *)(p); \
119 asm volatile ("lwbrx %0, %1, %2" : "=r"(lo) : "b%"(0), "r"(_p) ); \
120 asm volatile ("lwbrx %0, %1, %2" : "=r"(hi) : "b%"(4), "r"(_p) ); \
121 ((uint64_t)hi << 32) | (uint64_t)lo; } )
123 /* ----------------------------------------------------------------------- */
124 #elif (__GNUC__ && (__ARMEL__ || __ARM__))
125 /* ----------------------------------------------------------------------- */
127 #define bswap32(v) \
128 ({ uint32_t tmp,out; \
129 asm volatile( \
130 "eor %1, %2, %2, ror #16\n" \
131 "bic %1, %1, #0x00ff0000\n" \
132 "mov %0, %2, ror #8\n" \
133 "eor %0, %0, %1, lsr #8" \
134 : "=r" (out), "=&r" (tmp) \
135 : "r" (v)); \
136 out;})
138 /* ----------------------------------------------------------------------- */
139 #elif _MSC_VER
140 /* ----------------------------------------------------------------------- */
142 #include <intrin.h>
144 #if (_M_IA64 || _M_X64) && \
145 (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000)
146 #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
147 #pragma intrinsic(_umul128)
148 #define PMUL64 MUL64
149 #endif
151 /* MSVC uses add, adc in this version */
152 #define ADD128(rh,rl,ih,il) \
153 { uint64_t _il = (il); \
154 (rl) += (_il); \
155 (rh) += (ih) + ((rl) < (_il)); \
156 }
158 #if _MSC_VER >= 1300
159 #define GET_REVERSED_64(p) _byteswap_uint64(*(uint64_t *)(p))
160 #pragma intrinsic(_byteswap_uint64)
161 #endif
163 #if _MSC_VER >= 1400 && \
164 (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000)
165 #define MUL32(i1,i2) (__emulu((uint32_t)(i1),(uint32_t)(i2)))
166 #pragma intrinsic(__emulu)
167 #endif
169 /* ----------------------------------------------------------------------- */
170 #endif
171 /* ----------------------------------------------------------------------- */
173 #if __GNUC__
174 #define ALIGN(n) __attribute__ ((aligned(n)))
175 #define NOINLINE __attribute__ ((noinline))
176 #define FASTCALL
177 #elif _MSC_VER
178 #define ALIGN(n) __declspec(align(n))
179 #define NOINLINE __declspec(noinline)
180 #define FASTCALL __fastcall
181 #else
182 #define ALIGN(n)
183 #define NOINLINE
184 #define FASTCALL
185 #endif
187 /* ----------------------------------------------------------------------- */
188 /* Default implementations, if not defined above */
189 /* ----------------------------------------------------------------------- */
191 #ifndef ADD128
192 #define ADD128(rh,rl,ih,il) \
193 { uint64_t _il = (il); \
194 (rl) += (_il); \
195 if ((rl) < (_il)) (rh)++; \
196 (rh) += (ih); \
197 }
198 #endif
200 #ifndef MUL32
201 #define MUL32(i1,i2) ((uint64_t)(uint32_t)(i1)*(uint32_t)(i2))
202 #endif
204 #ifndef PMUL64 /* rh may not be same as i1 or i2 */
205 #define PMUL64(rh,rl,i1,i2) /* Assumes m doesn't overflow */ \
206 { uint64_t _i1 = (i1), _i2 = (i2); \
207 uint64_t m = MUL32(_i1,_i2>>32) + MUL32(_i1>>32,_i2); \
208 rh = MUL32(_i1>>32,_i2>>32); \
209 rl = MUL32(_i1,_i2); \
210 ADD128(rh,rl,(m >> 32),(m << 32)); \
211 }
212 #endif
214 #ifndef MUL64
215 #define MUL64(rh,rl,i1,i2) \
216 { uint64_t _i1 = (i1), _i2 = (i2); \
217 uint64_t m1= MUL32(_i1,_i2>>32); \
218 uint64_t m2= MUL32(_i1>>32,_i2); \
219 rh = MUL32(_i1>>32,_i2>>32); \
220 rl = MUL32(_i1,_i2); \
221 ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
222 ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
223 }
224 #endif
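/* Added note (not in the original source): with i1 = a_hi*2^32 + a_lo and
 * i2 = b_hi*2^32 + b_lo, the schoolbook product is
 *
 *     i1*i2 = (a_hi*b_hi) << 64  +  (a_hi*b_lo + a_lo*b_hi) << 32  +  a_lo*b_lo
 *
 * The portable PMUL64 above folds the two middle terms into one 64-bit value
 * m, which could wrap for arbitrary inputs; the callers below only pass poly
 * key words masked with mpoly (or their doubles) as one operand, so each
 * middle term stays below 2^62 and their sum below 2^63.  MUL64 adds the two
 * middle terms separately and is therefore safe for any inputs. */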
226 #ifndef GET_REVERSED_64
227 #ifndef bswap64
228 #ifndef bswap32
229 #define bswap32(x) \
230 ({ uint32_t bsx = (x); \
231 ((((bsx) & 0xff000000u) >> 24) | (((bsx) & 0x00ff0000u) >> 8) | \
232 (((bsx) & 0x0000ff00u) << 8) | (((bsx) & 0x000000ffu) << 24)); })
233 #endif
234 #define bswap64(x) \
235 ({ union { uint64_t ll; uint32_t l[2]; } w, r; \
236 w.ll = (x); \
237 r.l[0] = bswap32 (w.l[1]); \
238 r.l[1] = bswap32 (w.l[0]); \
239 r.ll; })
240 #endif
241 #define GET_REVERSED_64(p) bswap64(*(uint64_t *)(p))
242 #endif
244 /* ----------------------------------------------------------------------- */
246 #if (VMAC_PREFER_BIG_ENDIAN)
247 # define get64PE get64BE
248 #else
249 # define get64PE get64LE
250 #endif
252 #if (VMAC_ARCH_BIG_ENDIAN)
253 # define get64BE(ptr) (*(uint64_t *)(ptr))
254 # define get64LE(ptr) GET_REVERSED_64(ptr)
255 #else /* assume little-endian */
256 # define get64BE(ptr) GET_REVERSED_64(ptr)
257 # define get64LE(ptr) (*(uint64_t *)(ptr))
258 #endif
261 /* --------------------------------------------------------------------- *
262 * For highest performance the L1 NH and L2 polynomial hashes should be
263 * carefully implemented to take advantage of one's target architecture.
264 * Here these two hash functions are defined multiple times; once for
265 * 64-bit architectures, once for 32-bit SSE2 architectures, and once
266 * for the remaining (32-bit) architectures.
267 * For each, nh_16 *must* be defined (works on multiples of 16 bytes).
268 * Optionally, nh_vmac_nhbytes can be defined (for multiples of
269 * VMAC_NHBYTES), and nh_16_2 and nh_vmac_nhbytes_2 (versions that do two
270 * NH computations at once).
271 * --------------------------------------------------------------------- */
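/* Reference sketch (added; not part of the original file): the quantity every
 * nh_16 variant below computes, written directly with the GCC
 * "unsigned __int128" extension, which is assumed to be available here.
 * NH adds message and key words with 64-bit wrap-around and sums the
 * resulting products into a 128-bit accumulator. */
static void nh_16_reference(uint64_t *mp, const uint64_t *kp, int nw,
                            uint64_t *rh, uint64_t *rl)
{
    unsigned __int128 sum = 0;
    int i;
    for (i = 0; i < nw; i += 2)
        sum += (unsigned __int128)(get64PE(mp + i) + kp[i]) *
               (get64PE(mp + i + 1) + kp[i + 1]);
    *rh = (uint64_t)(sum >> 64);   /* same pair as (rh,rl) from nh_16 */
    *rl = (uint64_t)sum;
}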
273 /* ----------------------------------------------------------------------- */
274 #if VMAC_ARCH_64
275 /* ----------------------------------------------------------------------- */
277 #define nh_16(mp, kp, nw, rh, rl) \
278 { int i; uint64_t th, tl; \
279 rh = rl = 0; \
280 for (i = 0; i < nw; i+= 2) { \
281 MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
282 ADD128(rh,rl,th,tl); \
283 } \
284 }
285 #define nh_16_2(mp, kp, nw, rh, rl, rh1, rl1) \
286 { int i; uint64_t th, tl; \
287 rh1 = rl1 = rh = rl = 0; \
288 for (i = 0; i < nw; i+= 2) { \
289 MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
290 ADD128(rh,rl,th,tl); \
291 MUL64(th,tl,get64PE((mp)+i )+(kp)[i+2],get64PE((mp)+i+1)+(kp)[i+3]);\
292 ADD128(rh1,rl1,th,tl); \
293 } \
294 }
296 #if (VMAC_NHBYTES >= 64) /* These versions do 64-bytes of message at a time */
297 #define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \
298 { int i; uint64_t th, tl; \
299 rh = rl = 0; \
300 for (i = 0; i < nw; i+= 8) { \
301 MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
302 ADD128(rh,rl,th,tl); \
303 MUL64(th,tl,get64PE((mp)+i+2)+(kp)[i+2],get64PE((mp)+i+3)+(kp)[i+3]);\
304 ADD128(rh,rl,th,tl); \
305 MUL64(th,tl,get64PE((mp)+i+4)+(kp)[i+4],get64PE((mp)+i+5)+(kp)[i+5]);\
306 ADD128(rh,rl,th,tl); \
307 MUL64(th,tl,get64PE((mp)+i+6)+(kp)[i+6],get64PE((mp)+i+7)+(kp)[i+7]);\
308 ADD128(rh,rl,th,tl); \
309 } \
310 }
311 #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh1, rl1) \
312 { int i; uint64_t th, tl; \
313 rh1 = rl1 = rh = rl = 0; \
314 for (i = 0; i < nw; i+= 8) { \
315 MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
316 ADD128(rh,rl,th,tl); \
317 MUL64(th,tl,get64PE((mp)+i )+(kp)[i+2],get64PE((mp)+i+1)+(kp)[i+3]);\
318 ADD128(rh1,rl1,th,tl); \
319 MUL64(th,tl,get64PE((mp)+i+2)+(kp)[i+2],get64PE((mp)+i+3)+(kp)[i+3]);\
320 ADD128(rh,rl,th,tl); \
321 MUL64(th,tl,get64PE((mp)+i+2)+(kp)[i+4],get64PE((mp)+i+3)+(kp)[i+5]);\
322 ADD128(rh1,rl1,th,tl); \
323 MUL64(th,tl,get64PE((mp)+i+4)+(kp)[i+4],get64PE((mp)+i+5)+(kp)[i+5]);\
324 ADD128(rh,rl,th,tl); \
325 MUL64(th,tl,get64PE((mp)+i+4)+(kp)[i+6],get64PE((mp)+i+5)+(kp)[i+7]);\
326 ADD128(rh1,rl1,th,tl); \
327 MUL64(th,tl,get64PE((mp)+i+6)+(kp)[i+6],get64PE((mp)+i+7)+(kp)[i+7]);\
328 ADD128(rh,rl,th,tl); \
329 MUL64(th,tl,get64PE((mp)+i+6)+(kp)[i+8],get64PE((mp)+i+7)+(kp)[i+9]);\
330 ADD128(rh1,rl1,th,tl); \
331 } \
332 }
333 #endif
335 #define poly_step(ah, al, kh, kl, mh, ml) \
336 { uint64_t t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
337 /* compute ab*cd, put bd into result registers */ \
338 PMUL64(t3h,t3l,al,kh); \
339 PMUL64(t2h,t2l,ah,kl); \
340 PMUL64(t1h,t1l,ah,2*kh); \
341 PMUL64(ah,al,al,kl); \
342 /* add 2 * ac to result */ \
343 ADD128(ah,al,t1h,t1l); \
344 /* add together ad + bc */ \
345 ADD128(t2h,t2l,t3h,t3l); \
346 /* now (ah,al), (t2l,2*t2h) need summing */ \
347 /* first add the high registers, carrying into t2h */ \
348 ADD128(t2h,ah,z,t2l); \
349 /* double t2h and add top bit of ah */ \
350 t2h = 2 * t2h + (ah >> 63); \
351 ah &= m63; \
352 /* now add the low registers */ \
353 ADD128(ah,al,mh,ml); \
354 ADD128(ah,al,z,t2h); \
355 }
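/* Added note (not in the original source): poly_step above performs one
 * Horner step of the level-2 polynomial hash.  With a = ah*2^64 + al,
 * k = kh*2^64 + kl and m = mh*2^64 + ml, it computes
 *
 *     a  <-  a*k + m   (mod 2^127 - 1)
 *
 * with the result only partially reduced (full reduction is deferred to
 * l3hash).  The doublings of kh and t2h use the fact that
 * 2^128 = 2 (mod 2^127 - 1), and the mpoly mask applied to the key halves
 * keeps every PMUL64 intermediate from overflowing. */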
357 /* ----------------------------------------------------------------------- */
358 #elif VMAC_USE_SSE2
359 /* ----------------------------------------------------------------------- */
361 // macros from Crypto++ for sharing inline assembly code between MSVC and GNU C
362 #if defined(__GNUC__)
363 // define these in two steps to allow arguments to be expanded
364 #define GNU_AS2(x, y) #x ", " #y ";"
365 #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
366 #define GNU_ASL(x) "\n" #x ":"
367 #define GNU_ASJ(x, y, z) #x " " #y #z ";"
368 #define AS2(x, y) GNU_AS2(x, y)
369 #define AS3(x, y, z) GNU_AS3(x, y, z)
370 #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
371 #define ASL(x) GNU_ASL(x)
372 #define ASJ(x, y, z) GNU_ASJ(x, y, z)
373 #else
374 #define AS2(x, y) __asm {x, y}
375 #define AS3(x, y, z) __asm {x, y, z}
376 #define ASS(x, y, a, b, c, d) __asm {x, y, _MM_SHUFFLE(a, b, c, d)}
377 #define ASL(x) __asm {label##x:}
378 #define ASJ(x, y, z) __asm {x label##y}
379 #endif
381 static void NOINLINE nh_16_func(const uint64_t *mp, const uint64_t *kp, size_t nw, uint64_t *rh, uint64_t *rl)
382 {
383 // This assembly version, using MMX registers, is just as fast as the
384 // intrinsics version (which uses XMM registers) on the Intel Core 2,
385 // but is much faster on the Pentium 4. In order to schedule multiplies
386 // as early as possible, the loop interleaves operations for the current
387 // block and the next block. To mask out high 32-bits, we use "movd"
388 // to move the lower 32-bits to the stack and then back. Surprisingly,
389 // this is faster than any other method.
390 #ifdef __GNUC__
391 __asm__ __volatile__
392 (
393 ".intel_syntax noprefix;"
394 #else
395 AS2( mov esi, mp)
396 AS2( mov edi, kp)
397 AS2( mov ecx, nw)
398 AS2( mov eax, rl)
399 AS2( mov edx, rh)
400 #endif
401 AS2( sub esp, 12)
402 AS2( movq mm6, [esi])
403 AS2( paddq mm6, [edi])
404 AS2( movq mm5, [esi+8])
405 AS2( paddq mm5, [edi+8])
406 AS2( add esi, 16)
407 AS2( add edi, 16)
408 AS2( movq mm4, mm6)
409 ASS( pshufw mm2, mm6, 1, 0, 3, 2)
410 AS2( pmuludq mm6, mm5)
411 ASS( pshufw mm3, mm5, 1, 0, 3, 2)
412 AS2( pmuludq mm5, mm2)
413 AS2( pmuludq mm2, mm3)
414 AS2( pmuludq mm3, mm4)
415 AS2( pxor mm7, mm7)
416 AS2( movd [esp], mm6)
417 AS2( psrlq mm6, 32)
418 AS2( movd [esp+4], mm5)
419 AS2( psrlq mm5, 32)
420 AS2( sub ecx, 2)
421 ASJ( jz, 1, f)
422 ASL(0)
423 AS2( movq mm0, [esi])
424 AS2( paddq mm0, [edi])
425 AS2( movq mm1, [esi+8])
426 AS2( paddq mm1, [edi+8])
427 AS2( add esi, 16)
428 AS2( add edi, 16)
429 AS2( movq mm4, mm0)
430 AS2( paddq mm5, mm2)
431 ASS( pshufw mm2, mm0, 1, 0, 3, 2)
432 AS2( pmuludq mm0, mm1)
433 AS2( movd [esp+8], mm3)
434 AS2( psrlq mm3, 32)
435 AS2( paddq mm5, mm3)
436 ASS( pshufw mm3, mm1, 1, 0, 3, 2)
437 AS2( pmuludq mm1, mm2)
438 AS2( pmuludq mm2, mm3)
439 AS2( pmuludq mm3, mm4)
440 AS2( movd mm4, [esp])
441 AS2( paddq mm7, mm4)
442 AS2( movd mm4, [esp+4])
443 AS2( paddq mm6, mm4)
444 AS2( movd mm4, [esp+8])
445 AS2( paddq mm6, mm4)
446 AS2( movd [esp], mm0)
447 AS2( psrlq mm0, 32)
448 AS2( paddq mm6, mm0)
449 AS2( movd [esp+4], mm1)
450 AS2( psrlq mm1, 32)
451 AS2( paddq mm5, mm1)
452 AS2( sub ecx, 2)
453 ASJ( jnz, 0, b)
454 ASL(1)
455 AS2( paddq mm5, mm2)
456 AS2( movd [esp+8], mm3)
457 AS2( psrlq mm3, 32)
458 AS2( paddq mm5, mm3)
459 AS2( movd mm4, [esp])
460 AS2( paddq mm7, mm4)
461 AS2( movd mm4, [esp+4])
462 AS2( paddq mm6, mm4)
463 AS2( movd mm4, [esp+8])
464 AS2( paddq mm6, mm4)
466 ASS( pshufw mm0, mm7, 3, 2, 1, 0)
467 AS2( psrlq mm7, 32)
468 AS2( paddq mm6, mm7)
469 AS2( punpckldq mm0, mm6)
470 AS2( psrlq mm6, 32)
471 AS2( paddq mm5, mm6)
472 AS2( movq [eax], mm0)
473 AS2( movq [edx], mm5)
474 AS2( add esp, 12)
475 #ifdef __GNUC__
476 ".att_syntax prefix;"
477 :
478 : "S" (mp), "D" (kp), "c" (nw), "a" (rl), "d" (rh)
479 : "memory", "cc"
480 );
481 #endif
482 }
483 #define nh_16(mp, kp, nw, rh, rl) nh_16_func(mp, kp, nw, &(rh), &(rl));
485 static void poly_step_func(uint64_t *ahi, uint64_t *alo, const uint64_t *kh,
486 const uint64_t *kl, const uint64_t *mh, const uint64_t *ml)
487 {
488 // This code tries to schedule the multiplies as early as possible to overcome
489 // the long latencies on the Pentium 4. It also minimizes "movq" instructions
490 // which are very expensive on the P4.
492 #define a0 [eax+0]
493 #define a1 [eax+4]
494 #define a2 [ebx+0]
495 #define a3 [ebx+4]
496 #define k0 [ecx+0]
497 #define k1 [ecx+4]
498 #define k2 [edx+0]
499 #define k3 [edx+4]
501 #ifdef __GNUC__
502 uint32_t temp;
503 __asm__ __volatile__
504 (
505 "mov %%ebx, %0;"
506 "mov %1, %%ebx;"
507 ".intel_syntax noprefix;"
508 #else
509 AS2( mov ebx, ahi)
510 AS2( mov edx, kh)
511 AS2( mov eax, alo)
512 AS2( mov ecx, kl)
513 AS2( mov esi, mh)
514 AS2( mov edi, ml)
515 #endif
517 AS2( movd mm0, a3)
518 AS2( movq mm4, mm0)
519 AS2( pmuludq mm0, k3) // a3*k3
520 AS2( movd mm1, a0)
521 AS2( pmuludq mm1, k2) // a0*k2
522 AS2( movd mm2, a1)
523 AS2( movd mm6, k1)
524 AS2( pmuludq mm2, mm6) // a1*k1
525 AS2( movd mm3, a2)
526 AS2( movq mm5, mm3)
527 AS2( movd mm7, k0)
528 AS2( pmuludq mm3, mm7) // a2*k0
529 AS2( pmuludq mm4, mm7) // a3*k0
530 AS2( pmuludq mm5, mm6) // a2*k1
531 AS2( psllq mm0, 1)
532 AS2( paddq mm0, [esi])
533 AS2( paddq mm0, mm1)
534 AS2( movd mm1, a1)
535 AS2( paddq mm4, mm5)
536 AS2( movq mm5, mm1)
537 AS2( pmuludq mm1, k2) // a1*k2
538 AS2( paddq mm0, mm2)
539 AS2( movd mm2, a0)
540 AS2( paddq mm0, mm3)
541 AS2( movq mm3, mm2)
542 AS2( pmuludq mm2, k3) // a0*k3
543 AS2( pmuludq mm3, mm7) // a0*k0
544 AS2( movd esi, mm0)
545 AS2( psrlq mm0, 32)
546 AS2( pmuludq mm7, mm5) // a1*k0
547 AS2( pmuludq mm5, k3) // a1*k3
548 AS2( paddq mm0, mm1)
549 AS2( movd mm1, a2)
550 AS2( pmuludq mm1, k2) // a2*k2
551 AS2( paddq mm0, mm2)
552 AS2( paddq mm0, mm4)
553 AS2( movq mm4, mm0)
554 AS2( movd mm2, a3)
555 AS2( pmuludq mm2, mm6) // a3*k1
556 AS2( pmuludq mm6, a0) // a0*k1
557 AS2( psrlq mm0, 31)
558 AS2( paddq mm0, mm3)
559 AS2( movd mm3, [edi])
560 AS2( paddq mm0, mm3)
561 AS2( movd mm3, a2)
562 AS2( pmuludq mm3, k3) // a2*k3
563 AS2( paddq mm5, mm1)
564 AS2( movd mm1, a3)
565 AS2( pmuludq mm1, k2) // a3*k2
566 AS2( paddq mm5, mm2)
567 AS2( movd mm2, [edi+4])
568 AS2( psllq mm5, 1)
569 AS2( paddq mm0, mm5)
570 AS2( movq mm5, mm0)
571 AS2( psllq mm4, 33)
572 AS2( psrlq mm0, 32)
573 AS2( paddq mm6, mm7)
574 AS2( movd mm7, esi)
575 AS2( paddq mm0, mm6)
576 AS2( paddq mm0, mm2)
577 AS2( paddq mm3, mm1)
578 AS2( psllq mm3, 1)
579 AS2( paddq mm0, mm3)
580 AS2( psrlq mm4, 1)
581 AS2( punpckldq mm5, mm0)
582 AS2( psrlq mm0, 32)
583 AS2( por mm4, mm7)
584 AS2( paddq mm0, mm4)
585 AS2( movq a0, mm5)
586 AS2( movq a2, mm0)
587 #ifdef __GNUC__
588 ".att_syntax prefix;"
589 "mov %0, %%ebx;"
590 : "=m" (temp)
591 : "m" (ahi), "D" (ml), "d" (kh), "a" (alo), "S" (mh), "c" (kl)
592 : "memory", "cc"
593 );
594 #endif
597 #undef a0
598 #undef a1
599 #undef a2
600 #undef a3
601 #undef k0
602 #undef k1
603 #undef k2
604 #undef k3
605 }
607 #define poly_step(ah, al, kh, kl, mh, ml) \
608 poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml))
610 /* ----------------------------------------------------------------------- */
611 #else /* not VMAC_ARCH_64 and not SSE2 */
612 /* ----------------------------------------------------------------------- */
614 #ifndef nh_16
615 #define nh_16(mp, kp, nw, rh, rl) \
616 { uint64_t t1,t2,m1,m2,t; \
617 int i; \
618 rh = rl = t = 0; \
619 for (i = 0; i < nw; i+=2) { \
620 t1 = get64PE(mp+i) + kp[i]; \
621 t2 = get64PE(mp+i+1) + kp[i+1]; \
622 m2 = MUL32(t1 >> 32, t2); \
623 m1 = MUL32(t1, t2 >> 32); \
624 ADD128(rh,rl,MUL32(t1 >> 32,t2 >> 32),MUL32(t1,t2)); \
625 rh += (uint64_t)(uint32_t)(m1 >> 32) + (uint32_t)(m2 >> 32); \
626 t += (uint64_t)(uint32_t)m1 + (uint32_t)m2; \
627 } \
628 ADD128(rh,rl,(t >> 32),(t << 32)); \
629 }
630 #endif
632 static void poly_step_func(uint64_t *ahi, uint64_t *alo, const uint64_t *kh,
633 const uint64_t *kl, const uint64_t *mh, const uint64_t *ml)
634 {
636 #if VMAC_ARCH_BIG_ENDIAN
637 #define INDEX_HIGH 0
638 #define INDEX_LOW 1
639 #else
640 #define INDEX_HIGH 1
641 #define INDEX_LOW 0
642 #endif
644 #define a0 *(((uint32_t*)alo)+INDEX_LOW)
645 #define a1 *(((uint32_t*)alo)+INDEX_HIGH)
646 #define a2 *(((uint32_t*)ahi)+INDEX_LOW)
647 #define a3 *(((uint32_t*)ahi)+INDEX_HIGH)
648 #define k0 *(((uint32_t*)kl)+INDEX_LOW)
649 #define k1 *(((uint32_t*)kl)+INDEX_HIGH)
650 #define k2 *(((uint32_t*)kh)+INDEX_LOW)
651 #define k3 *(((uint32_t*)kh)+INDEX_HIGH)
653 uint64_t p, q, t;
654 uint32_t t2;
656 p = MUL32(a3, k3);
657 p += p;
658 p += *(uint64_t *)mh;
659 p += MUL32(a0, k2);
660 p += MUL32(a1, k1);
661 p += MUL32(a2, k0);
662 t = (uint32_t)(p);
663 p >>= 32;
664 p += MUL32(a0, k3);
665 p += MUL32(a1, k2);
666 p += MUL32(a2, k1);
667 p += MUL32(a3, k0);
668 t |= ((uint64_t)((uint32_t)p & 0x7fffffff)) << 32;
669 p >>= 31;
670 p += (uint64_t)(((uint32_t*)ml)[INDEX_LOW]);
671 p += MUL32(a0, k0);
672 q = MUL32(a1, k3);
673 q += MUL32(a2, k2);
674 q += MUL32(a3, k1);
675 q += q;
676 p += q;
677 t2 = (uint32_t)(p);
678 p >>= 32;
679 p += (uint64_t)(((uint32_t*)ml)[INDEX_HIGH]);
680 p += MUL32(a0, k1);
681 p += MUL32(a1, k0);
682 q = MUL32(a2, k3);
683 q += MUL32(a3, k2);
684 q += q;
685 p += q;
686 *(uint64_t *)(alo) = (p << 32) | t2;
687 p >>= 32;
688 *(uint64_t *)(ahi) = p + t;
690 #undef a0
691 #undef a1
692 #undef a2
693 #undef a3
694 #undef k0
695 #undef k1
696 #undef k2
697 #undef k3
698 }
700 #define poly_step(ah, al, kh, kl, mh, ml) \
701 poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml))
703 /* ----------------------------------------------------------------------- */
704 #endif /* end of specialized NH and poly definitions */
705 /* ----------------------------------------------------------------------- */
707 /* At least nh_16 is defined. Define others as needed here */
708 #ifndef nh_16_2
709 #define nh_16_2(mp, kp, nw, rh, rl, rh2, rl2) \
710 nh_16(mp, kp, nw, rh, rl); \
711 nh_16(mp, ((kp)+2), nw, rh2, rl2);
712 #endif
713 #ifndef nh_vmac_nhbytes
714 #define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \
715 nh_16(mp, kp, nw, rh, rl)
716 #endif
717 #ifndef nh_vmac_nhbytes_2
718 #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh2, rl2) \
719 nh_vmac_nhbytes(mp, kp, nw, rh, rl); \
720 nh_vmac_nhbytes(mp, ((kp)+2), nw, rh2, rl2);
721 #endif
723 /* ----------------------------------------------------------------------- */
725 void vhash_abort(vmac_ctx_t *ctx)
726 {
727 ctx->polytmp[0] = ctx->polykey[0] ;
728 ctx->polytmp[1] = ctx->polykey[1] ;
729 #if (VMAC_TAG_LEN == 128)
730 ctx->polytmp[2] = ctx->polykey[2] ;
731 ctx->polytmp[3] = ctx->polykey[3] ;
732 #endif
733 ctx->first_block_processed = 0;
734 }
736 /* ----------------------------------------------------------------------- */
737 static uint64_t l3hash(uint64_t p1, uint64_t p2,
738 uint64_t k1, uint64_t k2, uint64_t len)
739 {
740 uint64_t rh, rl, t, z=0;
742 /* fully reduce (p1,p2)+(len,0) mod p127 */
743 t = p1 >> 63;
744 p1 &= m63;
745 ADD128(p1, p2, len, t);
746 /* At this point, (p1,p2) is at most 2^127+(len<<64) */
747 t = (p1 > m63) + ((p1 == m63) && (p2 == m64));
748 ADD128(p1, p2, z, t);
749 p1 &= m63;
751 /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
752 t = p1 + (p2 >> 32);
753 t += (t >> 32);
754 t += (uint32_t)t > 0xfffffffeu;
755 p1 += (t >> 32);
756 p2 += (p1 << 32);
758 /* compute (p1+k1)%p64 and (p2+k2)%p64 */
759 p1 += k1;
760 p1 += (0 - (p1 < k1)) & 257;
761 p2 += k2;
762 p2 += (0 - (p2 < k2)) & 257;
764 /* compute (p1+k1)*(p2+k2)%p64 */
765 MUL64(rh, rl, p1, p2);
766 t = rh >> 56;
767 ADD128(t, rl, z, rh);
768 rh <<= 8;
769 ADD128(t, rl, z, rh);
770 t += t << 8;
771 rl += t;
772 rl += (0 - (rl < t)) & 257;
773 rl += (0 - (rl > p64-1)) & 257;
774 return rl;
775 }
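/* Added summary (not in the original source), restating the in-line comments
 * above: the 128-bit input (p1,p2), with the message length folded in, is
 * first fully reduced modulo 2^127 - 1, then rewritten via division by
 * 2^64 - 2^32; each of the two resulting words is offset by a key word
 * modulo p64 = 2^64 - 257, and the returned value is the product of the two
 * offset words, again reduced modulo p64. */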
777 /* ----------------------------------------------------------------------- */
779 void vhash_update(unsigned char *m,
780 unsigned int mbytes, /* Positive multiple of VMAC_NHBYTES */
781 vmac_ctx_t *ctx)
782 {
783 uint64_t rh, rl, *mptr;
784 const uint64_t *kptr = (uint64_t *)ctx->nhkey;
785 int i;
786 uint64_t ch, cl;
787 uint64_t pkh = ctx->polykey[0];
788 uint64_t pkl = ctx->polykey[1];
789 #if (VMAC_TAG_LEN == 128)
790 uint64_t ch2, cl2, rh2, rl2;
791 uint64_t pkh2 = ctx->polykey[2];
792 uint64_t pkl2 = ctx->polykey[3];
793 #endif
795 mptr = (uint64_t *)m;
796 i = mbytes / VMAC_NHBYTES; /* Must be non-zero */
798 ch = ctx->polytmp[0];
799 cl = ctx->polytmp[1];
800 #if (VMAC_TAG_LEN == 128)
801 ch2 = ctx->polytmp[2];
802 cl2 = ctx->polytmp[3];
803 #endif
805 if ( ! ctx->first_block_processed) {
806 ctx->first_block_processed = 1;
807 #if (VMAC_TAG_LEN == 64)
808 nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl);
809 #else
810 nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2);
811 rh2 &= m62;
812 ADD128(ch2,cl2,rh2,rl2);
813 #endif
814 rh &= m62;
815 ADD128(ch,cl,rh,rl);
816 mptr += (VMAC_NHBYTES/sizeof(uint64_t));
817 i--;
818 }
820 while (i--) {
821 #if (VMAC_TAG_LEN == 64)
822 nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl);
823 #else
824 nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2);
825 rh2 &= m62;
826 poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2);
827 #endif
828 rh &= m62;
829 poly_step(ch,cl,pkh,pkl,rh,rl);
830 mptr += (VMAC_NHBYTES/sizeof(uint64_t));
831 }
833 ctx->polytmp[0] = ch;
834 ctx->polytmp[1] = cl;
835 #if (VMAC_TAG_LEN == 128)
836 ctx->polytmp[2] = ch2;
837 ctx->polytmp[3] = cl2;
838 #endif
839 #if VMAC_USE_SSE2
840 _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */
841 #endif
842 }
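/* Usage sketch (added; not part of the original file): the incremental
 * interface feeds whole multiples of VMAC_NHBYTES through vhash_update()
 * and lets vmac() finish the remaining tail, mirroring the commented-out
 * incremental test in main() further below.  All names are placeholders. */
static uint64_t vmac_incremental_sketch(unsigned char *msg, unsigned int msg_len,
                                        unsigned char nonce[16],
                                        unsigned char key[16])
{
    ALIGN(16) vmac_ctx_t ctx;
    uint64_t tagl;   /* receives the low tag half when VMAC_TAG_LEN == 128 */
    unsigned int whole = (msg_len / VMAC_NHBYTES) * VMAC_NHBYTES;

    vmac_set_key(key, &ctx);
    if (whole)                   /* vhash_update() wants a non-zero multiple */
        vhash_update(msg, whole, &ctx);
    return vmac(msg + whole, msg_len - whole, nonce, &tagl, &ctx);
}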
844 /* ----------------------------------------------------------------------- */
846 uint64_t vhash(unsigned char m[],
847 unsigned int mbytes,
848 uint64_t *tagl,
849 vmac_ctx_t *ctx)
850 {
851 uint64_t rh, rl, *mptr;
852 const uint64_t *kptr = (uint64_t *)ctx->nhkey;
853 int i, remaining;
854 uint64_t ch, cl;
855 uint64_t pkh = ctx->polykey[0];
856 uint64_t pkl = ctx->polykey[1];
857 #if (VMAC_TAG_LEN == 128)
858 uint64_t ch2, cl2, rh2, rl2;
859 uint64_t pkh2 = ctx->polykey[2];
860 uint64_t pkl2 = ctx->polykey[3];
861 #endif
863 mptr = (uint64_t *)m;
864 i = mbytes / VMAC_NHBYTES;
865 remaining = mbytes % VMAC_NHBYTES;
867 if (ctx->first_block_processed)
868 {
869 ch = ctx->polytmp[0];
870 cl = ctx->polytmp[1];
871 #if (VMAC_TAG_LEN == 128)
872 ch2 = ctx->polytmp[2];
873 cl2 = ctx->polytmp[3];
874 #endif
875 }
876 else if (i)
877 {
878 #if (VMAC_TAG_LEN == 64)
879 nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,ch,cl);
880 #else
881 nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,ch,cl,ch2,cl2);
882 ch2 &= m62;
883 ADD128(ch2,cl2,pkh2,pkl2);
884 #endif
885 ch &= m62;
886 ADD128(ch,cl,pkh,pkl);
887 mptr += (VMAC_NHBYTES/sizeof(uint64_t));
888 i--;
889 }
890 else if (remaining)
891 {
892 #if (VMAC_TAG_LEN == 64)
893 nh_16(mptr,kptr,2*((remaining+15)/16),ch,cl);
894 #else
895 nh_16_2(mptr,kptr,2*((remaining+15)/16),ch,cl,ch2,cl2);
896 ch2 &= m62;
897 ADD128(ch2,cl2,pkh2,pkl2);
898 #endif
899 ch &= m62;
900 ADD128(ch,cl,pkh,pkl);
901 mptr += (VMAC_NHBYTES/sizeof(uint64_t));
902 goto do_l3;
903 }
904 else /* Empty String */
905 {
906 ch = pkh; cl = pkl;
907 #if (VMAC_TAG_LEN == 128)
908 ch2 = pkh2; cl2 = pkl2;
909 #endif
910 goto do_l3;
911 }
913 while (i--) {
914 #if (VMAC_TAG_LEN == 64)
915 nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl);
916 #else
917 nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2);
918 rh2 &= m62;
919 poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2);
920 #endif
921 rh &= m62;
922 poly_step(ch,cl,pkh,pkl,rh,rl);
923 mptr += (VMAC_NHBYTES/sizeof(uint64_t));
924 }
925 if (remaining) {
926 #if (VMAC_TAG_LEN == 64)
927 nh_16(mptr,kptr,2*((remaining+15)/16),rh,rl);
928 #else
929 nh_16_2(mptr,kptr,2*((remaining+15)/16),rh,rl,rh2,rl2);
930 rh2 &= m62;
931 poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2);
932 #endif
933 rh &= m62;
934 poly_step(ch,cl,pkh,pkl,rh,rl);
935 }
937 do_l3:
938 #if VMAC_USE_SSE2
939 _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */
940 #endif
941 vhash_abort(ctx);
942 remaining *= 8;
943 #if (VMAC_TAG_LEN == 128)
944 *tagl = l3hash(ch2, cl2, ctx->l3key[2], ctx->l3key[3],remaining);
945 #endif
946 return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1],remaining);
947 }
949 /* ----------------------------------------------------------------------- */
951 uint64_t vmac(unsigned char m[],
952 unsigned int mbytes,
953 unsigned char n[16],
954 uint64_t *tagl,
955 vmac_ctx_t *ctx)
956 {
957 #if (VMAC_TAG_LEN == 64)
958 uint64_t *in_n, *out_p;
959 uint64_t p, h;
960 int i;
962 #if VMAC_CACHE_NONCES
963 in_n = ctx->cached_nonce;
964 out_p = ctx->cached_aes;
965 #else
966 uint64_t tmp[2];
967 in_n = out_p = tmp;
968 #endif
970 i = n[15] & 1;
971 #if VMAC_CACHE_NONCES
972 if ((*(uint64_t *)(n+8) != in_n[1]) ||
973 (*(uint64_t *)(n ) != in_n[0])) {
974 #endif
976 in_n[0] = *(uint64_t *)(n );
977 in_n[1] = *(uint64_t *)(n+8);
978 ((unsigned char *)in_n)[15] &= 0xFE;
979 aes_encryption(in_n, out_p, &ctx->cipher_key);
981 #if VMAC_CACHE_NONCES
982 ((unsigned char *)in_n)[15] |= (unsigned char)(1-i);
983 }
984 #endif
985 p = get64BE(out_p + i);
986 h = vhash(m, mbytes, (uint64_t *)0, ctx);
987 return p + h;
988 #else
989 uint64_t tmp[2];
990 uint64_t th,tl;
991 aes_encryption(n, (unsigned char *)tmp, &ctx->cipher_key);
992 th = vhash(m, mbytes, &tl, ctx);
993 th += get64BE(tmp);
994 *tagl = tl + get64BE(tmp+1);
995 return th;
996 #endif
997 }
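/* Usage sketch (added; not part of the original file): the all-in-one path,
 * matching what the self test in main() below does -- derive the internal
 * keys once with vmac_set_key(), then tag a complete message under a
 * 16-byte nonce.  Key and nonce values are the ones used by the self test. */
static uint64_t vmac_oneshot_sketch(unsigned char *msg, unsigned int msg_len)
{
    ALIGN(16) vmac_ctx_t ctx;
    ALIGN(4) unsigned char key[]   = "abcdefghijklmnop";
    ALIGN(4) unsigned char nonce[] = "\0\0\0\0\0\0\0\0bcdefghi";
    uint64_t tagl;   /* receives the low tag half when VMAC_TAG_LEN == 128 */

    vmac_set_key(key, &ctx);
    return vmac(msg, msg_len, nonce, &tagl, &ctx);
}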
999 /* ----------------------------------------------------------------------- */
1001 void vmac_set_key(unsigned char user_key[], vmac_ctx_t *ctx)
1002 {
1003 uint64_t in[2] = {0}, out[2];
1004 unsigned i;
1005 aes_key_setup(user_key, &ctx->cipher_key);
1007 /* Fill nh key */
1008 ((unsigned char *)in)[0] = 0x80;
1009 for (i = 0; i < sizeof(ctx->nhkey)/8; i+=2) {
1010 aes_encryption((unsigned char *)in, (unsigned char *)out,
1011 &ctx->cipher_key);
1012 ctx->nhkey[i ] = get64BE(out);
1013 ctx->nhkey[i+1] = get64BE(out+1);
1014 ((unsigned char *)in)[15] += 1;
1015 }
1017 /* Fill poly key */
1018 ((unsigned char *)in)[0] = 0xC0;
1019 in[1] = 0;
1020 for (i = 0; i < sizeof(ctx->polykey)/8; i+=2) {
1021 aes_encryption((unsigned char *)in, (unsigned char *)out,
1022 &ctx->cipher_key);
1023 ctx->polytmp[i ] = ctx->polykey[i ] = get64BE(out) & mpoly;
1024 ctx->polytmp[i+1] = ctx->polykey[i+1] = get64BE(out+1) & mpoly;
1025 ((unsigned char *)in)[15] += 1;
1026 }
1028 /* Fill ip key */
1029 ((unsigned char *)in)[0] = 0xE0;
1030 in[1] = 0;
1031 for (i = 0; i < sizeof(ctx->l3key)/8; i+=2) {
1032 do {
1033 aes_encryption((unsigned char *)in, (unsigned char *)out,
1034 &ctx->cipher_key);
1035 ctx->l3key[i ] = get64BE(out);
1036 ctx->l3key[i+1] = get64BE(out+1);
1037 ((unsigned char *)in)[15] += 1;
1038 } while (ctx->l3key[i] >= p64 || ctx->l3key[i+1] >= p64);
1039 }
1041 /* Invalidate nonce/aes cache and reset other elements */
1042 #if (VMAC_TAG_LEN == 64) && (VMAC_CACHE_NONCES)
1043 ctx->cached_nonce[0] = (uint64_t)-1; /* Ensure illegal nonce */
1044 ctx->cached_nonce[1] = (uint64_t)0; /* Ensure illegal nonce */
1045 #endif
1046 ctx->first_block_processed = 0;
1047 }
1049 /* ----------------------------------------------------------------------- */
1052 #if VMAC_RUN_TESTS
1054 #include <stdlib.h>
1055 #include <stdio.h>
1056 #include <time.h>
1057 #include <string.h>
1059 unsigned prime(void) /* Wake variable speed cpu, get rough speed estimate */
1060 {
1061 volatile uint64_t i;
1062 volatile uint64_t j=1;
1063 unsigned cnt=0;
1064 volatile clock_t ticks = clock();
1065 do {
1066 for (i = 0; i < 500000; i++) {
1067 uint64_t x = get64PE(&j);
1068 j = x * x + (uint64_t)ticks;
1069 }
1070 cnt++;
1071 } while (clock() - ticks < (CLOCKS_PER_SEC/2));
1072 return cnt; /* cnt is millions of iterations per second */
1073 }
1075 int main(void)
1076 {
1077 ALIGN(16) vmac_ctx_t ctx, ctx_aio, ctx_inc1, ctx_inc2;
1078 uint64_t res, tagl;
1079 void *p;
1080 unsigned char *m;
1081 ALIGN(4) unsigned char key[] = "abcdefghijklmnop";
1082 ALIGN(4) unsigned char nonce[] = "\0\0\0\0\0\0\0\0bcdefghi";
1083 unsigned int vector_lengths[] = {0,3,48,300,3000000};
1084 #if (VMAC_TAG_LEN == 64)
1085 ALIGN(4) char *should_be[] = {"2576BE1C56D8B81B","2D376CF5B1813CE5",
1086 "E8421F61D573D298","4492DF6C5CAC1BBE",
1087 "09BA597DD7601113"};
1088 #else
1089 ALIGN(4) char *should_be[] = {"472766C70F74ED23481D6D7DE4E80DAC",
1090 "4EE815A06A1D71EDD36FC75D51188A42",
1091 "09F2C80C8E1007A0C12FAE19FE4504AE",
1092 "66438817154850C61D8A412164803BCB",
1093 "2B6B02288FFC461B75485DE893C629DC"};
1094 #endif
1095 unsigned speed_lengths[] = {16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
1096 unsigned i, j, *speed_iters;
1097 clock_t ticks;
1098 double cpb;
1099 const unsigned int buf_len = 3 * (1 << 20);
1101 j = prime();
1102 i = sizeof(speed_lengths)/sizeof(speed_lengths[0]);
1103 speed_iters = (unsigned *)malloc(i*sizeof(speed_iters[0]));
1104 speed_iters[i-1] = j * (1 << 12);
1105 while (--i) speed_iters[i-1] = (unsigned)(1.3 * speed_iters[i]);
1107 /* Initialize context and message buffer, all 16-byte aligned */
1108 p = malloc(buf_len + 32);
1109 m = (unsigned char *)(((size_t)p + 16) & ~((size_t)15));
1110 memset(m, 0, buf_len + 16);
1111 vmac_set_key(key, &ctx);
1113 /* Test incremental and all-in-one interfaces for correctness */
1114 vmac_set_key(key, &ctx_aio);
1115 vmac_set_key(key, &ctx_inc1);
1116 vmac_set_key(key, &ctx_inc2);
1119 /*
1120 for (i = 0; i <= 512; i++) {
1121 vhash_update(m,(i/VMAC_NHBYTES)*VMAC_NHBYTES,&ctx_inc1);
1122 tagh = vmac(m+(i/VMAC_NHBYTES)*VMAC_NHBYTES, i%VMAC_NHBYTES,
1123 nonce, &tagl, &ctx);
1124 vhash_update(m,(i/VMAC_NHBYTES)*VMAC_NHBYTES,&ctx_inc1);
1125 for (j = 0; j < vector_lengths[i]; j++)
1126 m[j] = (unsigned char)('a'+j%3);
1127 }
1129 */
1131 /* Generate vectors */
1132 for (i = 0; i < sizeof(vector_lengths)/sizeof(unsigned int); i++) {
1133 for (j = 0; j < vector_lengths[i]; j++)
1134 m[j] = (unsigned char)('a'+j%3);
1135 res = vmac(m, vector_lengths[i], nonce, &tagl, &ctx);
1136 #if (VMAC_TAG_LEN == 64)
1137 printf("\'abc\' * %7u: %016llX Should be: %s\n",
1138 vector_lengths[i]/3,res,should_be[i]);
1139 #else
1140 printf("\'abc\' * %7u: %016llX%016llX\nShould be : %s\n",
1141 vector_lengths[i]/3,res,tagl,should_be[i]);
1142 #endif
1143 }
1145 /* Speed test */
1146 for (i = 0; i < sizeof(speed_lengths)/sizeof(unsigned int); i++) {
1147 ticks = clock();
1148 for (j = 0; j < speed_iters[i]; j++) {
1149 #if HASH_ONLY
1150 res = vhash(m, speed_lengths[i], &tagl, &ctx);
1151 #else
1152 res = vmac(m, speed_lengths[i], nonce, &tagl, &ctx);
1153 nonce[7]++;
1154 #endif
1155 }
1156 ticks = clock() - ticks;
1157 cpb = ((ticks*VMAC_HZ)/
1158 ((double)CLOCKS_PER_SEC*speed_lengths[i]*speed_iters[i]));
1159 printf("%4u bytes, %2.2f cpb\n", speed_lengths[i], cpb);
1160 }
1161 return 1;
1162 }
1164 #endif