debuggers.hg

view xen/common/lzo.c @ 19684:f210a633571c

Transcendent memory ("tmem") for Xen.

Tmem, when called from a tmem-capable (paravirtualized) guest, makes
use of otherwise unutilized ("fallow") memory to create and manage
pools of pages that can be accessed from the guest either as
"ephemeral" pages or as "persistent" pages. In either case, the pages
are not directly addressible by the guest, only copied to and fro via
the tmem interface. Ephemeral pages are a nice place for a guest to
put recently evicted clean pages that it might need again; these pages
can be reclaimed synchronously by Xen for other guests or other uses.
Persistent pages are a nice place for a guest to put "swap" pages to
avoid sending them to disk. These pages retain data as long as the
guest lives, but count against the guest memory allocation.

Tmem pages may optionally be compressed and, in certain cases, can be
shared between guests. Tmem also handles concurrency nicely and
provides limited QoS settings to combat malicious DoS attempts.
Save/restore and live migration support is not yet provided.

Tmem is primarily targeted for an x86 64-bit hypervisor. On a 32-bit
x86 hypervisor, it has limited functionality and testing due to
limitations of the xen heap. Nearly all of tmem is
architecture-independent; three routines remain to be ported to ia64
and it should work on that architecture too. It is also structured to
be portable to non-Xen environments.

Tmem defaults off (for now) and must be enabled with a "tmem" xen boot
option (and does nothing unless a tmem-capable guest is running). The
"tmem_compress" boot option enables compression which takes about 10x
more CPU but approximately doubles the number of pages that can be
stored.

Tmem can be controlled via several "xm" commands and many interesting
tmem statistics can be obtained. A README and internal specification
will follow, but lots of useful prose about tmem, as well as Linux
patches, can be found at http://oss.oracle.com/projects/tmem .

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:05:04 2009 +0100 (2009-05-26)
parents
children
line source
1 /*
2 * lzo.c -- LZO1X Compressor from MiniLZO
3 *
4 * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
5 *
6 * The full LZO package can be found at:
7 * http://www.oberhumer.com/opensource/lzo/
8 *
9 * Adapted for Xen (files combined and syntactic/header changes) by:
10 * Dan Magenheimer <dan.magenheimer@oracle.com>
11 *
12 */
14 /*
15 * lzodefs.h -- architecture, OS and compiler specific defines
16 *
17 * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
18 *
19 * The full LZO package can be found at:
20 * http://www.oberhumer.com/opensource/lzo/
21 *
22 * Changed for kernel use by:
23 * Nitin Gupta <nitingupta910@gmail.com>
24 * Richard Purdie <rpurdie@openedhand.com>
25 */
27 #define LZO_VERSION 0x2020
28 #define LZO_VERSION_STRING "2.02"
29 #define LZO_VERSION_DATE "Oct 17 2005"
31 #define M1_MAX_OFFSET 0x0400
32 #define M2_MAX_OFFSET 0x0800
33 #define M3_MAX_OFFSET 0x4000
34 #define M4_MAX_OFFSET 0xbfff
36 #define M1_MIN_LEN 2
37 #define M1_MAX_LEN 2
38 #define M2_MIN_LEN 3
39 #define M2_MAX_LEN 8
40 #define M3_MIN_LEN 3
41 #define M3_MAX_LEN 33
42 #define M4_MIN_LEN 3
43 #define M4_MAX_LEN 9
45 #define M1_MARKER 0
46 #define M2_MARKER 64
47 #define M3_MARKER 32
48 #define M4_MARKER 16
50 #define D_BITS 14
51 #define D_MASK ((1u << D_BITS) - 1)
52 #define D_HIGH ((D_MASK >> 1) + 1)
54 #define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \
55 << (s1)) ^ (p)[0])
56 #define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])
58 /*
59 * LZO1X Compressor from MiniLZO
60 *
61 * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
62 *
63 * The full LZO package can be found at:
64 * http://www.oberhumer.com/opensource/lzo/
65 *
66 * Changed for kernel use by:
67 * Nitin Gupta <nitingupta910@gmail.com>
68 * Richard Purdie <rpurdie@openedhand.com>
69 */
71 #include <xen/types.h>
72 #include <xen/lzo.h>
73 #define get_unaligned(_p) (*(_p))
74 #define put_unaligned(_val,_p) (*(_p)=_val)
75 #define get_unaligned_le16(_p) (*(u16 *)(_p))
77 static noinline size_t
78 _lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
79 unsigned char *out, size_t *out_len, void *wrkmem)
80 {
81 const unsigned char * const in_end = in + in_len;
82 const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5;
83 const unsigned char ** const dict = wrkmem;
84 const unsigned char *ip = in, *ii = ip;
85 const unsigned char *end, *m, *m_pos;
86 size_t m_off, m_len, dindex;
87 unsigned char *op = out;
89 ip += 4;
91 for (;;) {
92 dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK;
93 m_pos = dict[dindex];
95 if (m_pos < in)
96 goto literal;
98 if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
99 goto literal;
101 m_off = ip - m_pos;
102 if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
103 goto try_match;
105 dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f);
106 m_pos = dict[dindex];
108 if (m_pos < in)
109 goto literal;
111 if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
112 goto literal;
114 m_off = ip - m_pos;
115 if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
116 goto try_match;
118 goto literal;
120 try_match:
121 if (get_unaligned((const unsigned short *)m_pos)
122 == get_unaligned((const unsigned short *)ip)) {
123 if (likely(m_pos[2] == ip[2]))
124 goto match;
125 }
127 literal:
128 dict[dindex] = ip;
129 ++ip;
130 if (unlikely(ip >= ip_end))
131 break;
132 continue;
134 match:
135 dict[dindex] = ip;
136 if (ip != ii) {
137 size_t t = ip - ii;
139 if (t <= 3) {
140 op[-2] |= t;
141 } else if (t <= 18) {
142 *op++ = (t - 3);
143 } else {
144 size_t tt = t - 18;
146 *op++ = 0;
147 while (tt > 255) {
148 tt -= 255;
149 *op++ = 0;
150 }
151 *op++ = tt;
152 }
153 do {
154 *op++ = *ii++;
155 } while (--t > 0);
156 }
158 ip += 3;
159 if (m_pos[3] != *ip++ || m_pos[4] != *ip++
160 || m_pos[5] != *ip++ || m_pos[6] != *ip++
161 || m_pos[7] != *ip++ || m_pos[8] != *ip++) {
162 --ip;
163 m_len = ip - ii;
165 if (m_off <= M2_MAX_OFFSET) {
166 m_off -= 1;
167 *op++ = (((m_len - 1) << 5)
168 | ((m_off & 7) << 2));
169 *op++ = (m_off >> 3);
170 } else if (m_off <= M3_MAX_OFFSET) {
171 m_off -= 1;
172 *op++ = (M3_MARKER | (m_len - 2));
173 goto m3_m4_offset;
174 } else {
175 m_off -= 0x4000;
177 *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11)
178 | (m_len - 2));
179 goto m3_m4_offset;
180 }
181 } else {
182 end = in_end;
183 m = m_pos + M2_MAX_LEN + 1;
185 while (ip < end && *m == *ip) {
186 m++;
187 ip++;
188 }
189 m_len = ip - ii;
191 if (m_off <= M3_MAX_OFFSET) {
192 m_off -= 1;
193 if (m_len <= 33) {
194 *op++ = (M3_MARKER | (m_len - 2));
195 } else {
196 m_len -= 33;
197 *op++ = M3_MARKER | 0;
198 goto m3_m4_len;
199 }
200 } else {
201 m_off -= 0x4000;
202 if (m_len <= M4_MAX_LEN) {
203 *op++ = (M4_MARKER
204 | ((m_off & 0x4000) >> 11)
205 | (m_len - 2));
206 } else {
207 m_len -= M4_MAX_LEN;
208 *op++ = (M4_MARKER
209 | ((m_off & 0x4000) >> 11));
210 m3_m4_len:
211 while (m_len > 255) {
212 m_len -= 255;
213 *op++ = 0;
214 }
216 *op++ = (m_len);
217 }
218 }
219 m3_m4_offset:
220 *op++ = ((m_off & 63) << 2);
221 *op++ = (m_off >> 6);
222 }
224 ii = ip;
225 if (unlikely(ip >= ip_end))
226 break;
227 }
229 *out_len = op - out;
230 return in_end - ii;
231 }
233 int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
234 size_t *out_len, void *wrkmem)
235 {
236 const unsigned char *ii;
237 unsigned char *op = out;
238 size_t t;
240 if (unlikely(in_len <= M2_MAX_LEN + 5)) {
241 t = in_len;
242 } else {
243 t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem);
244 op += *out_len;
245 }
247 if (t > 0) {
248 ii = in + in_len - t;
250 if (op == out && t <= 238) {
251 *op++ = (17 + t);
252 } else if (t <= 3) {
253 op[-2] |= t;
254 } else if (t <= 18) {
255 *op++ = (t - 3);
256 } else {
257 size_t tt = t - 18;
259 *op++ = 0;
260 while (tt > 255) {
261 tt -= 255;
262 *op++ = 0;
263 }
265 *op++ = tt;
266 }
267 do {
268 *op++ = *ii++;
269 } while (--t > 0);
270 }
272 *op++ = M4_MARKER | 1;
273 *op++ = 0;
274 *op++ = 0;
276 *out_len = op - out;
277 return LZO_E_OK;
278 }
280 /*
281 * LZO1X Decompressor from MiniLZO
282 *
283 * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
284 *
285 * The full LZO package can be found at:
286 * http://www.oberhumer.com/opensource/lzo/
287 *
288 * Changed for kernel use by:
289 * Nitin Gupta <nitingupta910@gmail.com>
290 * Richard Purdie <rpurdie@openedhand.com>
291 */
293 #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
294 #define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x))
295 #define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op)
297 #define COPY4(dst, src) \
298 put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
300 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
301 unsigned char *out, size_t *out_len)
302 {
303 const unsigned char * const ip_end = in + in_len;
304 unsigned char * const op_end = out + *out_len;
305 const unsigned char *ip = in, *m_pos;
306 unsigned char *op = out;
307 size_t t;
309 *out_len = 0;
311 if (*ip > 17) {
312 t = *ip++ - 17;
313 if (t < 4)
314 goto match_next;
315 if (HAVE_OP(t, op_end, op))
316 goto output_overrun;
317 if (HAVE_IP(t + 1, ip_end, ip))
318 goto input_overrun;
319 do {
320 *op++ = *ip++;
321 } while (--t > 0);
322 goto first_literal_run;
323 }
325 while ((ip < ip_end)) {
326 t = *ip++;
327 if (t >= 16)
328 goto match;
329 if (t == 0) {
330 if (HAVE_IP(1, ip_end, ip))
331 goto input_overrun;
332 while (*ip == 0) {
333 t += 255;
334 ip++;
335 if (HAVE_IP(1, ip_end, ip))
336 goto input_overrun;
337 }
338 t += 15 + *ip++;
339 }
340 if (HAVE_OP(t + 3, op_end, op))
341 goto output_overrun;
342 if (HAVE_IP(t + 4, ip_end, ip))
343 goto input_overrun;
345 COPY4(op, ip);
346 op += 4;
347 ip += 4;
348 if (--t > 0) {
349 if (t >= 4) {
350 do {
351 COPY4(op, ip);
352 op += 4;
353 ip += 4;
354 t -= 4;
355 } while (t >= 4);
356 if (t > 0) {
357 do {
358 *op++ = *ip++;
359 } while (--t > 0);
360 }
361 } else {
362 do {
363 *op++ = *ip++;
364 } while (--t > 0);
365 }
366 }
368 first_literal_run:
369 t = *ip++;
370 if (t >= 16)
371 goto match;
372 m_pos = op - (1 + M2_MAX_OFFSET);
373 m_pos -= t >> 2;
374 m_pos -= *ip++ << 2;
376 if (HAVE_LB(m_pos, out, op))
377 goto lookbehind_overrun;
379 if (HAVE_OP(3, op_end, op))
380 goto output_overrun;
381 *op++ = *m_pos++;
382 *op++ = *m_pos++;
383 *op++ = *m_pos;
385 goto match_done;
387 do {
388 match:
389 if (t >= 64) {
390 m_pos = op - 1;
391 m_pos -= (t >> 2) & 7;
392 m_pos -= *ip++ << 3;
393 t = (t >> 5) - 1;
394 if (HAVE_LB(m_pos, out, op))
395 goto lookbehind_overrun;
396 if (HAVE_OP(t + 3 - 1, op_end, op))
397 goto output_overrun;
398 goto copy_match;
399 } else if (t >= 32) {
400 t &= 31;
401 if (t == 0) {
402 if (HAVE_IP(1, ip_end, ip))
403 goto input_overrun;
404 while (*ip == 0) {
405 t += 255;
406 ip++;
407 if (HAVE_IP(1, ip_end, ip))
408 goto input_overrun;
409 }
410 t += 31 + *ip++;
411 }
412 m_pos = op - 1;
413 m_pos -= get_unaligned_le16(ip) >> 2;
414 ip += 2;
415 } else if (t >= 16) {
416 m_pos = op;
417 m_pos -= (t & 8) << 11;
419 t &= 7;
420 if (t == 0) {
421 if (HAVE_IP(1, ip_end, ip))
422 goto input_overrun;
423 while (*ip == 0) {
424 t += 255;
425 ip++;
426 if (HAVE_IP(1, ip_end, ip))
427 goto input_overrun;
428 }
429 t += 7 + *ip++;
430 }
431 m_pos -= get_unaligned_le16(ip) >> 2;
432 ip += 2;
433 if (m_pos == op)
434 goto eof_found;
435 m_pos -= 0x4000;
436 } else {
437 m_pos = op - 1;
438 m_pos -= t >> 2;
439 m_pos -= *ip++ << 2;
441 if (HAVE_LB(m_pos, out, op))
442 goto lookbehind_overrun;
443 if (HAVE_OP(2, op_end, op))
444 goto output_overrun;
446 *op++ = *m_pos++;
447 *op++ = *m_pos;
448 goto match_done;
449 }
451 if (HAVE_LB(m_pos, out, op))
452 goto lookbehind_overrun;
453 if (HAVE_OP(t + 3 - 1, op_end, op))
454 goto output_overrun;
456 if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
457 COPY4(op, m_pos);
458 op += 4;
459 m_pos += 4;
460 t -= 4 - (3 - 1);
461 do {
462 COPY4(op, m_pos);
463 op += 4;
464 m_pos += 4;
465 t -= 4;
466 } while (t >= 4);
467 if (t > 0)
468 do {
469 *op++ = *m_pos++;
470 } while (--t > 0);
471 } else {
472 copy_match:
473 *op++ = *m_pos++;
474 *op++ = *m_pos++;
475 do {
476 *op++ = *m_pos++;
477 } while (--t > 0);
478 }
479 match_done:
480 t = ip[-2] & 3;
481 if (t == 0)
482 break;
483 match_next:
484 if (HAVE_OP(t, op_end, op))
485 goto output_overrun;
486 if (HAVE_IP(t + 1, ip_end, ip))
487 goto input_overrun;
489 *op++ = *ip++;
490 if (t > 1) {
491 *op++ = *ip++;
492 if (t > 2)
493 *op++ = *ip++;
494 }
496 t = *ip++;
497 } while (ip < ip_end);
498 }
500 *out_len = op - out;
501 return LZO_E_EOF_NOT_FOUND;
503 eof_found:
504 *out_len = op - out;
505 return (ip == ip_end ? LZO_E_OK :
506 (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
507 input_overrun:
508 *out_len = op - out;
509 return LZO_E_INPUT_OVERRUN;
511 output_overrun:
512 *out_len = op - out;
513 return LZO_E_OUTPUT_OVERRUN;
515 lookbehind_overrun:
516 *out_len = op - out;
517 return LZO_E_LOOKBEHIND_OVERRUN;
518 }