debuggers.hg

view xen/tools/symbols.c @ 22855:1d1eec7e1fb4

xl: Perform minimal validation of virtual disk file while parsing config file

This patch performs some very basic validation on the virtual disk
file passed through the config file. This validation ensures that we
don't go too far with the initialization like spawn qemu and more
while there could be some potentially fundamental issues.

[ Patch fixed up to work with PHYSTYPE_EMPTY 22808:6ec61438713a -iwj ]

Signed-off-by: Kamala Narasimhan <kamala.narasimhan@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
author Kamala Narasimhan <kamala.narasimhan@gmail.com>
date Tue Jan 25 18:09:49 2011 +0000 (2011-01-25)
parents 75e63b73075a
children
line source
1 /* Generate assembler source containing symbol information
2 *
3 * Copyright 2002 by Kai Germaschewski
4 *
5 * This software may be used and distributed according to the terms
6 * of the GNU General Public License, incorporated herein by reference.
7 *
8 * Usage: nm -n vmlinux | scripts/symbols [--all-symbols] > symbols.S
9 *
10 * ChangeLog:
11 *
12 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
13 * Changed the compression method from stem compression to "table lookup"
14 * compression
15 *
16 * Table compression uses all the unused char codes on the symbols and
17 * maps these to the most used substrings (tokens). For instance, it might
18 * map char code 0xF7 to represent "write_" and then in every symbol where
19 * "write_" appears it can be replaced by 0xF7, saving 5 bytes.
20 * The used codes themselves are also placed in the table so that the
21 * decompression can work without "special cases".
22 * Applied to kernel symbols, this usually produces a compression ratio
23 * of about 50%.
24 *
25 */
27 #define _GNU_SOURCE
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdint.h>
33 #include <ctype.h>
35 #define KSYM_NAME_LEN 127
/*
 * One symbol read from the input map.
 *
 * sym points to a heap buffer filled in by read_symbol(): the nm type
 * character followed by the symbol name.  len is the number of bytes of
 * that buffer currently in use; compress_symbols() shrinks it as byte
 * pairs are replaced by single token codes.
 */
struct sym_entry {
    unsigned long long addr;    /* address parsed from the map line */
    unsigned int len;           /* bytes in use at sym */
    unsigned char *sym;         /* type character, then the name */
};
/* Growable symbol array: table_size slots allocated, table_cnt in use. */
static struct sym_entry *table;
static unsigned int table_size;
static unsigned int table_cnt;

/* Section boundary addresses, recorded by read_symbol() when it meets
 * the symbols of the same names. */
static unsigned long long _stext, _etext;
static unsigned long long _sinittext, _einittext;
static unsigned long long _sextratext, _eextratext;

/* Command-line options, set by main(). */
static int all_symbols = 0;
static char symbol_prefix_char = '\0';

/* Occurrence count of every two-byte token, indexed by
 * first byte + (second byte << 8). */
int token_profit[0x10000];

/* the table that holds the result of the compression */
unsigned char best_table[256][2];
unsigned char best_table_len[256];
/* Print the command-line synopsis on stderr and terminate with status 1. */
static void usage(void)
{
    static const char synopsis[] =
        "Usage: symbols [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n";

    fputs(synopsis, stderr);
    exit(1);
}
/*
 * This ignores the intensely annoying "mapping symbols" found
 * in ARM ELF files: $a, $t and $d.
 *
 * Returns 1 when str is "$a", "$t" or "$d", optionally followed by a
 * '.'-introduced suffix, and 0 otherwise.  str must be NUL-terminated.
 */
static inline int is_arm_mapping_symbol(const char *str)
{
    if (str[0] != '$')
        return 0;

    /*
     * strchr() treats the terminating NUL of "atd" as part of the
     * string, so a bare "$" would otherwise match and str[2] would be
     * read past the end of the name.  Reject the NUL explicitly.
     */
    if (str[1] == '\0' || strchr("atd", str[1]) == NULL)
        return 0;

    return str[2] == '\0' || str[2] == '.';
}
74 static int read_symbol(FILE *in, struct sym_entry *s)
75 {
76 char str[500];
77 char *sym, stype;
78 int rc;
80 rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
81 if (rc != 3) {
82 if (rc != EOF) {
83 /* skip line */
84 if (fgets(str, 500, in) == NULL)
85 return -1; /* must check fgets result */
86 }
87 return -1;
88 }
90 sym = str;
91 /* skip prefix char */
92 if (symbol_prefix_char && str[0] == symbol_prefix_char)
93 sym++;
95 /* Ignore most absolute/undefined (?) symbols. */
96 if (strcmp(sym, "_stext") == 0)
97 _stext = s->addr;
98 else if (strcmp(sym, "_etext") == 0)
99 _etext = s->addr;
100 else if (strcmp(sym, "_sinittext") == 0)
101 _sinittext = s->addr;
102 else if (strcmp(sym, "_einittext") == 0)
103 _einittext = s->addr;
104 else if (strcmp(sym, "_sextratext") == 0)
105 _sextratext = s->addr;
106 else if (strcmp(sym, "_eextratext") == 0)
107 _eextratext = s->addr;
108 else if (toupper((uint8_t)stype) == 'A')
109 {
110 /* Keep these useful absolute symbols */
111 if (strcmp(sym, "__gp"))
112 return -1;
114 }
115 else if (toupper((uint8_t)stype) == 'U' ||
116 is_arm_mapping_symbol(sym))
117 return -1;
118 /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
119 else if (str[0] == '$')
120 return -1;
122 /* include the type field in the symbol name, so that it gets
123 * compressed together */
124 s->len = strlen(str) + 1;
125 s->sym = malloc(s->len + 1);
126 strcpy((char *)s->sym + 1, str);
127 s->sym[0] = stype;
129 return 0;
130 }
132 static int symbol_valid(struct sym_entry *s)
133 {
134 int offset = 1;
136 /* skip prefix char */
137 if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
138 offset++;
140 /* if --all-symbols is not specified, then symbols outside the text
141 * and inittext sections are discarded */
142 if (!all_symbols) {
143 if ((s->addr < _stext || s->addr > _etext)
144 && (s->addr < _sinittext || s->addr > _einittext)
145 && (s->addr < _sextratext || s->addr > _eextratext))
146 return 0;
147 /* Corner case. Discard any symbols with the same value as
148 * _etext _einittext or _eextratext; they can move between pass
149 * 1 and 2 when the symbols data are added. If these symbols
150 * move then they may get dropped in pass 2, which breaks the
151 * symbols rules.
152 */
153 if ((s->addr == _etext && strcmp((char*)s->sym + offset, "_etext")) ||
154 (s->addr == _einittext && strcmp((char*)s->sym + offset, "_einittext")) ||
155 (s->addr == _eextratext && strcmp((char*)s->sym + offset, "_eextratext")))
156 return 0;
157 }
159 /* Exclude symbols which vary between passes. */
160 if (strstr((char *)s->sym + offset, "_compiled."))
161 return 0;
163 return 1;
164 }
166 static void read_map(FILE *in)
167 {
168 while (!feof(in)) {
169 if (table_cnt >= table_size) {
170 table_size += 10000;
171 table = realloc(table, sizeof(*table) * table_size);
172 if (!table) {
173 fprintf(stderr, "out of memory\n");
174 exit (1);
175 }
176 }
177 if (read_symbol(in, &table[table_cnt]) == 0)
178 table_cnt++;
179 }
180 }
182 static void output_label(char *label)
183 {
184 if (symbol_prefix_char)
185 printf(".globl %c%s\n", symbol_prefix_char, label);
186 else
187 printf(".globl %s\n", label);
188 printf("\tALGN\n");
189 if (symbol_prefix_char)
190 printf("%c%s:\n", symbol_prefix_char, label);
191 else
192 printf("%s:\n", label);
193 }
195 /* uncompress a compressed symbol. When this function is called, the best table
196 * might still be compressed itself, so the function needs to be recursive */
197 static int expand_symbol(unsigned char *data, int len, char *result)
198 {
199 int c, rlen, total=0;
201 while (len) {
202 c = *data;
203 /* if the table holds a single char that is the same as the one
204 * we are looking for, then end the search */
205 if (best_table[c][0]==c && best_table_len[c]==1) {
206 *result++ = c;
207 total++;
208 } else {
209 /* if not, recurse and expand */
210 rlen = expand_symbol(best_table[c], best_table_len[c], result);
211 total += rlen;
212 result += rlen;
213 }
214 data++;
215 len--;
216 }
217 *result=0;
219 return total;
220 }
222 static void write_src(void)
223 {
224 unsigned int i, k, off;
225 unsigned int best_idx[256];
226 unsigned int *markers;
227 char buf[KSYM_NAME_LEN+1];
229 printf("#include <xen/config.h>\n");
230 printf("#include <asm/types.h>\n");
231 printf("#if BITS_PER_LONG == 64 && !defined(SYMBOLS_ORIGIN)\n");
232 printf("#define PTR .quad\n");
233 printf("#define ALGN .align 8\n");
234 printf("#else\n");
235 printf("#define PTR .long\n");
236 printf("#define ALGN .align 4\n");
237 printf("#endif\n");
239 printf("\t.section .rodata, \"a\"\n");
241 printf("#ifndef SYMBOLS_ORIGIN\n");
242 printf("#define SYMBOLS_ORIGIN 0\n");
243 output_label("symbols_addresses");
244 printf("#else\n");
245 output_label("symbols_offsets");
246 printf("#endif\n");
247 for (i = 0; i < table_cnt; i++) {
248 printf("\tPTR\t%#llx - SYMBOLS_ORIGIN\n", table[i].addr);
249 }
250 printf("\n");
252 output_label("symbols_num_syms");
253 printf("\t.long\t%d\n", table_cnt);
254 printf("\n");
256 /* table of offset markers, that give the offset in the compressed stream
257 * every 256 symbols */
258 markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
260 output_label("symbols_names");
261 off = 0;
262 for (i = 0; i < table_cnt; i++) {
263 if ((i & 0xFF) == 0)
264 markers[i >> 8] = off;
266 printf("\t.byte 0x%02x", table[i].len);
267 for (k = 0; k < table[i].len; k++)
268 printf(", 0x%02x", table[i].sym[k]);
269 printf("\n");
271 off += table[i].len + 1;
272 }
273 printf("\n");
275 output_label("symbols_markers");
276 for (i = 0; i < ((table_cnt + 255) >> 8); i++)
277 printf("\t.long\t%d\n", markers[i]);
278 printf("\n");
280 free(markers);
282 output_label("symbols_token_table");
283 off = 0;
284 for (i = 0; i < 256; i++) {
285 best_idx[i] = off;
286 expand_symbol(best_table[i], best_table_len[i], buf);
287 printf("\t.asciz\t\"%s\"\n", buf);
288 off += strlen(buf) + 1;
289 }
290 printf("\n");
292 output_label("symbols_token_index");
293 for (i = 0; i < 256; i++)
294 printf("\t.short\t%d\n", best_idx[i]);
295 printf("\n");
296 }
299 /* table lookup compression functions */
301 /* count all the possible tokens in a symbol */
302 static void learn_symbol(unsigned char *symbol, int len)
303 {
304 int i;
306 for (i = 0; i < len - 1; i++)
307 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
308 }
310 /* decrease the count for all the possible tokens in a symbol */
311 static void forget_symbol(unsigned char *symbol, int len)
312 {
313 int i;
315 for (i = 0; i < len - 1; i++)
316 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
317 }
319 /* remove all the invalid symbols from the table and do the initial token count */
320 static void build_initial_tok_table(void)
321 {
322 unsigned int i, pos;
324 pos = 0;
325 for (i = 0; i < table_cnt; i++) {
326 if ( symbol_valid(&table[i]) ) {
327 if (pos != i)
328 table[pos] = table[i];
329 learn_symbol(table[pos].sym, table[pos].len);
330 pos++;
331 }
332 }
333 table_cnt = pos;
334 }
/*
 * Find the first occurrence of the nlen-byte block n inside the
 * hlen-byte block h.
 *
 * Returns a pointer to the match inside h, or NULL when there is none
 * (in particular when the needle is longer than the haystack).  An
 * empty needle matches at h.
 */
static void *memmem_pvt(void *h, size_t hlen, void *n, size_t nlen)
{
    char *start = h;
    size_t last, i;

    /* Checked up front so that hlen - nlen below cannot wrap around. */
    if (nlen > hlen)
        return NULL;

    last = hlen - nlen;     /* last offset where a match still fits */
    for (i = 0; i <= last; i++)
        if (memcmp(start + i, n, nlen) == 0)
            return start + i;

    return NULL;
}
344 /* replace a given token in all the valid symbols. Use the sampled symbols
345 * to update the counts */
346 static void compress_symbols(unsigned char *str, int idx)
347 {
348 unsigned int i, len, size;
349 unsigned char *p1, *p2;
351 for (i = 0; i < table_cnt; i++) {
353 len = table[i].len;
354 p1 = table[i].sym;
356 /* find the token on the symbol */
357 p2 = memmem_pvt(p1, len, str, 2);
358 if (!p2) continue;
360 /* decrease the counts for this symbol's tokens */
361 forget_symbol(table[i].sym, len);
363 size = len;
365 do {
366 *p2 = idx;
367 p2++;
368 size -= (p2 - p1);
369 memmove(p2, p2 + 1, size);
370 p1 = p2;
371 len--;
373 if (size < 2) break;
375 /* find the token on the symbol */
376 p2 = memmem_pvt(p1, size, str, 2);
378 } while (p2);
380 table[i].len = len;
382 /* increase the counts for this symbol's new tokens */
383 learn_symbol(table[i].sym, len);
384 }
385 }
387 /* search the token with the maximum profit */
388 static int find_best_token(void)
389 {
390 int i, best, bestprofit;
392 bestprofit=-10000;
393 best = 0;
395 for (i = 0; i < 0x10000; i++) {
396 if (token_profit[i] > bestprofit) {
397 best = i;
398 bestprofit = token_profit[i];
399 }
400 }
401 return best;
402 }
404 /* this is the core of the algorithm: calculate the "best" table */
405 static void optimize_result(void)
406 {
407 int i, best;
409 /* using the '\0' symbol last allows compress_symbols to use standard
410 * fast string functions */
411 for (i = 255; i >= 0; i--) {
413 /* if this table slot is empty (it is not used by an actual
414 * original char code */
415 if (!best_table_len[i]) {
417 /* find the token with the breates profit value */
418 best = find_best_token();
420 /* place it in the "best" table */
421 best_table_len[i] = 2;
422 best_table[i][0] = best & 0xFF;
423 best_table[i][1] = (best >> 8) & 0xFF;
425 /* replace this token in all the valid symbols */
426 compress_symbols(best_table[i], i);
427 }
428 }
429 }
431 /* start by placing the symbols that are actually used on the table */
432 static void insert_real_symbols_in_table(void)
433 {
434 unsigned int i, j, c;
436 memset(best_table, 0, sizeof(best_table));
437 memset(best_table_len, 0, sizeof(best_table_len));
439 for (i = 0; i < table_cnt; i++) {
440 for (j = 0; j < table[i].len; j++) {
441 c = table[i].sym[j];
442 best_table[c][0]=c;
443 best_table_len[c]=1;
444 }
445 }
446 }
448 static void optimize_token_table(void)
449 {
450 build_initial_tok_table();
452 insert_real_symbols_in_table();
454 /* When valid symbol is not registered, exit to error */
455 if (!table_cnt) {
456 fprintf(stderr, "No valid symbol.\n");
457 exit(1);
458 }
460 optimize_result();
461 }
464 int main(int argc, char **argv)
465 {
466 if (argc >= 2) {
467 int i;
468 for (i = 1; i < argc; i++) {
469 if(strcmp(argv[i], "--all-symbols") == 0)
470 all_symbols = 1;
471 else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
472 char *p = &argv[i][16];
473 /* skip quote */
474 if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
475 p++;
476 symbol_prefix_char = *p;
477 } else
478 usage();
479 }
480 } else if (argc != 1)
481 usage();
483 read_map(stdin);
484 optimize_token_table();
485 write_src();
487 return 0;
488 }