/root/src/xen/xen/arch/x86/cpu/intel_cacheinfo.c
Line | Count | Source |
1 | | /* |
2 | | * Routines to identify caches on Intel CPUs. |
3 | | * |
4 | | * Changes: |
5 | | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) |
6 | | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. |
7 | | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. |
8 | | */ |
9 | | |
10 | | #include <xen/init.h> |
11 | | #include <xen/lib.h> |
12 | | #include <xen/errno.h> |
13 | | #include <asm/processor.h> |
14 | | |
15 | 0 | #define LVL_1_INST 1 |
16 | 0 | #define LVL_1_DATA 2 |
17 | 0 | #define LVL_2 3 |
18 | 0 | #define LVL_3 4 |
19 | 0 | #define LVL_TRACE 5 |
20 | | |
21 | | struct _cache_table |
22 | | { |
23 | | unsigned char descriptor; |
24 | | char cache_type; |
25 | | short size; |
26 | | }; |
27 | | |
28 | | /* All the cache descriptor types we care about (no TLB entries) */ |
29 | | static const struct _cache_table cache_table[] = |
30 | | { |
31 | | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ |
32 | | { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ |
33 | | { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ |
34 | | { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ |
35 | | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
36 | | { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
37 | | { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
38 | | { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
39 | | { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ |
40 | | { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ |
41 | | { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
42 | | { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ |
43 | | { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ |
44 | | { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
45 | | { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ |
46 | | { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
47 | | { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ |
48 | | { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ |
49 | | { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ |
50 | | { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ |
51 | | { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ |
52 | | { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ |
53 | | { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ |
54 | | { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ |
55 | | { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ |
56 | | { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ |
57 | | { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ |
58 | | { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ |
59 | | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
60 | | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
61 | | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
62 | | { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
63 | | { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ |
64 | | { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ |
65 | | { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ |
66 | | { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ |
67 | | { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ |
68 | | { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
69 | | { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
70 | | { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
71 | | { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
72 | | { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ |
73 | | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ |
74 | | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ |
75 | | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ |
76 | | { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ |
77 | | { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ |
78 | | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ |
79 | | { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ |
80 | | { 0x00, 0, 0} |
81 | | }; |
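
The table above is keyed by the one-byte descriptors that cpuid(2) returns, and the lookup loop further down walks it until the 0x00 terminator. As a rough standalone sketch of that descriptor protocol, assuming a GCC/clang toolchain whose <cpuid.h> provides __get_cpuid() (dump_leaf2_descriptors() is a hypothetical helper, not part of this file):

#include <cpuid.h>
#include <stdio.h>

/* cpuid(2) protocol: AL holds how many times the leaf must be
 * executed, bit 31 of a register marks it invalid, and the other
 * 15 bytes are one-byte cache/TLB descriptors.  Illustrative only. */
static void dump_leaf2_descriptors(void)
{
    unsigned int regs[4];
    unsigned char *dp = (unsigned char *)regs;
    unsigned int i, j, n;

    if (!__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]))
        return;
    n = regs[0] & 0xff;                    /* iteration count in AL */

    for (i = 0; i < n; i++) {
        __get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
        for (j = 0; j < 4; j++)            /* bit 31 set => invalid */
            if (regs[j] & 0x80000000u)
                regs[j] = 0;
        for (j = 1; j < 16; j++)           /* byte 0 is not a descriptor */
            if (dp[j])
                printf("descriptor 0x%02x\n", dp[j]);
    }
}
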
82 | | |
83 | | unsigned short num_cache_leaves; |
84 | | |
85 | | int cpuid4_cache_lookup(int index, struct cpuid4_info *this_leaf) |
86 | 48 | { |
87 | 48 | union _cpuid4_leaf_eax eax; |
88 | 48 | union _cpuid4_leaf_ebx ebx; |
89 | 48 | union _cpuid4_leaf_ecx ecx; |
90 | 48 | unsigned edx; |
91 | 48 | |
92 | 48 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
93 | 48 | if (eax.split.type == CACHE_TYPE_NULL) |
94 | 0 | return -EIO; /* better error ? */ |
95 | 48 | |
96 | 48 | this_leaf->eax = eax; |
97 | 48 | this_leaf->ebx = ebx; |
98 | 48 | this_leaf->ecx = ecx; |
99 | 48 | this_leaf->size = (ecx.split.number_of_sets + 1) * |
100 | 48 | (ebx.split.coherency_line_size + 1) * |
101 | 48 | (ebx.split.physical_line_partition + 1) * |
102 | 48 | (ebx.split.ways_of_associativity + 1); |
103 | 48 | return 0; |
104 | 48 | } |
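
Per the Intel SDM, the size computed above multiplies the four leaf-4 fields, each of which stores its value minus one: bytes = ways * partitions * line size * sets. A minimal standalone sketch of the same decode, assuming a toolchain whose <cpuid.h> provides __get_cpuid_count() (leaf4_size_bytes() is an illustrative name, not part of this file):

#include <cpuid.h>

/* Decode one cpuid(4) sub-leaf into a size in bytes; returns 0 for
 * an invalid index.  For example, 8 ways * 1 partition * 64-byte
 * lines * 64 sets = 32768 bytes, i.e. a 32K cache. */
static unsigned long leaf4_size_bytes(unsigned int index)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid_count(4, index, &eax, &ebx, &ecx, &edx) ||
        (eax & 0x1f) == 0)                     /* CACHE_TYPE_NULL */
        return 0;

    return (unsigned long)((ebx >> 22) + 1)    /* ways, EBX bits 31:22 */
         * (((ebx >> 12) & 0x3ff) + 1)         /* partitions, bits 21:12 */
         * ((ebx & 0xfff) + 1)                 /* line size, bits 11:0 */
         * (ecx + 1);                          /* number of sets */
}
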
105 | | |
106 | | static int find_num_cache_leaves(void) |
107 | 1 | { |
108 | 1 | unsigned int eax, ebx, ecx, edx; |
109 | 1 | union _cpuid4_leaf_eax cache_eax; |
110 | 1 | int i = -1; |
111 | 1 | |
112 | 5 | do { |
113 | 5 | ++i; |
114 | 5 | /* Loop over cpuid(4) sub-leaves to find num_cache_leaves */ |
115 | 5 | cpuid_count(4, i, &eax, &ebx, &ecx, &edx); |
116 | 5 | cache_eax.full = eax; |
117 | 5 | } while (cache_eax.split.type != CACHE_TYPE_NULL); |
118 | 1 | return i; |
119 | 1 | } |
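
The loop above stops at the first sub-leaf whose cache-type field (EAX bits 4:0) is CACHE_TYPE_NULL, so the value returned is the number of valid leaves. The same count can be sketched outside Xen under the <cpuid.h> assumption from the earlier examples (count_cache_leaves() is an illustrative name):

#include <cpuid.h>

/* Mirror of find_num_cache_leaves() above: count cpuid(4) sub-leaves
 * until the cache-type field reads 0. */
static unsigned int count_cache_leaves(void)
{
    unsigned int eax, ebx, ecx, edx, i = 0;

    while (__get_cpuid_count(4, i, &eax, &ebx, &ecx, &edx) &&
           (eax & 0x1f) != 0)
        ++i;
    return i;
}
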
120 | | |
121 | | unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) |
122 | 12 | { |
123 | 12 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ |
124 | 12 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ |
125 | 12 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ |
126 | 12 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; |
127 | 12 | |
128 | 12 | if (c->cpuid_level > 3) { |
129 | 12 | static int is_initialized; |
130 | 12 | |
131 | 12 | if (is_initialized == 0) { |
132 | 1 | /* Init num_cache_leaves from boot CPU */ |
133 | 1 | num_cache_leaves = find_num_cache_leaves(); |
134 | 1 | is_initialized++; |
135 | 1 | } |
136 | 12 | |
137 | 12 | /* |
138 | 12 | * Whenever possible, use cpuid(4), the deterministic |
139 | 12 | * cache parameters leaf, to find the cache details. |
140 | 12 | */ |
141 | 60 | for (i = 0; i < num_cache_leaves; i++) { |
142 | 48 | struct cpuid4_info this_leaf; |
143 | 48 | |
144 | 48 | int retval; |
145 | 48 | |
146 | 48 | retval = cpuid4_cache_lookup(i, &this_leaf); |
147 | 48 | if (retval >= 0) { |
148 | 48 | switch(this_leaf.eax.split.level) { |
149 | 24 | case 1: |
150 | 24 | if (this_leaf.eax.split.type == |
151 | 24 | CACHE_TYPE_DATA) |
152 | 12 | new_l1d = this_leaf.size/1024; |
153 | 12 | else if (this_leaf.eax.split.type == |
154 | 12 | CACHE_TYPE_INST) |
155 | 12 | new_l1i = this_leaf.size/1024; |
156 | 24 | break; |
157 | 12 | case 2: |
158 | 12 | new_l2 = this_leaf.size/1024; |
159 | 12 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
160 | 12 | index_msb = get_count_order(num_threads_sharing); |
161 | 12 | l2_id = c->apicid >> index_msb; |
162 | 12 | break; |
163 | 12 | case 3: |
164 | 12 | new_l3 = this_leaf.size/1024; |
165 | 12 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
166 | 12 | index_msb = get_count_order(num_threads_sharing); |
167 | 12 | l3_id = c->apicid >> index_msb; |
168 | 12 | break; |
169 | 0 | default: |
170 | 0 | break; |
171 | 48 | } |
172 | 48 | } |
173 | 48 | } |
174 | 12 | } |
175 | 12 | /* |
176 | 12 | * Don't use cpuid(2) if cpuid(4) is supported, except on P4 |
177 | 12 | * (family 15), where cpuid(2) is still needed for the trace cache. |
178 | 12 | */ |
179 | 12 | if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { |
180 | 0 | /* supports eax=2 call */ |
181 | 0 | int i, j, n; |
182 | 0 | int regs[4]; |
183 | 0 | unsigned char *dp = (unsigned char *)regs; |
184 | 0 | int only_trace = 0; |
185 | 0 | |
186 | 0 | if (num_cache_leaves != 0 && c->x86 == 15) |
187 | 0 | only_trace = 1; |
188 | 0 | |
189 | 0 | /* Number of times to iterate */ |
190 | 0 | n = cpuid_eax(2) & 0xFF; |
191 | 0 | |
192 | 0 | for ( i = 0 ; i < n ; i++ ) { |
193 | 0 | cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); |
194 | 0 | |
195 | 0 | /* If bit 31 is set, this is an unknown format */ |
196 | 0 | for ( j = 0 ; j < 3 ; j++ ) { |
197 | 0 | if ( regs[j] < 0 ) regs[j] = 0; |
198 | 0 | } |
199 | 0 | |
200 | 0 | /* Byte 0 of eax is the iteration count, not a descriptor */ |
201 | 0 | for ( j = 1 ; j < 16 ; j++ ) { |
202 | 0 | unsigned char des = dp[j]; |
203 | 0 | unsigned char k = 0; |
204 | 0 | |
205 | 0 | /* look up this descriptor in the table */ |
206 | 0 | while (cache_table[k].descriptor != 0) |
207 | 0 | { |
208 | 0 | if (cache_table[k].descriptor == des) { |
209 | 0 | if (only_trace && cache_table[k].cache_type != LVL_TRACE) |
210 | 0 | break; |
211 | 0 | switch (cache_table[k].cache_type) { |
212 | 0 | case LVL_1_INST: |
213 | 0 | l1i += cache_table[k].size; |
214 | 0 | break; |
215 | 0 | case LVL_1_DATA: |
216 | 0 | l1d += cache_table[k].size; |
217 | 0 | break; |
218 | 0 | case LVL_2: |
219 | 0 | l2 += cache_table[k].size; |
220 | 0 | break; |
221 | 0 | case LVL_3: |
222 | 0 | l3 += cache_table[k].size; |
223 | 0 | break; |
224 | 0 | case LVL_TRACE: |
225 | 0 | trace += cache_table[k].size; |
226 | 0 | break; |
227 | 0 | } |
228 | 0 | |
229 | 0 | break; |
230 | 0 | } |
231 | 0 | |
232 | 0 | k++; |
233 | 0 | } |
234 | 0 | } |
235 | 0 | } |
236 | 0 | } |
237 | 12 | |
238 | 12 | if (new_l1d) |
239 | 12 | l1d = new_l1d; |
240 | 12 | |
241 | 12 | if (new_l1i) |
242 | 12 | l1i = new_l1i; |
243 | 12 | |
244 | 12 | if (new_l2) { |
245 | 12 | l2 = new_l2; |
246 | 12 | } |
247 | 12 | |
248 | 12 | if (new_l3) { |
249 | 12 | l3 = new_l3; |
250 | 12 | } |
251 | 12 | |
252 | 12 | if (opt_cpu_info) { |
253 | 0 | if (trace) |
254 | 0 | printk("CPU: Trace cache: %dK uops", trace); |
255 | 0 | else if ( l1i ) |
256 | 0 | printk("CPU: L1 I cache: %dK", l1i); |
257 | 0 | |
258 | 0 | if (l1d) |
259 | 0 | printk(", L1 D cache: %dK\n", l1d); |
260 | 0 | else |
261 | 0 | printk("\n"); |
262 | 0 | |
263 | 0 | if (l2) |
264 | 0 | printk("CPU: L2 cache: %dK\n", l2); |
265 | 0 | |
266 | 0 | if (l3) |
267 | 0 | printk("CPU: L3 cache: %dK\n", l3); |
268 | 0 | } |
269 | 12 | |
270 | 12 | c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); |
271 | 12 | |
272 | 12 | return l2; |
273 | 12 | } |
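
A note on the l2_id/l3_id arithmetic in init_intel_cacheinfo(): leaf 4 reports (threads sharing the cache) - 1 in EAX, get_count_order() rounds the thread count up to its power-of-two order, and shifting the APIC ID right by that order yields an ID common to every thread sharing the cache. A self-contained sketch of that arithmetic (count_order() stands in for Xen's get_count_order(); the APIC IDs are made up for illustration):

#include <stdio.h>

/* Stand-in for Xen's get_count_order(): smallest order such that
 * (1 << order) >= n, i.e. ceil(log2(n)). */
static unsigned int count_order(unsigned int n)
{
    unsigned int order = 0;

    while ((1u << order) < n)
        ++order;
    return order;
}

int main(void)
{
    unsigned int num_threads_sharing = 2;          /* EAX field + 1 */
    unsigned int index_msb = count_order(num_threads_sharing);   /* 1 */

    /* Two SMT siblings drop their low APIC-ID bit and share l2_id 2. */
    printf("l2_id(apicid=4) = %u\n", 4u >> index_msb);
    printf("l2_id(apicid=5) = %u\n", 5u >> index_msb);
    return 0;
}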