debuggers.hg

changeset 20660:b7cf749e14fc

memory hotadd 1/7: Setup m2p table for hot-added memory

When new memory is added to the system, we need to update the m2p table
to cover the new memory range.

When memory is added, it is difficult to allocate contiguous pages, so we
allocate the memory from the newly added memory range. This also improves
locality in NUMA situations.

We don't support 1G mapping for hot memory, because AFAIK currently
hot-plug memory will not be that large.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Dec 11 08:53:57 2009 +0000 (2009-12-11)
parents 2e5032921b07
children adb62ca21d31
files xen/arch/x86/x86_64/mm.c
line diff
     1.1 --- a/xen/arch/x86/x86_64/mm.c	Fri Dec 11 08:52:17 2009 +0000
     1.2 +++ b/xen/arch/x86/x86_64/mm.c	Fri Dec 11 08:53:57 2009 +0000
     1.3 @@ -189,6 +189,246 @@ void __init pfn_pdx_hole_setup(unsigned 
     1.4      ma_top_mask         = pfn_top_mask << PAGE_SHIFT;
     1.5  }
     1.6  
     1.7 +/*
     1.8 + * Allocate page table pages for m2p table
     1.9 + */
    1.10 +struct mem_hotadd_info
    1.11 +{
    1.12 +    unsigned long spfn;
    1.13 +    unsigned long epfn;
    1.14 +    unsigned long cur;
    1.15 +};
    1.16 +
    1.17 +int hotadd_mem_valid(unsigned long pfn, struct mem_hotadd_info *info)
    1.18 +{
    1.19 +    return (pfn < info->epfn && pfn >= info->spfn);
    1.20 +}
    1.21 +
    1.22 +static unsigned long alloc_hotadd_mfn(struct mem_hotadd_info *info)
    1.23 +{
    1.24 +    unsigned mfn;
    1.25 +
    1.26 +    ASSERT((info->cur + ( 1UL << PAGETABLE_ORDER) < info->epfn) &&
    1.27 +            info->cur >= info->spfn);
    1.28 +
    1.29 +    mfn = info->cur;
    1.30 +    info->cur += (1UL << PAGETABLE_ORDER);
    1.31 +    return mfn;
    1.32 +}
    1.33 +
    1.34 +#define M2P_NO_MAPPED   0
    1.35 +#define M2P_2M_MAPPED   1
    1.36 +#define M2P_1G_MAPPED   2
    1.37 +static int m2p_mapped(unsigned long spfn)
    1.38 +{
    1.39 +    unsigned long va;
    1.40 +    l3_pgentry_t *l3_ro_mpt;
    1.41 +    l2_pgentry_t *l2_ro_mpt;
    1.42 +
    1.43 +    va = RO_MPT_VIRT_START + spfn * sizeof(*machine_to_phys_mapping);
    1.44 +    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]);
    1.45 +
    1.46 +    switch ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
    1.47 +             (_PAGE_PRESENT |_PAGE_PSE))
    1.48 +    {
    1.49 +        case _PAGE_PSE|_PAGE_PRESENT:
    1.50 +            return M2P_1G_MAPPED;
    1.51 +            break;
    1.52 +        /* Check for next level */
    1.53 +        case _PAGE_PRESENT:
    1.54 +            break;
    1.55 +        default:
    1.56 +            return M2P_NO_MAPPED;
    1.57 +            break;
    1.58 +    }
    1.59 +    l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
    1.60 +
    1.61 +    if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT)
    1.62 +        return M2P_2M_MAPPED;
    1.63 +
    1.64 +    return M2P_NO_MAPPED;
    1.65 +}
    1.66 +
    1.67 +/*
    1.68 + * Allocate and map the compatibility mode machine-to-phys table.
    1.69 + * spfn/epfn: the pfn ranges to be setup
    1.70 + * free_s/free_e: the pfn ranges that is free still
    1.71 + */
    1.72 +static int setup_compat_m2p_table(struct mem_hotadd_info *info)
    1.73 +{
    1.74 +    unsigned long i, va, smap, emap, rwva, epfn = info->epfn;
    1.75 +    unsigned int n, memflags;
    1.76 +    l3_pgentry_t *l3_ro_mpt = NULL;
    1.77 +    l2_pgentry_t *l2_ro_mpt = NULL;
    1.78 +    struct page_info *l1_pg;
    1.79 +
    1.80 +    smap = info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 2)) -1));
    1.81 +
    1.82 +    /*
    1.83 +     * Notice: For hot-added memory, only range below m2p_compat_vstart
    1.84 +     * will be filled up (assuming memory is discontinous when booting).
    1.85 +     */
    1.86 +    if   ((smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) )
    1.87 +        return 0;
    1.88 +
    1.89 +    if (epfn > (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START))
    1.90 +        epfn = (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2;
    1.91 +
    1.92 +    emap = ( (epfn + ((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1 )) &
    1.93 +                ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1) );
    1.94 +
    1.95 +    va = HIRO_COMPAT_MPT_VIRT_START +
    1.96 +         smap * sizeof(*compat_machine_to_phys_mapping);
    1.97 +    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]);
    1.98 +
    1.99 +    ASSERT(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PRESENT);
   1.100 +
   1.101 +    l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
   1.102 +
   1.103 +#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
   1.104 +#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
   1.105 +             sizeof(*compat_machine_to_phys_mapping))
   1.106 +    BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
   1.107 +                 sizeof(*compat_machine_to_phys_mapping));
   1.108 +
   1.109 +    for ( i = smap; i < emap; i += (1UL << (L2_PAGETABLE_SHIFT - 2)) )
   1.110 +    {
   1.111 +        va = HIRO_COMPAT_MPT_VIRT_START +
   1.112 +              i * sizeof(*compat_machine_to_phys_mapping);
   1.113 +
   1.114 +        rwva = RDWR_COMPAT_MPT_VIRT_START +
   1.115 +                i * sizeof(*compat_machine_to_phys_mapping);
   1.116 +
   1.117 +        if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT)
   1.118 +            continue;
   1.119 +
   1.120 +        for ( n = 0; n < CNT; ++n)
   1.121 +            if ( mfn_valid(i + n * PDX_GROUP_COUNT) )
   1.122 +                break;
   1.123 +        if ( n == CNT )
   1.124 +            continue;
   1.125 +
   1.126 +        memflags = MEMF_node(phys_to_nid(i << PAGE_SHIFT));
   1.127 +
   1.128 +        l1_pg = mfn_to_page(alloc_hotadd_mfn(info));
   1.129 +        map_pages_to_xen(rwva,
   1.130 +                    page_to_mfn(l1_pg),
   1.131 +                    1UL << PAGETABLE_ORDER,
   1.132 +                    PAGE_HYPERVISOR);
   1.133 +        memset((void *)rwva, 0x55, 1UL << L2_PAGETABLE_SHIFT);
   1.134 +        /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
   1.135 +        l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
   1.136 +    }
   1.137 +#undef CNT
   1.138 +#undef MFN
   1.139 +    return 0;
   1.140 +}
   1.141 +
   1.142 +/*
   1.143 + * Allocate and map the machine-to-phys table.
   1.144 + * The L3 for RO/RWRW MPT and the L2 for compatible MPT should be setup already
   1.145 + */
   1.146 +int setup_m2p_table(struct mem_hotadd_info *info)
   1.147 +{
   1.148 +    unsigned long i, va, smap, emap;
   1.149 +    unsigned int n, memflags;
   1.150 +    l2_pgentry_t *l2_ro_mpt = NULL;
   1.151 +    l3_pgentry_t *l3_ro_mpt = NULL;
   1.152 +    struct page_info *l1_pg, *l2_pg;
   1.153 +    int ret = 0;
   1.154 +
   1.155 +    ASSERT(l4e_get_flags(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)])
   1.156 +            & _PAGE_PRESENT);
   1.157 +    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]);
   1.158 +
   1.159 +    smap = (info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1)));
   1.160 +    emap = ((info->epfn + ((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1 )) &
   1.161 +                ~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1));
   1.162 +
   1.163 +    va = RO_MPT_VIRT_START + smap * sizeof(*machine_to_phys_mapping);
   1.164 +
   1.165 +#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
   1.166 +#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
   1.167 +             sizeof(*machine_to_phys_mapping))
   1.168 +
   1.169 +    BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
   1.170 +                 sizeof(*machine_to_phys_mapping));
   1.171 +
   1.172 +    i = smap;
   1.173 +    while ( i < emap )
   1.174 +    {
   1.175 +        switch ( m2p_mapped(i) )
   1.176 +        {
   1.177 +        case M2P_1G_MAPPED:
   1.178 +            i = ( i & ~((1UL << (L3_PAGETABLE_SHIFT - 3)) - 1)) +
   1.179 +                (1UL << (L3_PAGETABLE_SHIFT - 3));
   1.180 +            continue;
   1.181 +        case M2P_2M_MAPPED:
   1.182 +            i = (i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) +
   1.183 +                (1UL << (L2_PAGETABLE_SHIFT - 3));
   1.184 +            continue;
   1.185 +        default:
   1.186 +            break;
   1.187 +        }
   1.188 +
   1.189 +        va = RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping);
   1.190 +        memflags = MEMF_node(phys_to_nid(i << PAGE_SHIFT));
   1.191 +
   1.192 +        for ( n = 0; n < CNT; ++n)
   1.193 +            if ( mfn_valid(i + n * PDX_GROUP_COUNT) )
   1.194 +                break;
   1.195 +        if ( n == CNT )
   1.196 +            l1_pg = NULL;
   1.197 +        else
   1.198 +        {
   1.199 +            l1_pg = mfn_to_page(alloc_hotadd_mfn(info));
   1.200 +            map_pages_to_xen(
   1.201 +                        RDWR_MPT_VIRT_START + i * sizeof(unsigned long),
   1.202 +                        page_to_mfn(l1_pg),
   1.203 +                        1UL << PAGETABLE_ORDER,
   1.204 +                        PAGE_HYPERVISOR);
   1.205 +            memset((void *)(RDWR_MPT_VIRT_START + i * sizeof(unsigned long)),
   1.206 +                   0x55, 1UL << L2_PAGETABLE_SHIFT);
   1.207 +
   1.208 +            ASSERT(!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
   1.209 +                  _PAGE_PSE));
   1.210 +            if ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
   1.211 +              _PAGE_PRESENT )
   1.212 +                l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]) +
   1.213 +                  l2_table_offset(va);
   1.214 +            else
   1.215 +            {
   1.216 +                l2_pg = alloc_domheap_page(NULL, memflags);
   1.217 +
   1.218 +                if (!l2_pg)
   1.219 +                {
   1.220 +                    ret = -ENOMEM;
   1.221 +                    goto error;
   1.222 +                }
   1.223 +
   1.224 +                l2_ro_mpt = page_to_virt(l2_pg);
   1.225 +                clear_page(l2_ro_mpt);
   1.226 +                l3e_write(&l3_ro_mpt[l3_table_offset(va)],
   1.227 +                  l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
   1.228 +               l2_ro_mpt += l2_table_offset(va);
   1.229 +            }
   1.230 +
   1.231 +            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
   1.232 +            l2e_write(l2_ro_mpt, l2e_from_page(l1_pg,
   1.233 +                   /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
   1.234 +        }
   1.235 +        if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
   1.236 +            l2_ro_mpt = NULL;
   1.237 +        i += ( 1UL << (L2_PAGETABLE_SHIFT - 3));
   1.238 +    }
   1.239 +#undef CNT
   1.240 +#undef MFN
   1.241 +
   1.242 +    ret = setup_compat_m2p_table(info);
   1.243 +error:
   1.244 +    return ret;
   1.245 +}
   1.246 +
   1.247  void __init paging_init(void)
   1.248  {
   1.249      unsigned long i, mpt_size, va;