xenbits.xen.org Git - xenclient/kernel.git/commitdiff
This interface provides a mapping for each page in an address space to its
maps2-patches/maps2-make-proc-pid-clear_refs-option-under-config_embedded.patch
author Matt Mackall <mpm@selenic.com>
Tue, 6 Jan 2009 12:06:06 +0000 (12:06 +0000)
committer Matt Mackall <mpm@selenic.com>
Tue, 6 Jan 2009 12:06:06 +0000 (12:06 +0000)
physical page frame number, allowing precise determination of what pages are
mapped and what pages are shared between processes.

[akpm@linux-foundation.org: warning fix]
Signed-off-by: Matt Mackall <mpm@selenic.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/proc/base.c     |    8 +
 fs/proc/internal.h |    2
 fs/proc/task_mmu.c |  209 +++++++++++++++++++++++++++++++++++++++++++
 init/Kconfig       |   10 ++
 4 files changed, 228 insertions(+), 1 deletion(-)

fs/proc/base.c
fs/proc/internal.h
fs/proc/task_mmu.c
init/Kconfig

index 084724a6e5adf19fda9854adeaa27929a96aae33..50168decbbb0254a33ede45b5f254522a237dbf1 100644 (file)
@@ -124,6 +124,9 @@ enum pid_directory_inos {
 #ifdef CONFIG_PROC_SMAPS
        PROC_TGID_SMAPS,
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+       PROC_TGID_PAGEMAP,
+#endif
 #endif
 #ifdef CONFIG_SCHEDSTATS
        PROC_TGID_SCHEDSTAT,
@@ -175,6 +178,9 @@ enum pid_directory_inos {
 #ifdef CONFIG_PROC_SMAPS
        PROC_TID_SMAPS,
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+       PROC_TID_PAGEMAP,
+#endif
 #endif
 #ifdef CONFIG_SCHEDSTATS
        PROC_TID_SCHEDSTAT,
@@ -243,6 +249,9 @@ static struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_PROC_SMAPS
        E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUSR),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+       E(PROC_TGID_PAGEMAP,   "pagemap", S_IFREG|S_IRUSR),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
        E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
@@ -297,6 +306,9 @@ static struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_PROC_SMAPS
        E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUSR),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+       E(PROC_TID_PAGEMAP,    "pagemap", S_IFREG|S_IRUSR),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
        E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
@@ -918,7 +930,7 @@ out_no_task:
 }
 #endif
 
-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file * file, loff_t offset, int orig)
 {
        switch (orig) {
        case 0:
@@ -2001,6 +2013,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
                        inode->i_fop = &proc_smaps_operations;
                        break;
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+               case PROC_TID_PAGEMAP:
+               case PROC_TGID_PAGEMAP:
+                       inode->i_fop = &proc_pagemap_operations;
+                       break;
+#endif
 #endif
                case PROC_TID_MOUNTSTATS:
                case PROC_TGID_MOUNTSTATS:
index 983edb76a786cae1e966cef0b7cd5e905e82da3b..6e9d3c4ae5c20bc29b635a9ff6c6a30af4de3b5d 100644 (file)
@@ -39,11 +39,13 @@ extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
 extern int proc_pid_limits(struct task_struct *, char *);
+extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);
 
 extern struct file_operations proc_maps_operations;
 extern struct file_operations proc_numa_maps_operations;
 extern struct file_operations proc_smaps_operations;
 extern struct file_operations proc_clear_refs_operations;
+extern struct file_operations proc_pagemap_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
 
index 55a29799aa6a1156e3524c287718af9e16916b09..068348b1525d485496b00f85be41a842ca4637ee 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/ptrace.h>
 #include <linux/mempolicy.h>
 
 #include <asm/elf.h>
@@ -541,3 +542,211 @@ struct file_operations proc_numa_maps_operations = {
 };
 #endif
 
+#ifdef CONFIG_PROC_PAGEMAP
+/*
+ * State carried through a single pagemap_read() call while entries are
+ * collected in a staging buffer and copied out to user space.
+ */
+struct pagemapread {
+       struct mm_struct *mm;   /* address space being reported on */
+       unsigned long next;     /* next virtual address that needs an entry */
+       unsigned long *buf;     /* staging buffer of entries awaiting copy-out */
+       pte_t *ptebuf;          /* scratch copy of PTEs; only set up under CONFIG_HIGHPTE */
+       unsigned long pos;      /* file offset, advanced as bytes are copied out */
+       size_t count;           /* bytes still to be copied to user space */
+       int index;              /* number of entries currently staged in buf */
+       char __user *out;       /* where in the user buffer the next flush lands */
+};
+
+/*
+ * Copy the entries staged in pm->buf out to user space, capped at the
+ * number of bytes still wanted, then advance the user pointer and file
+ * position and empty the staging buffer.
+ *
+ * Returns 0 on success or -EFAULT if the copy to user space fails.
+ */
+static int flush_pagemap(struct pagemapread *pm)
+{
+       int bytes = min(pm->count, pm->index * sizeof(unsigned long));
+
+       if (copy_to_user(pm->out, pm->buf, bytes) != 0)
+               return -EFAULT;
+
+       /* Everything staged so far has been consumed. */
+       pm->index = 0;
+       pm->count -= bytes;
+       pm->pos += bytes;
+       pm->out += bytes;
+
+       cond_resched();
+       return 0;
+}
+
+/*
+ * Stage one entry (pfn) for the page at addr and record that the next
+ * page to report is the one following it.  The staging buffer is flushed
+ * as soon as it holds a full page of entries or enough to satisfy the
+ * remainder of the request.
+ *
+ * Returns 0 on success or the error from flush_pagemap().
+ */
+static int add_to_pagemap(unsigned long addr, unsigned long pfn,
+                         struct pagemapread *pm)
+{
+       size_t staged;
+
+       pm->buf[pm->index] = pfn;
+       pm->index++;
+       pm->next = addr + PAGE_SIZE;
+
+       staged = pm->index * sizeof(unsigned long);
+       if (staged < PAGE_SIZE && staged < pm->count)
+               return 0;
+
+       return flush_pagemap(pm);
+}
+
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+                            void *private)
+{
+       struct pagemapread *pm = private;
+       pte_t *pte;
+       int err;
+
+       pte = pte_offset_map(pmd, addr);
+
+#ifdef CONFIG_HIGHPTE
+       /* copy PTE directory to temporary buffer and unmap it */
+       memcpy(pm->ptebuf, pte, PAGE_ALIGN((unsigned long)pte) - (unsigned long)pte);
+       pte_unmap(pte);
+       pte = pm->ptebuf;
+#endif
+
+       for (; addr != end; pte++, addr += PAGE_SIZE) {
+               if (addr < pm->next)
+                       continue;
+               if (!pte_present(*pte))
+                       err = add_to_pagemap(addr, -1, pm);
+               else
+                       err = add_to_pagemap(addr, pte_pfn(*pte), pm);
+               if (err)
+                       return err;
+       }
+
+#ifndef CONFIG_HIGHPTE
+       pte_unmap(pte - 1);
+#endif
+
+       return 0;
+}
+
+/*
+ * Emit "not present" (-1) entries for every page from pm->next up to,
+ * but not including, end.
+ *
+ * Returns 0 once pm->next reaches end, or the first error reported by
+ * add_to_pagemap().
+ */
+static int pagemap_fill(struct pagemapread *pm, unsigned long end)
+{
+       int err = 0;
+
+       while (!err && pm->next != end)
+               err = add_to_pagemap(pm->next, -1UL, pm);
+
+       return err;
+}
+
+/* Walker configuration for pagemap_read(): pagemap_pte_range() runs per pmd. */
+static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
+
+/*
+ * /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one long
+ * representing the corresponding physical page frame number (PFN) or
+ * -1 if the page isn't present. This allows determining precisely
+ * which pages are mapped and comparing mapped pages between
+ * processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ *
+ * The first 4 bytes of this file form a simple header:
+ *
+ * first byte:   0 for big endian, 1 for little
+ * second byte:  page shift (eg 12 for 4096 byte pages)
+ * third byte:   entry size in bytes (currently either 4 or 8)
+ * fourth byte:  header size
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+                           size_t count, loff_t *ppos)
+{
+       struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+       unsigned long src = *ppos;
+       unsigned long *page;
+       unsigned long addr, end, vend, svpfn, evpfn;
+       struct mm_struct *mm;
+       struct vm_area_struct *vma;
+       struct pagemapread pm;
+       int ret = -ESRCH;
+
+       if (!task)
+               goto out_no_task;
+
+       ret = -EACCES;
+       if (!ptrace_may_attach(task))
+               goto out;
+
+       ret = -EIO;
+       svpfn = src / sizeof(unsigned long) - 1;
+       addr = PAGE_SIZE * svpfn;
+       if ((svpfn + 1) * sizeof(unsigned long) != src)
+               goto out;
+       evpfn = min((src + count) / sizeof(unsigned long),
+                   ((~0UL) >> PAGE_SHIFT) + 1);
+       count = (evpfn - svpfn) * sizeof(unsigned long);
+       end = PAGE_SIZE * evpfn;
+
+       ret = -ENOMEM;
+       page = kzalloc(PAGE_SIZE, GFP_USER);
+       if (!page)
+               goto out;
+
+#ifdef CONFIG_HIGHPTE
+       pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER);
+       if (!pm.ptebuf)
+               goto out_free;
+#endif
+
+       ret = 0;
+       mm = get_task_mm(task);
+       if (!mm)
+               goto out_freepte;
+
+       pm.mm = mm;
+       pm.next = addr;
+       pm.buf = page;
+       pm.pos = src;
+       pm.count = count;
+       pm.index = 0;
+       pm.out = buf;
+
+       if (svpfn == -1) {
+               add_to_pagemap(pm.next, 0, &pm);
+               ((char *)page)[0] = (ntohl(1) != 1);
+               ((char *)page)[1] = PAGE_SHIFT;
+               ((char *)page)[2] = sizeof(unsigned long);
+               ((char *)page)[3] = sizeof(unsigned long);
+       }
+
+       down_read(&mm->mmap_sem);
+       vma = find_vma(mm, pm.next);
+       while (pm.count > 0 && vma) {
+               if (!ptrace_may_attach(task)) {
+                       ret = -EIO;
+                       goto out_mm;
+               }
+               vend = min(vma->vm_start - 1, end - 1) + 1;
+               ret = pagemap_fill(&pm, vend);
+               if (ret || !pm.count)
+                       break;
+               vend = min(vma->vm_end - 1, end - 1) + 1;
+               ret = walk_page_range(mm, vma->vm_start, vend,
+                                     &pagemap_walk, &pm);
+               vma = vma->vm_next;
+       }
+       up_read(&mm->mmap_sem);
+
+       ret = pagemap_fill(&pm, end);
+
+       *ppos = pm.pos;
+       if (!ret)
+               ret = pm.pos - src;
+
+out_mm:
+       mmput(mm);
+out_freepte:
+#ifdef CONFIG_HIGHPTE
+       kfree(pm.ptebuf);
+out_free:
+#endif
+       kfree(page);
+out:
+       put_task_struct(task);
+out_no_task:
+       return ret;
+}
+
+/* File operations for /proc/pid/pagemap: only llseek and read are provided. */
+struct file_operations proc_pagemap_operations = {
+       .llseek         = mem_lseek, /* borrow this */
+       .read           = pagemap_read,
+};
+#endif
index a4d0c1266b04e41b94eaf83801c87dfc04e8a87c..63bc2c3ffa2dad81e459e39b12c3200f96335c14 100644 (file)
@@ -426,6 +426,16 @@ config PROC_CLEAR_REFS
           working set size. Disabling this interface will reduce
           the size of the kernel for small machines.
 
+config PROC_PAGEMAP
+       default y
+       bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU
+       help
+         The /proc/pid/pagemap interface allows reading the
+          kernel's virtual memory to page frame mapping to determine which
+          individual pages a process has mapped and which pages it shares
+          with other processes. Disabling this interface will reduce the
+          size of the kernel for small machines.
+
 endmenu                # General setup
 
 config RT_MUTEXES