Linux中进程内存RSS与cgroup内存的RSS统计 – 差异


(1)用户空间的匿名映射页(Anonymous pages in User Mode address spaces),比如调用malloc分配的内存,以及使用MAP_ANONYMOUS的mmap;当系统内存不够时,内核可以将这部分内存交换出去;
(2)用户空间的文件映射页(Mapped pages in User Mode address spaces),包含map file和map tmpfs;前者比如指定文件的mmap,后者比如IPC共享内存;当系统内存不够时,内核可以回收这些页,但回收之前可能需要与文件同步数据;
(3)文件缓存(page in page cache of disk file);发生在程序通过普通的read/write读写文件时,当系统内存不够时,内核可以回收这些页,但回收之前可能需要与文件同步数据;
(4)buffer pages,属于page cache;比如读取块设备文件。

其中(1)和(2)是算作进程的RSS,(3)和(4)属于page cache。



/proc/[pid]/stat

(23) vsize  %lu
        Virtual memory size in bytes.

(24) rss  %ld
        Resident Set Size: number of pages the process has in real memory.  This is just the pages which count toward text, data, or stack space.  This does not include pages which have not been demand-loaded in, or which are swapped out.


/*
 * Resident set size of an address space: the sum of the file_rss and
 * anon_rss counters of @mm.
 */
#define get_mm_rss(mm)                    \
    (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))

即RSS=file_rss + anon_rss

/proc/[pid]/statm

Provides information about memory usage, measured in pages.
The columns are:

  size       (1) total program size (same as VmSize in /proc/[pid]/status)
  resident   (2) resident set size (same as VmRSS in /proc/[pid]/status)
  share      (3) shared pages (i.e., backed by a file)
  text       (4) text (code)
  lib        (5) library (unused in Linux 2.6)
  data       (6) data + stack
  dt         (7) dirty pages (unused in Linux 2.6)


/*
 * Report the memory figures of address space @mm through the out-parameters:
 *   *shared   - the file_rss counter
 *   *text     - (PAGE_ALIGN(end_code) - (start_code & PAGE_MASK)) >> PAGE_SHIFT,
 *               presumably the number of pages spanned by the code segment
 *   *data     - total_vm minus shared_vm
 *   *resident - the file_rss counter plus the anon_rss counter
 * Returns mm->total_vm.
 * NOTE(review): per the surrounding article these values back
 * /proc/[pid]/statm - confirm against the caller.
 */
int task_statm(struct mm_struct *mm, int *shared, int *text,
           int *data, int *resident)
{
    *shared = get_mm_counter(mm, file_rss);
    *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
                                >> PAGE_SHIFT;
    *data = mm->total_vm - mm->shared_vm;
    /* resident = file_rss (just stored in *shared) + anon_rss */
    *resident = *shared + get_mm_counter(mm, anon_rss);
    return mm->total_vm;
}


/*
 * Excerpt of __do_fault(): only the lines relevant to RSS accounting are
 * quoted; "..." marks code elided by the article.
 */
static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long address, pmd_t *pmd,
        pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
    if (flags & FAULT_FLAG_WRITE) {
        /* write fault on a mapping without VM_SHARED */
        if (!(vma->vm_flags & VM_SHARED)) {
            anon = 1;   /* anon page */
...
        if (anon) {
            /* counted in the anon_rss counter of the mm */
            inc_mm_counter(mm, anon_rss);
            page_add_new_anon_rmap(page, vma, address);
        } else {
            /* otherwise counted in the file_rss counter */
            inc_mm_counter(mm, file_rss);
            page_add_file_rmap(page);
/* (the excerpt ends here; the rest of the function is not quoted) */

cgroup 的内存统计

stat file

memory.stat file includes following statistics

# per-memory cgroup local status
cache        - # of bytes of page cache memory.
rss        - # of bytes of anonymous and swap cache memory (includes transparent hugepages).
rss_huge    - # of bytes of anonymous transparent hugepages.
mapped_file    - # of bytes of mapped file (includes tmpfs/shmem)
pgpgin        - # of charging events to the memory cgroup. The charging event happens each time a page is accounted as either mapped anon page(RSS) or cache page(Page Cache) to the cgroup.
pgpgout        - # of uncharging events to the memory cgroup. The uncharging event happens each time a page is unaccounted from the cgroup.
swap        - # of bytes of swap usage
dirty        - # of bytes that are waiting to get written back to the disk.
writeback    - # of bytes of file/anon cache that are queued for syncing to disk.
inactive_anon    - # of bytes of anonymous and swap cache memory on inactive LRU list.
active_anon    - # of bytes of anonymous and swap cache memory on active LRU list.
inactive_file    - # of bytes of file-backed memory on inactive LRU list.
active_file    - # of bytes of file-backed memory on active LRU list.
unevictable    - # of bytes of memory that cannot be reclaimed (mlocked etc).


static voidmem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s){    s64 val;     /* per cpu stat */    val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE);    s->stat[MCS_CACHE] += val * PAGE_SIZE;    val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);    s->stat[MCS_RSS] += val * PAGE_SIZE;    val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED);    s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;    val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);    s->stat[MCS_PGPGIN] += val;    val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);    s->stat[MCS_PGPGOUT] += val;    if (do_swap_account) {        val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);        s->stat[MCS_SWAP] += val * PAGE_SIZE;    }     /* per zone stat */    val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);    s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE;    val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_ANON);    s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE;    val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_FILE);    s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE;    val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_FILE);    s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;    val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);    s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;}


struct mem_cgroup {...    /*     * statistics. This must be placed at the end of memcg.     */    struct mem_cgroup_stat stat;   //  统计数据}; /* memory cgroup 统计值   * Statistics for memory cgroup. */enum mem_cgroup_stat_index {    /*     * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.     */    MEM_CGROUP_STAT_CACHE,        /* # of pages charged as cache */    MEM_CGROUP_STAT_RSS,       /* # of pages charged as anon rss */    MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */    MEM_CGROUP_STAT_PGPGIN_COUNT,    /* # of pages paged in */    MEM_CGROUP_STAT_PGPGOUT_COUNT,    /* # of pages paged out */    MEM_CGROUP_STAT_EVENTS,    /* sum of pagein + pageout for internal use */    MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */     MEM_CGROUP_STAT_NSTATS,}; struct mem_cgroup_stat_cpu {    s64 count[MEM_CGROUP_STAT_NSTATS];} ____cacheline_aligned_in_smp; struct mem_cgroup_stat {    struct mem_cgroup_stat_cpu cpustat[0];};

rss and cache

cache    - # of bytes of page cache memory.
rss    - # of bytes of anonymous and swap cache memory (includes transparent hugepages).

static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
                     struct page_cgroup *pc,
                     long size)
{
...
    cpustat = &stat->cpustat[cpu];
    if (PageCgroupCache(pc))
        __mem_cgroup_stat_add_safe(cpustat,
            MEM_CGROUP_STAT_CACHE, numpages);
    else
        __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS,
            numpages);

static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
                       struct page_cgroup *pc,
                       enum charge_type ctype,
                       int page_size)
{
    switch (ctype) {
    case MEM_CGROUP_CHARGE_TYPE_CACHE:
    case MEM_CGROUP_CHARGE_TYPE_SHMEM: //file cache + shm
        SetPageCgroupCache(pc);
        SetPageCgroupUsed(pc);
        break;
    case MEM_CGROUP_CHARGE_TYPE_MAPPED:
        ClearPageCgroupCache(pc);
        SetPageCgroupUsed(pc);
        break;
    default:
        break;
    }
    //  更新统计值
    mem_cgroup_charge_statistics(mem, pc, page_size);

int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                gfp_t gfp_mask)
{
...
    if (page_is_file_cache(page))
        return mem_cgroup_charge_common(page, mm, gfp_mask,
                MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); ///file cache

    /* shmem */
    if (PageSwapCache(page)) {
        ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
        if (!ret)
            __mem_cgroup_commit_charge_swapin(page, mem,
                    MEM_CGROUP_CHARGE_TYPE_SHMEM);
    } else
        ret = mem_cgroup_charge_common(page, mm, gfp_mask, ///shm memory
                    MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);

可以看到,cache包含共享内存和file cache

mapped_file – # of bytes of mapped file (includes tmpfs/shmem)

void mem_cgroup_update_file_mapped(struct page *page, int val)
{
...
    __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val);

调用路径:__do_fault –> page_add_file_rmap –> mem_cgroup_update_file_mapped。

inactive_anon

inactive_anon    - # of bytes of anonymous and swap cache memory on inactive LRU list.

static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
    struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
{
...
        lru_cache_add_anon(page);

/**
 * lru_cache_add: add a page to the page lists
 * @page: the page to add
 */
static inline void lru_cache_add_anon(struct page *page)
{
    __lru_cache_add(page, LRU_INACTIVE_ANON);
}

可以看到,共享内存(shmem)的页被加入匿名页的LRU链表,因此统计在inactive_anon/active_anon中。


inactive_file – # of bytes of file-backed memory on inactive LRU list.
即文件使用的page cache(不包含共享内存)。

static inline void lru_cache_add_file(struct page *page)
{
    __lru_cache_add(page, LRU_INACTIVE_FILE);
}

调用路径:add_to_page_cache_lru –> lru_cache_add_file。


/*
 * Test program: get a System V shared memory segment of BUF_SIZE bytes,
 * attach it, write FILL_SIZE bytes of zeros into it so that those pages are
 * actually touched, then sleep forever so that the cgroup's memory.stat can
 * be inspected while the segment stays attached.
 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>

#define BUF_SIZE 4000000000   /* size requested from shmget(), in bytes */
#define FILL_SIZE 1000000000  /* bytes written with memset(); must be <= BUF_SIZE */
#define MYKEY 26              /* System V IPC key of the segment */

int main(int argc, char **argv)
{
    int shmid;
    char *shmptr;

    (void)argc;
    (void)argv;

    /*
     * NOTE: no permission bits are OR-ed into IPC_CREAT, so a newly created
     * segment gets mode 0 (the article's ipcs output shows perms 0, owner
     * root).  Attaching such a segment is expected to work only for a
     * privileged user; add e.g. 0600 to the flags to run unprivileged.
     */
    if ((shmid = shmget(MYKEY, BUF_SIZE, IPC_CREAT)) == -1) {
        fprintf(stderr, "Create Share Memory Error:%s\n\a", strerror(errno));
        exit(1);
    }

    if ((shmptr = shmat(shmid, 0, 0)) == (void *)-1) {
        fprintf(stderr, "shmat error: %s\n", strerror(errno));
        exit(1);
    }

    /* Touch only the first FILL_SIZE bytes of the segment. */
    memset(shmptr, '\0', FILL_SIZE);

    printf("sleep...\n");
    /* Never returns: keep the segment attached until the process is killed. */
    while (1)
        sleep(1);
}

执行程序前后,cgroup memory.stat的值:

# cat memory.stat
cache 1121185792
rss 23678976
rss_huge 0
mapped_file 14118912
inactive_anon 1002643456
active_anon 23687168
inactive_file 46252032
active_file 72282112


# cat memory.stat
cache 2121187328
rss 23760896
rss_huge 0
mapped_file 1014124544
inactive_anon 2002608128
active_anon 23736320
inactive_file 46247936
active_file 72286208

# ipcs -m
0x0000001a 229380     root       0          4000000000 1



(1)进程rss与cgroup rss的区别
进程的RSS为进程使用的所有物理内存(file_rss+anon_rss),即Anonymous pages+Mapped pages(包含共享内存)。
cgroup RSS为(anonymous and swap cache memory),不包含共享内存。
两者都不包含file cache。
(2)cgroup cache包含file cache和共享内存。

未经允许不得转载:99ya » Linux中进程内存RSS与cgroup内存的RSS统计 – 差异