Skip to content

Instantly share code, notes, and snippets.

@jhoblitt
Created July 7, 2023 15:35
Show Gist options
  • Save jhoblitt/ceabcb9b1f0cda68c508f9ea117fa5de to your computer and use it in GitHub Desktop.
Save jhoblitt/ceabcb9b1f0cda68c508f9ea117fa5de to your computer and use it in GitHub Desktop.
A ceph 17.2.6 mds suspected of leaking memory...
[root@rook-ceph-mds-auxtel-a-dfdfb685f-sdpmd ceph]# ceph daemon mds.auxtel-a heap stats 
mds.auxtel-a tcmalloc heap stats:------------------------------------------------
MALLOC:    11869592120 (11319.7 MiB) Bytes in use by application
MALLOC: +            0 (    0.0 MiB) Bytes in page heap freelist
MALLOC: +    582126792 (  555.2 MiB) Bytes in central cache freelist
MALLOC: +      6150656 (    5.9 MiB) Bytes in transfer cache freelist
MALLOC: +      3751680 (    3.6 MiB) Bytes in thread cache freelists
MALLOC: +     75235328 (   71.8 MiB) Bytes in malloc metadata
MALLOC:   ------------
MALLOC: =  12536856576 (11956.1 MiB) Actual memory used (physical + swap)
MALLOC: +      2801664 (    2.7 MiB) Bytes released to OS (aka unmapped)
MALLOC:   ------------
MALLOC: =  12539658240 (11958.8 MiB) Virtual address space used
MALLOC:
MALLOC:        1200339              Spans in use
MALLOC:             17              Thread heaps in use
MALLOC:           8192              Tcmalloc page size
------------------------------------------------
Call ReleaseFreeMemory() to release freelist memory to the OS (via madvise()).
Bytes released to the OS take up virtual address space but no physical memory.
[root@rook-ceph-mds-auxtel-a-dfdfb685f-sdpmd ceph]# ceph daemon mds.auxtel-a cache status
{
    "pool": {
        "items": 141542469,
        "bytes": 9153539281
    }
}
[root@rook-ceph-mds-auxtel-a-dfdfb685f-sdpmd ceph]# ceph daemon mds.auxtel-a config show | grep memory_limit
    "mds_cache_memory_limit": "4294967296",
[root@rook-ceph-mds-auxtel-a-dfdfb685f-sdpmd ceph]# ceph daemon mds.auxtel-a perf dump
{
    "AsyncMessenger::Worker-0": {
        "msgr_recv_messages": 1407910,
        "msgr_send_messages": 1407905,
        "msgr_recv_bytes": 15532421095,
        "msgr_send_bytes": 437857212,
        "msgr_created_connections": 1636,
        "msgr_active_connections": 6,
        "msgr_running_total_time": 232.643308860,
        "msgr_running_send_time": 98.478563497,
        "msgr_running_recv_time": 75.291137307,
        "msgr_running_fast_dispatch_time": 43.114229336,
        "msgr_send_messages_queue_lat": {
            "avgcount": 1407901,
            "sum": 60.714685888,
            "avgtime": 0.000043124
        },
        "msgr_handle_ack_lat": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "AsyncMessenger::Worker-1": {
        "msgr_recv_messages": 837175,
        "msgr_send_messages": 836833,
        "msgr_recv_bytes": 12952005314,
        "msgr_send_bytes": 329765397,
        "msgr_created_connections": 2184,
        "msgr_active_connections": 6,
        "msgr_running_total_time": 161.194909024,
        "msgr_running_send_time": 68.448497344,
        "msgr_running_recv_time": 53.023834457,
        "msgr_running_fast_dispatch_time": 27.146349335,
        "msgr_send_messages_queue_lat": {
            "avgcount": 836832,
            "sum": 29.463634580,
            "avgtime": 0.000035208
        },
        "msgr_handle_ack_lat": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "AsyncMessenger::Worker-2": {
        "msgr_recv_messages": 784766,
        "msgr_send_messages": 969251,
        "msgr_recv_bytes": 11421098512,
        "msgr_send_bytes": 590377305,
        "msgr_created_connections": 2431,
        "msgr_active_connections": 6,
        "msgr_running_total_time": 152.263133905,
        "msgr_running_send_time": 68.612540046,
        "msgr_running_recv_time": 45.885822822,
        "msgr_running_fast_dispatch_time": 26.471051292,
        "msgr_send_messages_queue_lat": {
            "avgcount": 969245,
            "sum": 43.294864647,
            "avgtime": 0.000044668
        },
        "msgr_handle_ack_lat": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "cct": {
        "total_workers": 1,
        "unhealthy_workers": 0
    },
    "finisher-MDSRank": {
        "queue_len": 0,
        "complete_latency": {
            "avgcount": 4593955,
            "sum": 204.809726064,
            "avgtime": 0.000044582
        }
    },
    "finisher-PurgeQueue": {
        "queue_len": 0,
        "complete_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "mds": {
        "request": 0,
        "reply": 0,
        "reply_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "slow_reply": 0,
        "forward": 0,
        "dir_fetch": 0,
        "dir_commit": 0,
        "dir_split": 0,
        "dir_merge": 0,
        "inodes": 3463605,
        "inodes_top": 2419085,
        "inodes_bottom": 1044509,
        "inodes_pin_tail": 11,
        "inodes_pinned": 7768,
        "inodes_expired": 1524927,
        "inodes_with_caps": 0,
        "caps": 0,
        "subtrees": 680,
        "traverse": 0,
        "traverse_hit": 0,
        "traverse_forward": 0,
        "traverse_discover": 0,
        "traverse_dir_fetch": 0,
        "traverse_remote_ino": 0,
        "traverse_lock": 0,
        "load_cent": 0,
        "q": 0,
        "exported": 0,
        "exported_inodes": 0,
        "imported": 0,
        "imported_inodes": 0,
        "openino_dir_fetch": 0,
        "openino_backtrace_fetch": 0,
        "openino_peer_discover": 0,
        "root_rfiles": 0,
        "root_rbytes": 0,
        "root_rsnaps": 0,
        "scrub_backtrace_fetch": 0,
        "scrub_set_tag": 0,
        "scrub_backtrace_repaired": 0,
        "scrub_inotable_repaired": 0,
        "scrub_dir_inodes": 0,
        "scrub_dir_base_inodes": 0,
        "scrub_dirfrag_rstats": 0,
        "scrub_file_inodes": 0,
        "handle_inode_file_caps": 0,
        "ceph_cap_op_revoke": 0,
        "ceph_cap_op_grant": 0,
        "ceph_cap_op_trunc": 0,
        "ceph_cap_op_flushsnap_ack": 0,
        "ceph_cap_op_flush_ack": 0,
        "handle_client_caps": 0,
        "handle_client_caps_dirty": 0,
        "handle_client_cap_release": 0,
        "process_request_cap_release": 0
    },
    "mds_cache": {
        "num_strays": 0,
        "num_strays_delayed": 0,
        "num_strays_enqueuing": 0,
        "strays_created": 0,
        "strays_enqueued": 0,
        "strays_reintegrated": 0,
        "strays_migrated": 0,
        "num_recovering_processing": 0,
        "num_recovering_enqueued": 0,
        "num_recovering_prioritized": 0,
        "recovery_started": 0,
        "recovery_completed": 0,
        "ireq_enqueue_scrub": 0,
        "ireq_exportdir": 0,
        "ireq_flush": 0,
        "ireq_fragmentdir": 0,
        "ireq_fragstats": 0,
        "ireq_inodestats": 0
    },
    "mds_log": {
        "evadd": 0,
        "evex": 0,
        "evtrm": 0,
        "ev": 0,
        "evexg": 0,
        "evexd": 0,
        "segadd": 0,
        "segex": 0,
        "segtrm": 0,
        "seg": 165,
        "segexg": 0,
        "segexd": 0,
        "expos": 40870637119,
        "wrpos": 0,
        "rdpos": 41178680660,
        "jlat": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "replayed": 18618916
    },
    "mds_mem": {
        "ino": 3462922,
        "ino+": 4599961,
        "ino-": 1137039,
        "dir": 5155,
        "dir+": 18953,
        "dir-": 13798,
        "dn": 3463605,
        "dn+": 6321926,
        "dn-": 2858321,
        "cap": 0,
        "cap+": 0,
        "cap-": 0,
        "rss": 12258708,
        "heap": 207132
    },
    "mds_server": {
        "dispatch_client_request": 0,
        "dispatch_server_request": 0,
        "handle_client_request": 0,
        "handle_client_session": 0,
        "handle_peer_request": 0,
        "req_create_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_getattr_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_getfilelock_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_link_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_lookup_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_lookuphash_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_lookupino_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_lookupname_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_lookupparent_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_lookupsnap_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_lssnap_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_mkdir_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_mknod_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_mksnap_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_open_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_readdir_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_rename_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_renamesnap_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_rmdir_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_rmsnap_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_rmxattr_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_setattr_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_setdirlayout_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_setfilelock_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_setlayout_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_setxattr_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_symlink_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "req_unlink_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        },
        "cap_revoke_eviction": 0,
        "cap_acquisition_throttle": 0,
        "req_getvxattr_latency": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "mds_sessions": {
        "session_count": 1,
        "session_add": 294,
        "session_remove": 293,
        "sessions_open": 1,
        "sessions_stale": 0,
        "total_load": 0,
        "average_load": 0,
        "avg_session_uptime": 1295576
    },
    "mempool": {
        "bloom_filter_bytes": 0,
        "bloom_filter_items": 0,
        "bluestore_alloc_bytes": 0,
        "bluestore_alloc_items": 0,
        "bluestore_cache_data_bytes": 0,
        "bluestore_cache_data_items": 0,
        "bluestore_cache_onode_bytes": 0,
        "bluestore_cache_onode_items": 0,
        "bluestore_cache_meta_bytes": 0,
        "bluestore_cache_meta_items": 0,
        "bluestore_cache_other_bytes": 0,
        "bluestore_cache_other_items": 0,
        "bluestore_Buffer_bytes": 0,
        "bluestore_Buffer_items": 0,
        "bluestore_Extent_bytes": 0,
        "bluestore_Extent_items": 0,
        "bluestore_Blob_bytes": 0,
        "bluestore_Blob_items": 0,
        "bluestore_SharedBlob_bytes": 0,
        "bluestore_SharedBlob_items": 0,
        "bluestore_inline_bl_bytes": 0,
        "bluestore_inline_bl_items": 0,
        "bluestore_fsck_bytes": 0,
        "bluestore_fsck_items": 0,
        "bluestore_txc_bytes": 0,
        "bluestore_txc_items": 0,
        "bluestore_writing_deferred_bytes": 0,
        "bluestore_writing_deferred_items": 0,
        "bluestore_writing_bytes": 0,
        "bluestore_writing_items": 0,
        "bluefs_bytes": 0,
        "bluefs_items": 0,
        "bluefs_file_reader_bytes": 0,
        "bluefs_file_reader_items": 0,
        "bluefs_file_writer_bytes": 0,
        "bluefs_file_writer_items": 0,
        "buffer_anon_bytes": 274679,
        "buffer_anon_items": 91,
        "buffer_meta_bytes": 0,
        "buffer_meta_items": 0,
        "osd_bytes": 0,
        "osd_items": 0,
        "osd_mapbl_bytes": 0,
        "osd_mapbl_items": 0,
        "osd_pglog_bytes": 0,
        "osd_pglog_items": 0,
        "osdmap_bytes": 91480,
        "osdmap_items": 2133,
        "osdmap_mapping_bytes": 0,
        "osdmap_mapping_items": 0,
        "pgmap_bytes": 0,
        "pgmap_items": 0,
        "mds_co_bytes": 9153539281,
        "mds_co_items": 141542469,
        "unittest_1_bytes": 0,
        "unittest_1_items": 0,
        "unittest_2_bytes": 0,
        "unittest_2_items": 0
    },
    "objecter": {
        "op_active": 0,
        "op_laggy": 0,
        "op_send": 2795947,
        "op_send_bytes": 21,
        "op_resend": 0,
        "op_reply": 2795947,
        "oplen_avg": {
            "avgcount": 2795947,
            "sum": 2795948
        },
        "op": 2795947,
        "op_r": 2795947,
        "op_w": 0,
        "op_rmw": 0,
        "op_pg": 0,
        "osdop_stat": 1851403,
        "osdop_create": 0,
        "osdop_read": 944542,
        "osdop_write": 0,
        "osdop_writefull": 0,
        "osdop_writesame": 0,
        "osdop_append": 0,
        "osdop_zero": 0,
        "osdop_truncate": 0,
        "osdop_delete": 0,
        "osdop_mapext": 0,
        "osdop_sparse_read": 0,
        "osdop_clonerange": 0,
        "osdop_getxattr": 1,
        "osdop_setxattr": 0,
        "osdop_cmpxattr": 0,
        "osdop_rmxattr": 0,
        "osdop_resetxattrs": 0,
        "osdop_call": 0,
        "osdop_watch": 0,
        "osdop_notify": 0,
        "osdop_src_cmpxattr": 0,
        "osdop_pgls": 0,
        "osdop_pgls_filter": 0,
        "osdop_other": 0,
        "linger_active": 0,
        "linger_send": 0,
        "linger_resend": 0,
        "linger_ping": 0,
        "poolop_active": 0,
        "poolop_send": 0,
        "poolop_resend": 0,
        "poolstat_active": 0,
        "poolstat_send": 0,
        "poolstat_resend": 0,
        "statfs_active": 0,
        "statfs_send": 0,
        "statfs_resend": 0,
        "command_active": 0,
        "command_send": 0,
        "command_resend": 0,
        "map_epoch": 103356,
        "map_full": 0,
        "map_inc": 1763,
        "osd_sessions": 14,
        "osd_session_open": 5660,
        "osd_session_close": 5646,
        "osd_laggy": 0,
        "omap_wr": 0,
        "omap_rd": 2,
        "omap_del": 0
    },
    "oft": {
        "omap_total_objs": 0,
        "omap_total_kv_pairs": 0,
        "omap_total_updates": 0,
        "omap_total_removes": 0
    },
    "purge_queue": {
        "pq_executing_ops": 0,
        "pq_executing_ops_high_water": 0,
        "pq_executing": 0,
        "pq_executing_high_water": 0,
        "pq_executed": 0,
        "pq_item_in_journal": 0
    },
    "throttle-msgr_dispatch_throttler-mds": {
        "val": 0,
        "max": 104857600,
        "get_started": 0,
        "get": 3029847,
        "get_sum": 39622312702,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 3029847,
        "take": 0,
        "take_sum": 0,
        "put": 3029847,
        "put_sum": 39622312702,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "throttle-objecter_bytes": {
        "val": 0,
        "max": 104857600,
        "get_started": 0,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 2795947,
        "take_sum": 39047892197,
        "put": 2795947,
        "put_sum": 39047892197,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "throttle-objecter_ops": {
        "val": 0,
        "max": 1024,
        "get_started": 0,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 2795947,
        "take_sum": 2795947,
        "put": 2795947,
        "put_sum": 2795947,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "throttle-write_buf_throttle": {
        "val": 0,
        "max": 3758096384,
        "get_started": 0,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 0,
        "take_sum": 0,
        "put": 0,
        "put_sum": 0,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "throttle-write_buf_throttle-0x560dd74620a0": {
        "val": 0,
        "max": 3758096384,
        "get_started": 0,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 0,
        "take_sum": 0,
        "put": 0,
        "put_sum": 0,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    }
}
[root@rook-ceph-mds-auxtel-a-dfdfb685f-sdpmd ceph]# ceph daemon mds.auxtel-a dump_mempools 
{
    "mempool": {
        "by_pool": {
            "bloom_filter": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_alloc": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_cache_data": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_cache_onode": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_cache_meta": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_cache_other": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_Buffer": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_Extent": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_Blob": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_SharedBlob": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_inline_bl": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_fsck": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_txc": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_writing_deferred": {
                "items": 0,
                "bytes": 0
            },
            "bluestore_writing": {
                "items": 0,
                "bytes": 0
            },
            "bluefs": {
                "items": 0,
                "bytes": 0
            },
            "bluefs_file_reader": {
                "items": 0,
                "bytes": 0
            },
            "bluefs_file_writer": {
                "items": 0,
                "bytes": 0
            },
            "buffer_anon": {
                "items": 91,
                "bytes": 274679
            },
            "buffer_meta": {
                "items": 0,
                "bytes": 0
            },
            "osd": {
                "items": 0,
                "bytes": 0
            },
            "osd_mapbl": {
                "items": 0,
                "bytes": 0
            },
            "osd_pglog": {
                "items": 0,
                "bytes": 0
            },
            "osdmap": {
                "items": 2133,
                "bytes": 91480
            },
            "osdmap_mapping": {
                "items": 0,
                "bytes": 0
            },
            "pgmap": {
                "items": 0,
                "bytes": 0
            },
            "mds_co": {
                "items": 141542469,
                "bytes": 9153539281
            },
            "unittest_1": {
                "items": 0,
                "bytes": 0
            },
            "unittest_2": {
                "items": 0,
                "bytes": 0
            }
        },
        "total": {
            "items": 141544693,
            "bytes": 9153905440
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment