本文讲解 Ceph RGW(RADOS Gateway)的性能监控及其源码实现,包括性能计数器的启动过程、各计数器参数的含义、计数器类型定义以及查看计数器数据的方法。很多新手对此不是很清楚,有这方面需求的读者可以参考学习,希望你能有所收获。
性能计数器启动过程
#src/rgw/rgw_main.cc
int main(int argc, const char **argv)
...
if (g_conf->daemonize) {
global_init_daemonize(g_ceph_context);
}
Mutex mutex("main");
SafeTimer init_timer(g_ceph_context, mutex);
init_timer.init();
mutex.Lock();
init_timer.add_event_after(g_conf->rgw_init_timeout, new C_InitTimeout);
mutex.Unlock();
// Enable the perf counter before starting the service thread
g_ceph_context->enable_perf_counter();
...
r = rgw_perf_start(g_ceph_context); #启用rgw计数器
性能计数器参数说明
#src/rgw/rgw_common.cc
// Registers the RGW perf counters and adds them to the context's collection.
//
// Builds a counter set named after cct->_conf->name, bounded by l_rgw_first
// and l_rgw_last, creates it, stores the result in `perfcounter` (declared
// outside this snippet) and adds it to cct's perf counters collection.
//
// @param cct  context whose configuration name and perf counters collection
//             are used
// @return     always 0
int rgw_perf_start(CephContext *cct)
{
  PerfCountersBuilder plb(cct, cct->_conf->name.to_str(), l_rgw_first, l_rgw_last);

  // Request totals.
  // NOTE(review): the original annotation called "req" the number of
  // successfully handled requests; the registered description is just
  // "Requests" -- confirm whether failed requests are included.
  plb.add_u64_counter(l_rgw_req, "req", "Requests");
  plb.add_u64_counter(l_rgw_failed_req, "failed_req", "Aborted requests"); // failed requests

  // GET statistics: request count, size, and latency average.
  plb.add_u64_counter(l_rgw_get, "get", "Gets"); // number of GET requests
  plb.add_u64_counter(l_rgw_get_b, "get_b", "Size of gets");
  plb.add_time_avg(l_rgw_get_lat, "get_initial_lat", "Get latency");

  // PUT statistics: request count, size, and latency average.
  plb.add_u64_counter(l_rgw_put, "put", "Puts"); // number of PUT requests
  plb.add_u64_counter(l_rgw_put_b, "put_b", "Size of puts");
  plb.add_time_avg(l_rgw_put_lat, "put_initial_lat", "Put latency");

  // Request queue values (registered with add_u64 rather than add_u64_counter).
  plb.add_u64(l_rgw_qlen, "qlen", "Queue length");
  plb.add_u64(l_rgw_qactive, "qactive", "Active requests queue");

  // Cache used for RGW metadata: hit and miss counts.
  plb.add_u64_counter(l_rgw_cache_hit, "cache_hit", "Cache hits");
  plb.add_u64_counter(l_rgw_cache_miss, "cache_miss", "Cache miss");

  // Keystone token cache: hit and miss counts.
  plb.add_u64_counter(l_rgw_keystone_token_cache_hit, "keystone_token_cache_hit", "Keystone token cache hits");
  plb.add_u64_counter(l_rgw_keystone_token_cache_miss, "keystone_token_cache_miss", "Keystone token cache miss");

  // Materialize the counters and add them to the context's collection.
  perfcounter = plb.create_perf_counters();
  cct->get_perfcounters_collection()->add(perfcounter);
  return 0;
}
性能计数器类型定义如下
#src/rgw/rgw_common.h
// Index values for the RGW perf counters. l_rgw_first and l_rgw_last bracket
// the real counters; every enumerator after l_rgw_first takes the next
// consecutive value.
enum {
  l_rgw_first = 15000,  // lower bound

  // request totals
  l_rgw_req,
  l_rgw_failed_req,

  // GET: count, size, latency
  l_rgw_get,
  l_rgw_get_b,
  l_rgw_get_lat,

  // PUT: count, size, latency
  l_rgw_put,
  l_rgw_put_b,
  l_rgw_put_lat,

  // request queue
  l_rgw_qlen,
  l_rgw_qactive,

  // cache hit/miss
  l_rgw_cache_hit,
  l_rgw_cache_miss,

  // keystone token cache hit/miss
  l_rgw_keystone_token_cache_hit,
  l_rgw_keystone_token_cache_miss,

  l_rgw_last  // upper bound
};
查看性能计数器数据
方式1(通过 admin socket 文件查询)
root@demo# ceph --admin-daemon /var/run/ceph-client.radosgw.en-zone1.asok perf dump
{
"cct": {
"total_workers": 32,
"unhealthy_workers": 0
},
"client.radosgw.en-zone1": {
"req": 2,
"failed_req": 2,
"get": 0,
"get_b": 0,
"get_initial_lat": {
"avgcount": 0,
"sum": 0.000000000
},
"put": 0,
"put_b": 0,
"put_initial_lat": {
"avgcount": 0,
"sum": 0.000000000
},
"qlen": 0,
"qactive": 0,
"cache_hit": 0,
"cache_miss": 2,
"keystone_token_cache_hit": 0,
"keystone_token_cache_miss": 0
}
}
方式2(通过 ceph daemon 命令查询,适用于 Dumpling 及以上版本)
root@demo# ceph daemon client.radosgw.en-zone1 perf dump
{
"cct": {
"total_workers": 32,
"unhealthy_workers": 0
},
"client.radosgw.en-zone1": {
"req": 2,
"failed_req": 2,
"get": 0,
"get_b": 0,
"get_initial_lat": {
"avgcount": 0,
"sum": 0.000000000
},
"put": 0,
"put_b": 0,
"put_initial_lat": {
"avgcount": 0,
"sum": 0.000000000
},
"qlen": 0,
"qactive": 0,
"cache_hit": 0,
"cache_miss": 2,
"keystone_token_cache_hit": 0,
"keystone_token_cache_miss": 0
}
}
性能计数器一般人可能不太关注,但是将计数器数据推送到一些监控系统里面,同时添加一些告警策略,能够显著提升运维质量。同时通过源码层面去扩展计数器类型,从ceph内部去实现一些自定义数据的统计,也是一个不错的功能亮点。
看完上述内容是否对您有帮助呢?如果还想对相关知识有进一步的了解或阅读更多相关文章,请关注天达云行业资讯频道,感谢您对天达云的支持。