Azure AKS集群监控告警表达式配置

ops/2024/9/22 21:26:10/

背景需求

Azure AKS集群中,需要对部署的服务进行监控和告警,需要创建并启用预警规则,而这里怎么去监控每个pod级别的CPU和内存,需要自己写搜索查询
在这里插入图片描述

解决方法

搜索和查询的语句如下,需要自己替换其中的部分信息,其中的clusterID需要在AKS集群中的概述-右侧JSON视图-id,其中id就是我们要的clusterID,然后替换下面的clusterID字段内容。最后的 percentage 也需要根据实际情况来设置对应值。

#内存
let metricUsageCounterName = 'memoryRssBytes';
let metricLimitCounterName = 'memoryLimitBytes';
let clusterID = "/subscriptions/xxxxxxxxxxx";
let CachedFilteredPerfTable = Perf
| where ObjectName == 'K8SContainer'
| where InstanceName startswith clusterID
| project Node = Computer, TimeGenerated, CounterName, CounterValue, InstanceName;
let LimitsTable = CachedFilteredPerfTable
| where CounterName =~ metricLimitCounterName
| summarize arg_max(TimeGenerated, *) by Node, InstanceName
| project Node, InstanceName, LimitsValue = CounterValue, TimeGenerated;
let MetaDataTable = KubePodInventory
| where isnotempty(ClusterName) | where isnotempty(Namespace) | where isnotempty(Computer)
| where ClusterId =~ clusterID
| project TimeGenerated, ClusterId, Namespace, ControllerName, Node = Computer, Pod = Name, ContainerInstance = ContainerName, ContainerID, ControllerKind = ControllerKind
| summarize arg_max(TimeGenerated, *) by Node, ContainerInstance
| project Namespace, ControllerName, Node, Pod, ContainerInstance, InstanceName = strcat(ClusterId, '/', ContainerInstance), ContainerID, ControllerKind, 
ContainerName = tostring(split(ContainerInstance, '/')[1]), LastPodInventoryTimeGenerated = TimeGenerated, ClusterId
| join kind=leftouter (LimitsTable) on Node, InstanceName
| project Namespace, ControllerName, Node, Pod, InstanceName, ContainerID, LimitsValue, ControllerKind, ContainerName, ContainerInstance, LastPodInventoryTimeGenerated, ClusterId;
let AggregationTable = CachedFilteredPerfTable
| where CounterName =~ metricUsageCounterName
| project TimeGenerated, Node, InstanceName, CounterValue
| summarize  Aggregation = percentile(CounterValue, 95) by Node, InstanceName 
| project Node, InstanceName, Aggregation;
MetaDataTable
| join kind= leftouter( AggregationTable ) on Node, InstanceName
| order by ContainerName asc, ContainerName
| extend ContainerIdentity = strcat(ContainerName, '|', Pod)
| extend percentage = Aggregation/LimitsValue * 100
| project ContainerIdentity, percentage, Aggregation, LimitsValue, Node, ControllerName, ControllerKind, ContainerID, ContainerInstance, InstanceName, Namespace, LastPodInventoryTimeGenerated, ClusterId
| where percentage > 80#CPU
let metricUsageCounterName = 'cpuUsageNanoCores';
let metricLimitCounterName = 'cpuLimitNanoCores'; 
let clusterID = "/subscriptions/xxxxxxxxxxx";
let CachedFilteredPerfTable = Perf
| where ObjectName == 'K8SContainer'
| where InstanceName startswith clusterID
| project Node = Computer, TimeGenerated, CounterName, CounterValue, InstanceName;
let LimitsTable = CachedFilteredPerfTable
| where CounterName =~ metricLimitCounterName
| summarize arg_max(TimeGenerated, *) by Node, InstanceName
| project Node, InstanceName, LimitsValue = CounterValue/1000000, TimeGenerated;
let MetaDataTable = KubePodInventory
| where isnotempty(ClusterName) | where isnotempty(Namespace) | where isnotempty(Computer)
| where ClusterId =~ clusterID
| project TimeGenerated, ClusterId, Namespace, ControllerName, Node = Computer, Pod = Name, ContainerInstance = ContainerName, ContainerID, ControllerKind = ControllerKind
| summarize arg_max(TimeGenerated, *) by Node, ContainerInstance
| project Namespace, ControllerName, Node, Pod, ContainerInstance, InstanceName = strcat(ClusterId, '/', ContainerInstance), ContainerID, ControllerKind, 
ContainerName = tostring(split(ContainerInstance, '/')[1]), LastPodInventoryTimeGenerated = TimeGenerated, ClusterId
| join kind=leftouter (LimitsTable) on Node, InstanceName
| project Namespace, ControllerName, Node, Pod, InstanceName, ContainerID, LimitsValue, ControllerKind, ContainerName, ContainerInstance, LastPodInventoryTimeGenerated, ClusterId;
let AggregationTable = CachedFilteredPerfTable
| where CounterName =~ metricUsageCounterName
| project TimeGenerated, Node, InstanceName, CounterValue = CounterValue/1000000
| summarize  Aggregation = percentile(CounterValue, 95) by Node, InstanceName 
| project Node, InstanceName, Aggregation;
MetaDataTable
| join kind= leftouter( AggregationTable ) on Node, InstanceName
| order by ContainerName asc, ContainerName
| extend ContainerIdentity = strcat(ContainerName, '|', Pod)
| extend percentage = Aggregation/LimitsValue * 100
| project ContainerIdentity, percentage, Aggregation, LimitsValue, Node, ControllerName, ControllerKind, ContainerID, ContainerInstance, InstanceName, Namespace, LastPodInventoryTimeGenerated, ClusterId
| where percentage > 80

http://www.ppmy.cn/ops/13610.html

相关文章

Linux读写文件

前言 学习了文件系统,就能理解为什么说Linux下一切皆文件。 语言层面的操作 在c语言的学习中我们可以使用fopen()函数对文件进行操作。 int main() {//FILE * fp fopen("./log.txt", "w");//FILE * fp fopen("./log.txt", "…

深度学习框架比较:TensorFlow vs PyTorch

TensorFlow和PyTorch是目前最流行的两个深度学习框架。它们都提供了强大的功能和灵活性,用于构建和训练复杂的深度学习模型。但是,它们在设计理念、用户体验和特定用例支持方面存在一些差异。以下是TensorFlow和PyTorch之间的比较,涵盖了它们…

微信小程序获取蓝牙信标

/*** 搜索设备界面*/ import Dialog from vant/weapp/dialog/dialog; Page({data: {list: []},onPullDownRefresh: function () {wx.request({url: https://wwz.jingyi.icu/app/Explain/index,data: {scenic_id: 3},method: POST,success: (res) > {console.log(res);let th…

《ElementPlus 与 ElementUI 差异集合》el-select 差异点,如:高、宽、body插入等

宽度 Element UI 父元素不限制宽度时,默认有个宽度 207px; 父元素有固定宽度时,以父元素宽度为准; Element Plus 父元素不限制宽度时,默认100%; 父元素有固定宽度时,以父元素宽度为准&#x…

第二期书生浦语大模型训练营第四次笔记

大模型微调技术 大模型微调是一种通过在预训练模型的基础上,有针对性地微调部分参数以适应特定任务需求的方法。 微调预训练模型的方法 微调所有层:将预训练模型的所有层都参与微调,以适应新的任务。 微调顶层:只微调预训练模型…

STL-list的使用及其模拟实现

在C标准库中,list 是一个双向链表容器,用于存储一系列元素。与 vector 和 deque 等容器不同,list 使用带头双向循环链表的数据结构来组织元素,因此list插入删除的效率非常高。 list的使用 list的构造函数 list迭代器 list的成员函…

(七)Idea编辑器集成Tomcat

1. 点击桌面上Idea快捷方式打开Idea编辑器,假如没有创建项目的话打开Idea编辑器后的界面展示如下图所示 2. 点击界面左侧菜单中的自定义 3. 然后点击界面中的“所有设置...”,然后点击“构建、执行、部署”,选择其中的“应用程序服务器” 4. 点击“”按钮…

回归预测 | Matlab实现SA-BP模拟退火算法优化BP神经网络多变量回归预测

回归预测 | Matlab实现SA-BP模拟退火算法优化BP神经网络多变量回归预测 目录 回归预测 | Matlab实现SA-BP模拟退火算法优化BP神经网络多变量回归预测预测效果基本描述程序设计参考资料 预测效果 基本描述 1.Matlab实现SA-BP模拟退火算法优化BP神经网络多变量回归预测&#xff0…