From bbdb821277851283452d1566be716db3b3ed9ed1 Mon Sep 17 00:00:00 2001 From: fuwx Date: Tue, 22 Apr 2025 17:28:47 +0800 Subject: [PATCH] update workflow --- .../workflows/zh/告警有效性确认.yml | 779 ++- .../workflows/zh/告警简单根因分析.yml | 5108 +++++++++++------ 2 files changed, 3920 insertions(+), 1967 deletions(-) diff --git a/api/init_data/workflows/zh/告警有效性确认.yml b/api/init_data/workflows/zh/告警有效性确认.yml index cedbc1d741..429f864c69 100644 --- a/api/init_data/workflows/zh/告警有效性确认.yml +++ b/api/init_data/workflows/zh/告警有效性确认.yml @@ -310,17 +310,6 @@ workflow: targetHandle: target type: custom zIndex: 0 - - data: - isInIteration: false - sourceType: tool - targetType: variable-aggregator - id: 17424339242280-source-1741162531836-target - source: '17424339242280' - sourceHandle: source - target: '1741162531836' - targetHandle: target - type: custom - zIndex: 0 - data: isInIteration: false sourceType: variable-aggregator @@ -590,6 +579,7 @@ workflow: sourceType: question-classifier targetType: tool id: 1741158559444-1741158765960-1742979394268-target + selected: false source: '1741158559444' sourceHandle: '1741158765960' target: '1742979394268' @@ -761,6 +751,72 @@ workflow: targetHandle: target type: custom zIndex: 0 + - data: + isInIteration: false + sourceType: tool + targetType: code + id: 17424339242280-source-1744599772545-target + source: '17424339242280' + sourceHandle: source + target: '1744599772545' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: if-else + id: 1744599772545-source-1744599891369-target + source: '1744599772545' + sourceHandle: source + target: '1744599891369' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: if-else + targetType: code + id: 1744599891369-true-1744339618961-target + source: '1744599891369' + sourceHandle: 'true' + target: '1744339618961' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: if-else + targetType: end + id: 1744599891369-false-1744599930141-target + source: '1744599891369' + sourceHandle: 'false' + target: '1744599930141' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: variable-aggregator + id: 1744339618961-source-1744627472534-target + source: '1744339618961' + sourceHandle: source + target: '1744627472534' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: variable-aggregator + targetType: end + id: 1744627472534-source-1744343454355-target + source: '1744627472534' + sourceHandle: source + target: '1744343454355' + targetHandle: target + type: custom + zIndex: 0 nodes: - data: desc: '' @@ -815,10 +871,10 @@ workflow: id: '1741157526222' position: x: 30 - y: 578 + y: 586 positionAbsolute: x: 30 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -838,10 +894,10 @@ workflow: id: '1741157560922' position: x: 3374 - y: 578 + y: 586 positionAbsolute: x: 3374 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -917,9 +973,11 @@ workflow: 容器被kill' - id: '1744093223338' - name: '应用程指标 + name: '应用指标 - 日志错误数相关' + 日志错误数相关 + + log error count' desc: '' instruction: '请你从告警事件描述 @@ -940,14 +998,14 @@ workflow: type: question-classifier vision: enabled: false - height: 756 + height: 772 id: '1741158559444' position: x: 942 - y: 578 + y: 586 positionAbsolute: x: 942 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -1865,10 +1923,10 @@ workflow: id: '1741159085487' position: x: 2158 - y: 578 + y: 586 positionAbsolute: x: 2158 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -1881,8 +1939,6 @@ workflow: title: 变量聚合提供给大模型分析 type: variable-aggregator variables: - - - '1741328045214' - - text - - '1742979394268' - text - - '1741328076453' @@ -1911,14 +1967,14 @@ workflow: - text - - '1741328213545' - text - height: 416 + height: 394 id: '1741162531836' position: x: 1854 - y: 1293 + y: 1207 positionAbsolute: x: 1854 - y: 1293 + y: 1207 selected: false sourcePosition: right targetPosition: left @@ -1952,10 +2008,10 @@ workflow: id: '1741311244655' position: x: 2462 - y: 578 + y: 586 positionAbsolute: x: 2462 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -1983,10 +2039,10 @@ workflow: id: '1741311294320' position: x: 2766 - y: 578 + y: 586 positionAbsolute: x: 2766 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -2003,14 +2059,16 @@ workflow: - result - - '1742366303110' - class_name - height: 130 + - - '1744339618961' + - abnormal + height: 152 id: '1741311394752' position: x: 3070 - y: 578 + y: 586 positionAbsolute: x: 3070 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -2152,10 +2210,10 @@ workflow: id: '1741328045214' position: x: 1246 - y: 635 + y: 2125 positionAbsolute: x: 1246 - y: 635 + y: 2125 selected: false sourcePosition: right targetPosition: left @@ -2297,10 +2355,10 @@ workflow: id: '1741328076453' position: x: 1246 - y: 729 + y: 643 positionAbsolute: x: 1246 - y: 729 + y: 643 selected: false sourcePosition: right targetPosition: left @@ -2442,10 +2500,10 @@ workflow: id: '1741328197695' position: x: 1246 - y: 917 + y: 831 positionAbsolute: x: 1246 - y: 917 + y: 831 selected: false sourcePosition: right targetPosition: left @@ -2559,10 +2617,10 @@ workflow: id: '1741328213545' position: x: 1246 - y: 1011 + y: 925 positionAbsolute: x: 1246 - y: 1011 + y: 925 selected: false sourcePosition: right targetPosition: left @@ -2676,10 +2734,10 @@ workflow: id: '1741328346220' position: x: 1246 - y: 1105 + y: 1019 positionAbsolute: x: 1246 - y: 1105 + y: 1019 selected: false sourcePosition: right targetPosition: left @@ -2793,10 +2851,10 @@ workflow: id: '1741328423453' position: x: 1246 - y: 1199 + y: 1113 positionAbsolute: x: 1246 - y: 1199 + y: 1113 selected: false sourcePosition: right targetPosition: left @@ -2910,10 +2968,10 @@ workflow: id: '1741328466394' position: x: 1246 - y: 1293 + y: 1207 positionAbsolute: x: 1246 - y: 1293 + y: 1207 selected: false sourcePosition: right targetPosition: left @@ -3027,10 +3085,10 @@ workflow: id: '1741328742817' position: x: 1246 - y: 1387 + y: 1301 positionAbsolute: x: 1246 - y: 1387 + y: 1301 selected: false sourcePosition: right targetPosition: left @@ -3144,10 +3202,10 @@ workflow: id: '1741328773213' position: x: 1246 - y: 1481 + y: 1395 positionAbsolute: x: 1246 - y: 1481 + y: 1395 selected: false sourcePosition: right targetPosition: left @@ -3261,10 +3319,10 @@ workflow: id: '1741328799030' position: x: 1246 - y: 1575 + y: 1489 positionAbsolute: x: 1246 - y: 1575 + y: 1489 selected: false sourcePosition: right targetPosition: left @@ -3406,10 +3464,10 @@ workflow: id: '1741329632798' position: x: 1246 - y: 823 + y: 737 positionAbsolute: x: 1246 - y: 823 + y: 737 selected: false sourcePosition: right targetPosition: left @@ -3479,10 +3537,10 @@ workflow: id: '1742366141890' position: x: 2462 - y: 1332 + y: 1246 positionAbsolute: x: 2462 - y: 1332 + y: 1246 selected: false sourcePosition: right targetPosition: left @@ -3526,10 +3584,10 @@ workflow: id: '1742366303110' position: x: 2766 - y: 1236 + y: 1150 positionAbsolute: x: 2766 - y: 1236 + y: 1150 selected: false sourcePosition: right targetPosition: left @@ -3569,10 +3627,10 @@ workflow: id: '1742433446760' position: x: 638 - y: 578 + y: 586 positionAbsolute: x: 638 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -3714,10 +3772,10 @@ workflow: id: '17424339242280' position: x: 1550 - y: 635 + y: 2125 positionAbsolute: x: 1550 - y: 635 + y: 2125 selected: false sourcePosition: right targetPosition: left @@ -3730,8 +3788,6 @@ workflow: title: 聚合历史指标数据 type: variable-aggregator variables: - - - '17424339242280' - - text - - '17429797664440' - text - - '17424348570980' @@ -3760,14 +3816,14 @@ workflow: - text - - '17429835940550' - text - height: 416 + height: 394 id: '1742434016239' position: x: 2158 - y: 1184 + y: 1109 positionAbsolute: x: 2158 - y: 1184 + y: 1109 selected: false sourcePosition: right targetPosition: left @@ -3909,10 +3965,10 @@ workflow: id: '17424342365860' position: x: 1550 - y: 729 + y: 643 positionAbsolute: x: 1550 - y: 729 + y: 643 selected: false sourcePosition: right targetPosition: left @@ -4054,10 +4110,10 @@ workflow: id: '17424344697090' position: x: 1550 - y: 823 + y: 737 positionAbsolute: x: 1550 - y: 823 + y: 737 selected: false sourcePosition: right targetPosition: left @@ -4199,10 +4255,10 @@ workflow: id: '17424345080000' position: x: 1550 - y: 917 + y: 831 positionAbsolute: x: 1550 - y: 917 + y: 831 selected: false sourcePosition: right targetPosition: left @@ -4316,10 +4372,10 @@ workflow: id: '17424345315500' position: x: 1550 - y: 1011 + y: 925 positionAbsolute: x: 1550 - y: 1011 + y: 925 selected: false sourcePosition: right targetPosition: left @@ -4433,10 +4489,10 @@ workflow: id: '17424345826170' position: x: 1550 - y: 1105 + y: 1019 positionAbsolute: x: 1550 - y: 1105 + y: 1019 selected: false sourcePosition: right targetPosition: left @@ -4550,10 +4606,10 @@ workflow: id: '17424346884650' position: x: 1550 - y: 1199 + y: 1113 positionAbsolute: x: 1550 - y: 1199 + y: 1113 selected: false sourcePosition: right targetPosition: left @@ -4667,10 +4723,10 @@ workflow: id: '17424347242840' position: x: 1550 - y: 1293 + y: 1207 positionAbsolute: x: 1550 - y: 1293 + y: 1207 selected: false sourcePosition: right targetPosition: left @@ -4784,10 +4840,10 @@ workflow: id: '17424348206030' position: x: 1550 - y: 1387 + y: 1301 positionAbsolute: x: 1550 - y: 1387 + y: 1301 selected: false sourcePosition: right targetPosition: left @@ -4901,10 +4957,10 @@ workflow: id: '17424348526540' position: x: 1550 - y: 1481 + y: 1395 positionAbsolute: x: 1550 - y: 1481 + y: 1395 selected: false sourcePosition: right targetPosition: left @@ -5018,10 +5074,10 @@ workflow: id: '17424348570980' position: x: 1550 - y: 1575 + y: 1489 positionAbsolute: x: 1550 - y: 1575 + y: 1489 selected: false sourcePosition: right targetPosition: left @@ -5066,10 +5122,10 @@ workflow: id: '1742806924635' position: x: 334 - y: 578 + y: 586 positionAbsolute: x: 334 - y: 578 + y: 586 selected: false sourcePosition: right targetPosition: left @@ -5237,10 +5293,10 @@ workflow: id: '1742979394268' position: x: 1246 - y: 1857 + y: 1771 positionAbsolute: x: 1246 - y: 1857 + y: 1771 selected: false sourcePosition: right targetPosition: left @@ -5408,10 +5464,10 @@ workflow: id: '1742979595004' position: x: 1246 - y: 1951 + y: 1865 positionAbsolute: x: 1246 - y: 1951 + y: 1865 selected: false sourcePosition: right targetPosition: left @@ -5579,10 +5635,10 @@ workflow: id: '17429797453350' position: x: 1246 - y: 1669 + y: 1583 positionAbsolute: x: 1246 - y: 1669 + y: 1583 selected: false sourcePosition: right targetPosition: left @@ -5750,10 +5806,10 @@ workflow: id: '17429797664440' position: x: 1550 - y: 1669 + y: 1583 positionAbsolute: x: 1550 - y: 1669 + y: 1583 selected: false sourcePosition: right targetPosition: left @@ -5892,10 +5948,10 @@ workflow: id: '1742979828391' position: x: 1246 - y: 1763 + y: 1677 positionAbsolute: x: 1246 - y: 1763 + y: 1677 selected: false sourcePosition: right targetPosition: left @@ -6034,10 +6090,10 @@ workflow: id: '17429798797920' position: x: 1550 - y: 1763 + y: 1677 positionAbsolute: x: 1550 - y: 1763 + y: 1677 selected: false sourcePosition: right targetPosition: left @@ -6205,10 +6261,10 @@ workflow: id: '17429835599060' position: x: 1550 - y: 1857 + y: 1771 positionAbsolute: x: 1550 - y: 1857 + y: 1771 selected: false sourcePosition: right targetPosition: left @@ -6376,10 +6432,10 @@ workflow: id: '17429835940550' position: x: 1550 - y: 1951 + y: 1865 positionAbsolute: x: 1550 - y: 1951 + y: 1865 selected: false sourcePosition: right targetPosition: left @@ -6546,10 +6602,10 @@ workflow: id: '1744093337863' position: x: 1246 - y: 2063 + y: 1977 positionAbsolute: x: 1246 - y: 2063 + y: 1977 selected: false sourcePosition: right targetPosition: left @@ -6562,18 +6618,18 @@ workflow: - '1744093531084' - output variable: text - selected: true + selected: false title: 结束 type: end height: 90 id: '1744093385250' position: x: 2158 - y: 2117 + y: 2031 positionAbsolute: x: 2158 - y: 2117 - selected: true + y: 2031 + selected: false sourcePosition: right targetPosition: left type: custom @@ -6590,9 +6646,11 @@ workflow: name: deepseek-chat provider: langgenius/deepseek/deepseek prompt_template: - - role: system + - id: a1de005a-fbe2-44a6-9b2c-bb021ae6b3a4 + role: system text: 你是一个智能助手,帮助用户解决可观测性领域的问题 - - role: user + - id: 897a817e-14a2-4609-9081-430a6fd0a6dd + role: user text: '# 目的 当前应用一直产生日志错误级别日志, @@ -6618,10 +6676,10 @@ workflow: id: '1744093428286' position: x: 1550 - y: 2045 + y: 1959 positionAbsolute: x: 1550 - y: 2045 + y: 1959 selected: false sourcePosition: right targetPosition: left @@ -6638,16 +6696,495 @@ workflow: id: '1744093531084' position: x: 1854 - y: 2244 + y: 2147 + positionAbsolute: + x: 1854 + y: 2147 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "import requests\nimport json\n\nAPO_URL = \"http://apo-backend-svc:8080\"\ + \n\nAPO_VM = \"http://apo-victoria-metrics-single-server-svc:8428\"\n\n\ + DEBUG_OUTPUT = False\n\n\nclass Utils:\n @staticmethod\n def get_step(start_time:\ + \ int, end_time: int) -> dict:\n time_diff = end_time - start_time\n\ + \n SECOND = 1000000 # microseconds\n MINUTE = 60 * SECOND\n\ + \ HOUR = 60 * MINUTE\n\n step = SECOND # default step is\ + \ 1 second\n stepStr = \"1s\"\n if time_diff <= 15 * MINUTE:\n\ + \ step = 30 * SECOND\n stepStr = \"30s\"\n \ + \ elif time_diff <= 30 * MINUTE:\n step = 1 * MINUTE\n \ + \ stepStr = \"1m\"\n elif time_diff <= 1 * HOUR:\n \ + \ step = 2 * MINUTE\n stepStr = \"2m\"\n elif time_diff\ + \ <= 1.5 * HOUR:\n step = 3 * MINUTE\n stepStr = \"\ + 3m\"\n elif time_diff <= 3 * HOUR:\n step = 6 * MINUTE\n\ + \ stepStr = \"6m\"\n elif time_diff <= 6 * HOUR:\n \ + \ step = 12 * MINUTE\n stepStr = \"12m\"\n elif\ + \ time_diff <= 12 * HOUR:\n step = 24 * MINUTE\n stepStr\ + \ = \"24m\"\n elif time_diff <= 15 * HOUR:\n step = 30\ + \ * MINUTE\n stepStr = \"30m\"\n elif time_diff <= 30\ + \ * HOUR:\n step = 1 * HOUR\n stepStr = \"1h\"\n \ + \ else:\n step = ((time_diff + 30 * SECOND - 1) // (30\ + \ * SECOND)) * SECOND\n custom = (time_diff + 30 * SECOND - 1)\ + \ // (30 * SECOND)\n stepStr = f\"{custom}s\"\n\n return\ + \ {\n \"step\": step,\n \"stepStr\": stepStr,\n \ + \ }\n\n @staticmethod\n def get_avg(data_json) -> dict:\n \ + \ results = data_json[\"data\"][\"result\"]\n\n stats = {}\n\n\ + \ for entry in results:\n tid = entry[\"metric\"][\"tid\"\ + ]\n values = [float(value[1]) for value in entry[\"values\"]]\n\ + \n stats[tid] = {\n \"avg\": sum(values) / len(values),\n\ + \ }\n\n return stats\n\n @staticmethod\n def extract_values(thread_data:\ + \ dict):\n return [float(item[1]) for item in thread_data[\"values\"\ + ]]\n\n @staticmethod\n def extract_rtt_values(rtt_data):\n \ + \ \"\"\"从RTT数据中提取时间戳和值\"\"\"\n chart_data = rtt_data[\"chart\"][\"\ + chartData\"]\n return list(chart_data.values())\n\n @staticmethod\n\ + \ def net_rtt_trend_analysis(current_metrics: dict, pid: str) -> dict:\n\ + \ rtt_threshold = 0.05\n majority_point_threshold = 0.5\n\n\ + \ num_rtt_sets = len(current_metrics[\"result\"][\"timeseries\"])\n\ + \ anomalous_rtt_sets = 0\n rtt_details = {}\n\n for\ + \ item in current_metrics[\"result\"][\"timeseries\"] or []:\n \ + \ if item[\"labels\"][\"pid\"] != pid:\n continue\n \ + \ legend = item[\"legend\"]\n points = Utils.extract_rtt_values(item)\n\ + \ num_points = len(points)\n\n # 统计高于阈值的点数\n \ + \ exceed_count = sum(1 for p in points if p > rtt_threshold)\n \ + \ exceed_ratio = exceed_count / num_points # 超过阈值的比例\n\n \ + \ # 判断是否“大多数情况”异常\n is_anomaly = exceed_ratio >= majority_point_threshold\n\ + \ if is_anomaly:\n anomalous_rtt_sets += 1\n\n\ + \ rtt_details[legend] = {\n \"points\": points,\n\ + \ \"exceed_count\": exceed_count,\n \"exceed_ratio\"\ + : exceed_ratio,\n \"is_anomaly\": is_anomaly,\n \ + \ }\n\n # 判断大部分RTT数据集是否异常\n anomaly_ratio = anomalous_rtt_sets\ + \ / num_rtt_sets if num_rtt_sets > 0 else 0\n is_majority_anomalous\ + \ = False\n if anomalous_rtt_sets != 0:\n is_majority_anomalous\ + \ = True\n\n result = {\n \"total\": num_rtt_sets,\n \ + \ \"anomalous\": anomalous_rtt_sets,\n \"anomaly_ratio\"\ + : anomaly_ratio,\n \"details\": rtt_details,\n }\n \ + \ return {\"abnormal\": is_majority_anomalous, \"result\": result}\n\ + \n @staticmethod\n def thread_polaris_trend_analysis(current_metrics:\ + \ dict, avg_metrics: dict) -> dict:\n exceed_threshold = 20\n \ + \ majority_point_threshold = 0.5\n majority_thread_threshold\ + \ = 0.4\n\n thread_details = {}\n anomalous_threads = 0\n\ + \ num_threads = len(current_metrics[\"data\"][\"result\"])\n\n \ + \ for item in current_metrics[\"data\"][\"result\"] or []:\n \ + \ tid = item[\"metric\"][\"tid\"]\n points = Utils.extract_values(item)\n\ + \ num_points = len(points)\n avg = avg_metrics.get(tid,\ + \ {}).get(\"avg\", 0)\n # 计算每个点相对于历史均值的百分比变化\n if\ + \ avg != 0:\n percent_changes = [(p - avg) / avg * 100 for\ + \ p in points]\n else:\n percent_changes = [float(\"\ + inf\") for _ in range(num_points)]\n\n # 统计高于均值且超过exceed_threshold的点数\n\ + \ exceed_count = sum(1 for pc in percent_changes if pc > exceed_threshold)\n\ + \ exceed_ratio = exceed_count / num_points # 超过阈值的比例\n\n \ + \ # 判断是否“大多数情况”异常升高\n is_anomaly_increase = exceed_ratio\ + \ >= majority_point_threshold\n if avg == 0:\n \ + \ is_anomaly_increase = False\n if is_anomaly_increase:\n \ + \ anomalous_threads += 1\n\n thread_details[tid]\ + \ = {\n \"points\": points,\n \"historical_avg\"\ + : avg,\n \"exceed_count\": exceed_count,\n \ + \ \"exceed_ratio\": exceed_ratio,\n \"percent_changes\"\ + : percent_changes,\n \"is_anomaly\": is_anomaly_increase,\n\ + \ }\n \n if num_threads == 0:\n anomaly_ratio\ + \ = 0\n else:\n anomaly_ratio = anomalous_threads / num_threads\n\ + \n is_majority_anomalous = anomaly_ratio >= majority_thread_threshold\n\ + \n result = {\n \"total\": num_threads,\n \"\ + anomalous\": anomalous_threads,\n \"anomaly_ratio\": anomaly_ratio,\n\ + \ \"details\": thread_details,\n }\n\n return {\n\ + \ \"abnormal\": is_majority_anomalous,\n \"result\"\ + : result,\n }\n\n @staticmethod\n def display_polaris_trend_analysis(analysis_dit:\ + \ dict, type: str) -> str:\n is_majority_anomalous = analysis_dit.get(\"\ + abnormal\", False)\n result = analysis_dit.get(\"result\", {})\n\ + \ output = []\n output.append(f\"\\n{type}线程耗时异常升高分析:\")\n\ + \ output.append(f\"大部分线程异常升高: {is_majority_anomalous}\")\n \ + \ output.append(\n f\"异常线程比例: {result['anomaly_ratio']:.2%}\ + \ ({result['anomalous']}/{result['total']})\"\n )\n if not\ + \ is_majority_anomalous:\n return \"\\n\".join(output)\n \ + \ for thread_id, details in result[\"details\"].items():\n \ + \ if details[\"historical_avg\"] == 0:\n continue\n \ + \ output.append(\n f\"{thread_id}: 历史均值={details.get('historical_avg',\ + \ 0):.2f}, \"\n f\"超过历史均值20%的点数={details['exceed_count']}/{len(details['points'])}\ + \ ({details['exceed_ratio']:.2%}), \"\n f\"是否异常升高={details['is_anomaly']}\"\ + \n )\n\n return \"\\n\".join(output)\n\n @staticmethod\n\ + \ def display_rtt_trend_analysis(analysis_dit: dict, type: str) -> str:\n\ + \ is_majority_anomalous = analysis_dit.get(\"abnormal\", False)\n\ + \ result = analysis_dit.get(\"result\", {})\n output = []\n\ + \ output.append(f\"\\n{type}异常分析:\")\n output.append(f\"是否存在异常网络RTT:\ + \ {is_majority_anomalous}\")\n if not is_majority_anomalous:\n \ + \ output.append(\"所有对外网络RTT均为正常范围(低于50ms)\")\n return\ + \ \"\\n\".join(output)\n for thread_id, details in result[\"details\"\ + ].items():\n output.append(\n f\"{thread_id}:\ + \ RTT阈值=50ms, \"\n f\"超过阈值的点数={details['exceed_count']}/{len(details['points'])}\ + \ ({details['exceed_ratio']:.2%}), \"\n f\"是否异常升高={details['is_anomaly']}\"\ + \n )\n\n return \"\\n\".join(output)\n\n\ndef get_pod_info(arg:\ + \ dict) -> list:\n timeseries = arg[\"result\"][\"timeseries\"]\n\n \ + \ seen = set()\n pod_info = []\n\n for item in timeseries:\n \ + \ labels = item[\"labels\"]\n namespace = labels.get(\"namespace\"\ + , \"\")\n pod = labels.get(\"pod\", \"\")\n pid = labels.get(\"\ + pid\", \"\")\n\n if pid == \"1\" or pod == \"\":\n continue\n\ + \ unique_key = (namespace, pod, pid)\n\n if unique_key not\ + \ in seen:\n seen.add(unique_key)\n info = {\"namespace\"\ + : namespace, \"pod\": pod, \"pid\": pid}\n pod_info.append(info)\n\ + \ return pod_info\n\n\ndef get_service_instances(start: int, end: int,\ + \ service: str):\n params = {\n \"metricName\": \"Originx 北极星指标\ + \ (服务层级) - 北极星指标 - 列出指定服务的所有实例\",\n \"params\": {\n \"\ + service_name\": service,\n },\n \"startTime\": start,\n \ + \ \"endTime\": end,\n \"step\": Utils.get_step(start, end).get(\"\ + step\"),\n }\n resp = requests.post(f\"{APO_URL}/api/metric/query\"\ + , json=params)\n return get_pod_info(resp.json())\n\n\ndef get_pod_rtt(start:\ + \ int, end: int, namespace: str, pod: str):\n params = {\n \"\ + metricName\": \"基础设施情况 - 容器网络 - 与下游服务RTT\",\n \"params\": {\"pod\"\ + : pod, \"namespace\": namespace},\n \"startTime\": start,\n \ + \ \"endTime\": end,\n \"step\": Utils.get_step(start, end).get(\"\ + step\"),\n }\n resp = requests.post(f\"{APO_URL}/api/metric/query\"\ + , json=params)\n return resp.json()\n\n\ndef get_thread_metrics(start:\ + \ int, end: int, namespace: str, pod: str, type: str):\n query = (\n\ + \ 'increase(originx_thread_polaris_nanoseconds_sum{pod=\"'\n \ + \ + pod\n + '\", type=\"'\n + type\n + '\"}[1m])'\n\ + \ )\n\n res = requests.get(\n APO_VM + \"/prometheus/api/v1/query_range\"\ + ,\n params={\n \"query\": query,\n \"start\"\ + : start / 1000,\n \"end\": end / 1000,\n \"step\"\ + : Utils.get_step(start, end).get(\"stepStr\"),\n },\n )\n return\ + \ res.json()\n\n\ndef polaris_trend_analysis(\n start: int, end: int,\ + \ namespace: str, pod: str, type: str\n) -> dict:\n hour = 3600 * 1000000\n\ + \ current = get_thread_metrics(\n start,\n end,\n \ + \ namespace,\n pod,\n type,\n )\n avg = Utils.get_avg(\n\ + \ get_thread_metrics(\n start - hour,\n start,\n\ + \ namespace,\n pod,\n type,\n )\n\ + \ )\n result = Utils.thread_polaris_trend_analysis(current, avg)\n\ + \ check = result.get(\"abnormal\", False)\n conclusion = Utils.display_polaris_trend_analysis(result,\ + \ type)\n return {\n \"abnormal\": check,\n \"conclusion\"\ + : conclusion,\n }\n\n\ndef net_rtt_trend_analysis(\n start: int, end:\ + \ int, namespace: str, pod: str, pid: str\n) -> dict:\n rtt = get_pod_rtt(\n\ + \ start,\n end,\n namespace,\n pod,\n )\n\ + \ res = Utils.net_rtt_trend_analysis(rtt, pid)\n check = res.get(\"\ + abnormal\", False)\n conclusion = Utils.display_rtt_trend_analysis(res,\ + \ \"rtt\")\n return {\n \"abnormal\": check,\n \"conclusion\"\ + : conclusion,\n }\n\n\ndef check_dependency(start: int, end: int, namespace:\ + \ str, pod: str) -> dict:\n \"\"\"\n net, epoll\n \"\"\"\n net_check\ + \ = polaris_trend_analysis(start, end, namespace, pod, \"net\")\n epoll_check\ + \ = polaris_trend_analysis(start, end, namespace, pod, \"epoll\")\n return\ + \ {\n \"abnormal\": net_check.get(\"abnormal\", False)\n or\ + \ epoll_check.get(\"abnormal\", False),\n \"conclusion\": net_check.get(\"\ + conclusion\", \"\")\n + epoll_check.get(\"conclusion\", \"\"),\n\ + \ }\n\n\ndef check_self_infra(start: int, end: int, namespace: str, pod:\ + \ str, pid: str):\n \"\"\"\n cpu, runq, rtt\n \"\"\"\n cpu_check\ + \ = polaris_trend_analysis(start, end, namespace, pod, \"cpu\")\n runq_check\ + \ = polaris_trend_analysis(start, end, namespace, pod, \"runq\")\n rtt_check\ + \ = net_rtt_trend_analysis(start, end, namespace, pod, pid)\n return\ + \ {\n \"abnormal\": cpu_check.get(\"abnormal\", False)\n or\ + \ runq_check.get(\"abnormal\", False)\n or rtt_check.get(\"abnormal\"\ + , False),\n \"conclusion\": cpu_check.get(\"conclusion\", \"\")\n\ + \ + runq_check.get(\"conclusion\", \"\")\n + rtt_check.get(\"\ + conclusion\", \"\"),\n }\n\n\ndef analyze_instance(start: int, end: int,\ + \ namespace: str, pod: str, pid: str) -> dict:\n dep_check = check_dependency(\n\ + \ start,\n end,\n namespace,\n pod,\n )\n\ + \ output = \"\"\n check = True\n if not dep_check.get(\"abnormal\"\ + , False):\n output += \"\\n结论:对外调用耗时正常,是自身问题, 不可以忽略\"\n return\ + \ {\n \"abnormal\": False,\n \"conclusion\": dep_check.get(\"\ + conclusion\", \"\")\n + output,\n }\n\n infra_check = check_self_infra(\n\ + \ start,\n end,\n namespace,\n pod,\n \ + \ pid,\n )\n check \n if infra_check.get(\"abnormal\", False):\n\ + \ output += \"\\n结论:对外调用耗时异常,同时应用程序自身耗时也异常, 告警不可忽略\"\n check\ + \ = False\n else:\n output += \"\\n结论:对外调用耗时异常,但是自身应用正常,可以忽略该告警\"\ + \n check = True\n\n return {\n \"abnormal\": check,\n \ + \ \"conclusion\": dep_check.get(\"conclusion\", \"\")\n + infra_check.get(\"\ + conclusion\", \"\")\n + output,\n }\n\n\n\n\n\ndef main(service:\ + \ str, start: int, end: int) -> dict:\n items = get_service_instances(\n\ + \ start, end, service\n )\n\n vaild = \"true\"\n\n output\ + \ = []\n for item in items:\n res = analyze_instance(\n \ + \ start,\n end,\n item[\"namespace\"],\n \ + \ item[\"pod\"],\n item[\"pid\"],\n )\n \ + \ con = res.get(\"conclusion\", \"\")\n if res.get(\"abnormal\",\ + \ False) is False:\n vaild = \"false\"\n pod = item['pod']\n\ + \ output.append(f'\\n{pod}{con}')\n \n return {\n \"\ + abnormal\": str(vaild),\n \"conclusion\": \"\\n\".join(output),\n\ + \ }" + code_language: python3 + desc: '' + outputs: + abnormal: + children: null + type: string + conclusion: + children: null + type: string + selected: false + title: 判断是依赖还是自身问题 + type: code + variables: + - value_selector: + - '1742806924635' + - service + variable: service + - value_selector: + - '1741157526222' + - startTime + variable: start + - value_selector: + - '1741157526222' + - endTime + variable: end + height: 54 + id: '1744339618961' + position: + x: 2462 + y: 2182 + positionAbsolute: + x: 2462 + y: 2182 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + desc: '' + outputs: + - value_selector: + - '1744339618961' + - abnormal + variable: text + - value_selector: + - '1744627472534' + - output + variable: output + selected: false + title: 结束 3 + type: end + height: 116 + id: '1744343454355' + position: + x: 3070 + y: 2143 + positionAbsolute: + x: 3070 + y: 2143 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "from datetime import datetime\nimport requests\n\nAPO_URL = \"http://apo-backend-svc:8080\"\ + \n\nAPO_VM = \"http://apo-victoria-metrics-single-server-svc:8428\"\n\n\ + class Utils:\n @staticmethod\n def get_step(start_time: int, end_time:\ + \ int) -> dict:\n time_diff = end_time - start_time\n\n SECOND\ + \ = 1000000 # microseconds\n MINUTE = 60 * SECOND\n HOUR\ + \ = 60 * MINUTE\n\n step = SECOND # default step is 1 second\n \ + \ stepStr = \"1s\"\n if time_diff <= 15 * MINUTE:\n \ + \ step = 30 * SECOND\n stepStr = \"30s\"\n elif time_diff\ + \ <= 30 * MINUTE:\n step = 1 * MINUTE\n stepStr =\ + \ \"1m\"\n elif time_diff <= 1 * HOUR:\n step = 2 * MINUTE\n\ + \ stepStr = \"2m\"\n elif time_diff <= 1.5 * HOUR:\n \ + \ step = 3 * MINUTE\n stepStr = \"3m\"\n elif\ + \ time_diff <= 3 * HOUR:\n step = 6 * MINUTE\n stepStr\ + \ = \"6m\"\n elif time_diff <= 6 * HOUR:\n step = 12 *\ + \ MINUTE\n stepStr = \"12m\"\n elif time_diff <= 12 *\ + \ HOUR:\n step = 24 * MINUTE\n stepStr = \"24m\"\n\ + \ elif time_diff <= 15 * HOUR:\n step = 30 * MINUTE\n\ + \ stepStr = \"30m\"\n elif time_diff <= 30 * HOUR:\n \ + \ step = 1 * HOUR\n stepStr = \"1h\"\n else:\n\ + \ step = ((time_diff + 30 * SECOND - 1) // (30 * SECOND)) * SECOND\n\ + \ custom = (time_diff + 30 * SECOND - 1) // (30 * SECOND)\n \ + \ stepStr = f\"{custom}s\"\n\n return {\n \"\ + step\": step,\n \"stepStr\": stepStr,\n }\n\n\ndef get_service_latency(start:\ + \ int, end: int, service: str, endpoint: str):\n params = {\n \ + \ \"metricName\": \"Originx 北极星指标 (服务层级) - RED指标 - 平均响应时间\",\n \"\ + params\": {\"service_name\": service, \"content_key\": endpoint},\n \ + \ \"startTime\": start,\n \"endTime\": end,\n \"step\"\ + : Utils.get_step(start, end).get(\"step\"),\n }\n resp = requests.post(f\"\ + {APO_URL}/api/metric/query\", json=params)\n return resp.json()\n\ndef\ + \ timestamp_to_readable(ts):\n \"\"\"将微秒时间戳转换为可读格式\"\"\"\n return\ + \ datetime.fromtimestamp(int(ts) / 1000000).strftime('%Y-%m-%d %H:%M:%S')\n\ + \ndef analyze_trend(data, time_window=15*60):\n \"\"\"分析15分钟内的趋势\"\"\"\ + \n points = []\n for ts in data['result']['timeseries']:\n \ + \ for timestamp, value in ts['chart']['chartData'].items():\n \ + \ points.append((int(timestamp), float(value)))\n \n if not points:\n\ + \ return \"No data available.\"\n \n points.sort() # 按时间戳排序\n\ + \ trend = []\n current_group = []\n prev_value = points[0][1]\n\ + \ \n for ts, value in points:\n if not current_group:\n \ + \ current_group.append((ts, value))\n elif abs(value - prev_value)\ + \ < 0.01 or (value == 0 and prev_value == 0): # 连续相似值\n current_group.append((ts,\ + \ value))\n else:\n trend.append(current_group)\n \ + \ current_group = [(ts, value)]\n prev_value = value\n \ + \ \n if current_group:\n trend.append(current_group)\n \n \ + \ # 生成趋势描述\n description = []\n for group in trend:\n start_ts,\ + \ start_val = group[0]\n end_ts, end_val = group[-1]\n avg_val\ + \ = sum(v for _, v in group) / len(group)\n \n if avg_val\ + \ == 0:\n description.append(\n f\"{len(group)}个数据点({timestamp_to_readable(start_ts)}\ + \ - \"\n f\"{timestamp_to_readable(end_ts)})值为0,完全正常\"\n\ + \ )\n elif avg_val < 0.01:\n description.append(\n\ + \ f\"{timestamp_to_readable(start_ts)}出现轻微波动({avg_val:.6f})\"\ + \n )\n elif avg_val > 0.1:\n description.append(f\"\ + 显著峰值在{timestamp_to_readable(start_ts)} - \"\n \ + \ f\"{timestamp_to_readable(end_ts)}期间:\")\n for ts, val in\ + \ group:\n description.append(f\" {timestamp_to_readable(ts)}:\ + \ {val:.3f}\")\n else:\n description.append(\n \ + \ f\"{timestamp_to_readable(start_ts)}值约为{avg_val:.6f}\"\n \ + \ )\n \n return description\n\ndef analyze_history(data):\n\ + \ \"\"\"分析历史数据特征\"\"\"\n values = []\n for ts in data['result']['timeseries']:\n\ + \ for _, value in ts['chart']['chartData'].items():\n \ + \ values.append(float(value))\n \n if not values:\n return\ + \ \"No historical data available.\"\n \n # 统计范围和峰值\n min_val =\ + \ min(values)\n max_val = max(values)\n avg_val = sum(values) / len(values)\n\ + \ high_values = sorted([v for v in values if v > avg_val * 2], reverse=True)\n\ + \ \n description = [\n \"历史数据显示该指标通常维持在很低的水平:\",\n f\"\ + 大多数值在{avg_val:.6f} - {avg_val * 1.2:.6f}之间\",\n f\"偶尔有较高值({', '.join(f'{v:.3f}'\ + \ for v in high_values[:2])})但持续时间很短\"\n ]\n \n return description,\ + \ avg_val\n\ndef analyze_latency(past_24h, recent_15m):\n \"\"\"综合分析并生成报告\"\ + \"\"\n # 分析15分钟趋势\n trend_desc = analyze_trend(recent_15m)\n \n\ + \ # 分析历史数据\n history_desc, history_max = analyze_history(past_24h)\n\ + \ \n # 获取15分钟内的最大值和最后几个点\n recent_values = []\n for ts in recent_15m['result']['timeseries']:\n\ + \ for timestamp, value in ts['chart']['chartData'].items():\n \ + \ recent_values.append((int(timestamp), float(value)))\n \n \ + \ recent_values.sort()\n recent_max = max(v for _, v in recent_values)\ + \ if recent_values else 0\n last_few = [v for _, v in recent_values[-3:]]\ + \ if len(recent_values) >= 3 else [v for _, v in recent_values]\n \n\ + \ # 判断告警有效性\n vaild = \"true\"\n alert_desc = []\n if recent_max\ + \ > history_max * 2: # 峰值远超历史\n alert_desc.append(\n \ + \ f\"当前15分钟内出现的{recent_max:.3f}峰值远高于历史平均({history_max:.3f})\"\n )\n\ + \ peak_duration = sum(1 for _, v in recent_values if v > history_max\ + \ * 2) * 0.5 # 假设每点3分钟\n alert_desc.append(f\"虽然指标在短时间内(约{peak_duration}分钟)出现了远高于历史水平的峰值,但:\"\ + )\n alert_desc.append(f\"峰值持续时间很短(约{peak_duration}分钟)\")\n \ + \ \n if all(v < 0.01 for v in last_few):\n alert_desc.append(\"\ + 随后指标迅速回落到正常水平(0附近)\")\n alert_desc.append(\"当前时刻指标已恢复正常\")\n\ + \ alert_conclusion = \"告警无效\"\n else:\n alert_desc.append(\"\ + 指标未完全恢复正常\")\n alert_conclusion = \"告警有效\"\n vaild\ + \ = \"false\"\n else:\n alert_desc.append(\"当前15分钟内无显著异常峰值\")\n\ + \ alert_conclusion = \"告警无效\"\n \n # 组合报告\n report = [\n\ + \ \"MetricsData趋势分析\",\n \"当前15分钟数据趋势:\"\n ] + [f\" {line}\"\ + \ for line in trend_desc] + [\n \"历史数据对比:\"\n ] + [f\" {line}\"\ + \ for line in history_desc] + [\n \"告警有效性判断:\"\n ] + [f\" {line}\"\ + \ for line in alert_desc] + [\n f\"结论:{alert_conclusion}\"\n ]\n\ + \ \n return {\n \"abnormal\": str(vaild),\n \"conclusion\"\ + : \"\\n\".join(report)+\"\\n\",\n }\n\ndef main(service: str, endpoint:\ + \ str, start: int, end: int) -> dict:\n current = get_service_latency(start,\ + \ end, service, endpoint)\n\n hour = 3600 * 1000000\n history = get_service_latency(\n\ + \ start - hour,\n start,\n service,\n endpoint,\n\ + \ )\n res = analyze_latency(history, current)\n return res\n" + code_language: python3 + desc: '' + outputs: + abnormal: + children: null + type: string + conclusion: + children: null + type: string + selected: true + title: 检测是否异常 + type: code + variables: + - value_selector: + - '1742806924635' + - service + variable: service + - value_selector: + - '1742806924635' + - endpoint + variable: endpoint + - value_selector: + - '1741157526222' + - startTime + variable: start + - value_selector: + - '1741157526222' + - endTime + variable: end + height: 54 + id: '1744599772545' + position: + x: 1854 + y: 2295 positionAbsolute: x: 1854 - y: 2244 + y: 2295 + selected: true + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + cases: + - case_id: 'true' + conditions: + - comparison_operator: contains + id: 386bdf86-7414-490d-9106-cd665b9bf6da + value: 'false' + varType: string + variable_selector: + - '1744599772545' + - abnormal + id: 'true' + logical_operator: and + desc: '' + selected: false + title: 条件分支 + type: if-else + height: 126 + id: '1744599891369' + position: + x: 2158 + y: 2161 + positionAbsolute: + x: 2158 + y: 2161 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + desc: '' + outputs: + - value_selector: + - '1744599772545' + - abnormal + variable: text + - value_selector: + - '1744599772545' + - conclusion + variable: output + selected: false + title: 结束 4 + type: end + height: 116 + id: '1744599930141' + position: + x: 2462 + y: 2276 + positionAbsolute: + x: 2462 + y: 2276 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + desc: '' + output_type: string + selected: false + title: 变量聚合器 4 + type: variable-aggregator + variables: + - - '1744599772545' + - conclusion + - - '1744339618961' + - conclusion + height: 130 + id: '1744627472534' + position: + x: 2766 + y: 2087 + positionAbsolute: + x: 2766 + y: 2087 selected: false sourcePosition: right targetPosition: left type: custom width: 244 viewport: - x: 0 - y: 0 + x: -974 + y: -959.0000000000002 zoom: 0.7 diff --git a/api/init_data/workflows/zh/告警简单根因分析.yml b/api/init_data/workflows/zh/告警简单根因分析.yml index 78340f9e9f..c1877ae76d 100644 --- a/api/init_data/workflows/zh/告警简单根因分析.yml +++ b/api/init_data/workflows/zh/告警简单根因分析.yml @@ -3,7 +3,7 @@ app: icon: 🤖 icon_background: '#FFEAD5' mode: workflow - name: 告警简单根因分析 + name: 告警分析(告警简单根因分析+可用性分析) use_icon_as_answer_icon: false dependencies: - current_identifier: null @@ -269,18 +269,6 @@ workflow: targetHandle: target type: custom zIndex: 0 - - data: - isInIteration: false - sourceType: code - targetType: if-else - id: 1742807803325-source-1742453019576-target - selected: false - source: '1742807803325' - sourceHandle: source - target: '1742453019576' - targetHandle: target - type: custom - zIndex: 0 - data: isInIteration: true iteration_id: '1741497176064' @@ -1010,18 +998,6 @@ workflow: targetHandle: target type: custom zIndex: 0 - - data: - isInIteration: false - sourceType: code - targetType: llm - id: 17443388433580-source-17430596469370-target - selected: false - source: '17443388433580' - sourceHandle: source - target: '17430596469370' - targetHandle: target - type: custom - zIndex: 0 - data: isInIteration: false sourceType: code @@ -1058,18 +1034,6 @@ workflow: targetHandle: target type: custom zIndex: 0 - - data: - isInIteration: false - sourceType: code - targetType: llm - id: 17443388438160-source-17430596469370-target - selected: false - source: '17443388438160' - sourceHandle: source - target: '17430596469370' - targetHandle: target - type: custom - zIndex: 0 - data: isInIteration: false sourceType: code @@ -1082,30 +1046,6 @@ workflow: targetHandle: target type: custom zIndex: 0 - - data: - isInIteration: false - sourceType: code - targetType: llm - id: 17443388443000-source-17430596469370-target - selected: false - source: '17443388443000' - sourceHandle: source - target: '17430596469370' - targetHandle: target - type: custom - zIndex: 0 - - data: - isInIteration: false - sourceType: code - targetType: llm - id: 1744290470304-source-17430596469370-target - selected: false - source: '1744290470304' - sourceHandle: source - target: '17430596469370' - targetHandle: target - type: custom - zIndex: 0 - data: isInIteration: true iteration_id: '1741497176064' @@ -1216,6 +1156,7 @@ workflow: sourceType: code targetType: code id: 1744206314118-source-1744342478278-target + selected: false source: '1744206314118' sourceHandle: source target: '1744342478278' @@ -1228,6 +1169,7 @@ workflow: sourceType: code targetType: code id: 1744342478278-source-1744342609856-target + selected: false source: '1744342478278' sourceHandle: source target: '1744342609856' @@ -1240,6 +1182,7 @@ workflow: sourceType: code targetType: llm id: 1744342609856-source-1741506766037-target + selected: false source: '1744342609856' sourceHandle: source target: '1741506766037' @@ -1252,6 +1195,7 @@ workflow: sourceType: code targetType: code id: 1744342426374-source-1744342800777-target + selected: false source: '1744342426374' sourceHandle: source target: '1744342800777' @@ -1264,6 +1208,7 @@ workflow: sourceType: code targetType: llm id: 1744342800777-source-1741506766037-target + selected: false source: '1744342800777' sourceHandle: source target: '1741506766037' @@ -1276,6 +1221,7 @@ workflow: sourceType: code targetType: code id: 1744342372882-source-1744342846753-target + selected: false source: '1744342372882' sourceHandle: source target: '1744342846753' @@ -1288,6 +1234,7 @@ workflow: sourceType: code targetType: llm id: 1744342846753-source-1741506766037-target + selected: false source: '1744342846753' sourceHandle: source target: '1741506766037' @@ -1300,6 +1247,7 @@ workflow: sourceType: code targetType: code id: 1744342309386-source-1744342890395-target + selected: false source: '1744342309386' sourceHandle: source target: '1744342890395' @@ -1312,6 +1260,7 @@ workflow: sourceType: code targetType: llm id: 1744342890395-source-1741506766037-target + selected: false source: '1744342890395' sourceHandle: source target: '1741506766037' @@ -1324,6 +1273,7 @@ workflow: sourceType: code targetType: code id: 1744342244843-source-1744342920172-target + selected: false source: '1744342244843' sourceHandle: source target: '1744342920172' @@ -1336,556 +1286,449 @@ workflow: sourceType: code targetType: llm id: 1744342920172-source-1741506766037-target + selected: false source: '1744342920172' sourceHandle: source target: '1741506766037' targetHandle: target type: custom zIndex: 1002 - nodes: - data: - desc: '' - selected: false - title: start - type: start - variables: - - label: startTime - max_length: 48 - options: [] - required: true - type: number - variable: startTime - - label: endTime - max_length: 48 - options: [] - required: true - type: number - variable: endTime - - label: params - max_length: 9999999 - options: [] - required: false - type: paragraph - variable: params - - label: nodeName - max_length: 256 - options: [] - required: false - type: text-input - variable: nodeName - - label: nodeIp - max_length: 48 - options: [] - required: false - type: text-input - variable: nodeIp - height: 193 - id: '1741227526517' - position: - x: 30 - y: 444 - positionAbsolute: - x: 30 - y: 444 + isInIteration: false + sourceType: code + targetType: tool + id: 1744878352649-source-1744878296971-target selected: false - sourcePosition: right - targetPosition: left + source: '1744878352649' + sourceHandle: source + target: '1744878296971' + targetHandle: target type: custom - width: 243 + zIndex: 0 - data: - desc: '' - error_handle_mode: terminated - height: 1408 - is_parallel: false - iterator_selector: - - '1741509454645' - - monitor - output_selector: - - '1742470455066' - - output - output_type: array[string] - parallel_nums: 10 - selected: false - start_node_id: 1741497176064start - title: iteration - type: iteration - width: 6503.360125927147 - height: 1408 - id: '1741497176064' - position: - x: 5262.456277280085 - y: 916 - positionAbsolute: - x: 5262.456277280085 - y: 916 + isInIteration: false + sourceType: tool + targetType: code + id: 1744878298576-source-1744878389686-target selected: false - sourcePosition: right - targetPosition: left + source: '1744878298576' + sourceHandle: source + target: '1744878389686' + targetHandle: target type: custom - width: 6503 - zIndex: 1 + zIndex: 0 - data: - desc: '' - isInIteration: true - selected: false - title: '' - type: iteration-start - draggable: false - height: 48 - id: 1741497176064start - parentId: '1741497176064' - position: - x: 24 - y: 68 - positionAbsolute: - x: 5286.456277280085 - y: 984 - selectable: false + isInIteration: false + sourceType: code + targetType: code + id: 1744878389686-source-17448784268890-target selected: false - sourcePosition: right - targetPosition: left - type: custom-iteration-start - width: 44 - zIndex: 1002 + source: '1744878389686' + sourceHandle: source + target: '17448784268890' + targetHandle: target + type: custom + zIndex: 0 - data: - code: "\ndef main(arg1: str) -> dict:\n data = json.loads(arg1)\n return\ - \ {\n \"pod\": data.get(\"pod\", \"\"),\n \"namespace\": data.get(\"\ - namespace\", \"\")\n }\n" - code_language: python3 - desc: '' - isInIteration: true - iteration_id: '1741497176064' - outputs: - namespace: - children: null - type: string - pod: - children: null - type: string - selected: false - title: get instance info - type: code - variables: - - value_selector: - - '1741497176064' - - item - variable: arg1 - height: 53 - id: '1741497181784' - parentId: '1741497176064' - position: - x: 247.70722025930104 - y: 65 - positionAbsolute: - x: 5510.163497539385 - y: 981 + isInIteration: false + sourceType: code + targetType: tool + id: 17448784268890-source-17448784142480-target selected: false - sourcePosition: right - targetPosition: left + source: '17448784268890' + sourceHandle: source + target: '17448784142480' + targetHandle: target type: custom - width: 243 - zIndex: 1002 + zIndex: 0 - data: - desc: '' - isInIteration: true - is_team_authorization: true - iteration_id: '1741497176064' - output_schema: null - paramSchemas: - - auto_generate: null - default: null - form: llm - human_description: - en_US: Specified pod name - ja_JP: Specified pod name - pt_BR: Specified pod name - zh_Hans: 指定的容器POD名称 - label: - en_US: pod - ja_JP: pod - pt_BR: pod - zh_Hans: pod - llm_description: Specified pod name - max: null - min: null - name: pod - options: [] - placeholder: null - precision: null - required: true - scope: null - template: null - type: string - - auto_generate: null - default: null - form: llm - human_description: - en_US: Specified namespace - ja_JP: Specified namespace - pt_BR: Specified namespace - zh_Hans: 指定的容器所在Namespace - label: - en_US: namespace - ja_JP: namespace - pt_BR: namespace - zh_Hans: namespace - llm_description: Specified namespace - max: null - min: null - name: namespace - options: [] - placeholder: null - precision: null - required: true - scope: null - template: null - type: string - - auto_generate: null - default: null - form: llm - human_description: - en_US: Data query start time - ja_JP: Data query start time - pt_BR: Data query start time - zh_Hans: 开始时间 (微秒) - label: - en_US: startTime - ja_JP: startTime - pt_BR: startTime - zh_Hans: startTime - llm_description: Data query start time - max: null - min: null - name: startTime - options: [] - placeholder: null - precision: null - required: true - scope: null - template: null - type: number - - auto_generate: null - default: null - form: llm - human_description: - en_US: Data query end time - ja_JP: Data query end time - pt_BR: Data query end time - zh_Hans: 结束时间 (微秒) - label: - en_US: endTime - ja_JP: endTime - pt_BR: endTime - zh_Hans: endTime - llm_description: Data query start time - max: null - min: null - name: endTime - options: [] - placeholder: null - precision: null - required: true - scope: null - template: null - type: number - params: - endTime: '' - namespace: '' - pod: '' - startTime: '' - provider_id: apo_select - provider_name: apo_select - provider_type: builtin - selected: false - title: Container CPU throttling duration (when using Containerd container - runtime, statistics by container and Pod) - tool_configurations: {} - tool_label: 容器CPU节流时长(使用Containerd容器运行时,按容器和Pod统计) - tool_name: 容器CPU节流时长(使用Containerd容器运行时,按容器和Pod统计) - tool_parameters: - endTime: - type: variable - value: - - '1741227526517' - - endTime - namespace: - type: mixed - value: '{{#1741497181784.namespace#}}' - pod: - type: mixed - value: '{{#1741497181784.pod#}}' - startTime: - type: variable - value: - - '1741227526517' - - startTime - type: tool - height: 53 - id: '1741502699500' - parentId: '1741497176064' - position: - x: 844.0068118865438 - y: 800.2561082923241 - positionAbsolute: - x: 6106.463089166628 - y: 1716.2561082923241 + isInIteration: false + sourceType: llm + targetType: variable-aggregator + id: 17448784398000-source-1744878466157-target selected: false - sourcePosition: right - targetPosition: left + source: '17448784398000' + sourceHandle: source + target: '1744878466157' + targetHandle: target type: custom - width: 243 - zIndex: 1002 + zIndex: 0 - data: - desc: '' - outputs: - - value_selector: - - '1741512806512' - - text - variable: text - - value_selector: [] - variable: '' - selected: false - title: End - type: end - height: 89 - id: '1741502839759' - position: - x: 12255 - y: 916 - positionAbsolute: - x: 12255 - y: 916 + isInIteration: false + sourceType: llm + targetType: variable-aggregator + id: 1744878372490-source-1744878466157-target selected: false - sourcePosition: right - targetPosition: left + source: '1744878372490' + sourceHandle: source + target: '1744878466157' + targetHandle: target type: custom - width: 243 + zIndex: 0 - data: - context: - enabled: false - variable_selector: [] - desc: '' - isInIteration: true - iteration_id: '1741497176064' - model: - completion_params: - temperature: 0.6 - mode: chat - name: deepseek-chat - provider: langgenius/deepseek/deepseek - prompt_template: - - id: cd35fff4-a037-4e72-af99-2ff8299fc5d2 - role: system - text: 你是可观测性领域的智能助手,协助用户分析解决问题。 - - id: 2b2ad468-efcd-4bcc-aeb9-d53d2f2770ad - role: user - text: "# 分析目标 \n基于跨资源类别的异常线程数据,判断应用实例 {{#1741497181784.pod#}}是否受告警事件 {{#1742807803325.alertName#}}\ - \ 影响,并进行初步根因定位。 \n\n# 输入数据规范 \n- **分资源类别数据**:线程延迟及关联的北极星指标(如网络类延迟对应网络RTT)。\ - \ \n- **异常数据点计数**:延迟超过同类型线程历史均值20%的实例数,需标注线程延迟均值。 \n\n# 分析规则 \n1. **主异常方向判定**\ - \ \n - 标注异常线程的延迟均值。 \n - 识别延迟增幅最显著的资源类型(如延迟从毫秒级跃升至秒级)。 \n - **线程数量优先原则**:若某资源类型(如网络/epoll)的异常线程数量最多,则归因至该类别。\ - \ \n *示例:网络/epoll类异常线程4条 vs CPU类2条 → 归因为网络问题。* \n\n2. **误判规避** \ - \ \n - 若CPU或运行队列(RunQ)延迟突增且无并发网络/epoll异常,优先归因为CPU资源争用。 \n\n3. **决胜优先级(降序)**\ - \ \n `CPU > 网络 > Epoll > 文件 > RunQ` \n\n# 根因归因与建议 \n- **CPU延迟突增**:检查代码过度嵌套问题,确认后执行版本回滚。\ - \ \n- **CPU抢占过高**:排查同节点进程的资源争用。 \n- **网络/Epoll延迟突增**: \n - 若RTT异常\ - \ → 网络链路问题; \n - 若RTT正常 → 下游服务延迟问题。 \n- **文件操作延迟升高**:审计文件句柄泄漏。 \n\n\ - # 输出格式 \n**应用实例**: {{#1741497181784.pod#}}\n**北极星指标趋势摘要** \n- 按指标维度描述显著变化,若无异常标注\"\ - 未观测到显著偏离\"。 \n\n**初步根因结论** \n- 明确告警事件对应用的影响状态。 \n- 基于北极星指标与分析规则输出单一归因结论。\ - \ \n *注:若网络/Epoll类异常但RTT正常,归因为下游服务延迟。* \n\n# 输入数据 \n- CPU:{{#1744342920172.result#}}\n\ - - 网络:{{#1744342846753.result#}}\n- 文件:{{#1744342890395.result#}} \n- Epoll:{{#1744342800777.result#}}\ - \ \n- RunQ:{{#1744342609856.result#}} \n\n# 输出准则 \n- 使用简洁的非技术表述,避免歧义。\ - \ \n- 结论需聚焦可执行建议(如\"检查代码嵌套\"而非\"可能存在性能问题\")。" - selected: false - title: llm analysis root cause - type: llm - variables: [] - vision: - enabled: false - height: 89 - id: '1741506766037' - parentId: '1741497176064' - position: - x: 3100.843852684703 - y: 613.2233202711288 - positionAbsolute: - x: 8363.300129964788 - y: 1529.2233202711288 + isInIteration: false + sourceType: variable-aggregator + targetType: end + id: 1744878466157-source-1744878476249-target selected: false - sourcePosition: right - targetPosition: left + source: '1744878466157' + sourceHandle: source + target: '1744878476249' + targetHandle: target type: custom - width: 243 - zIndex: 1002 + zIndex: 0 - data: - code: "import json\n\ndef main(arg: str) -> dict:\n data = json.loads(arg)\n\ - \ timeseries = data[\"data\"][\"timeseries\"]\n \n seen = set()\n\ - \ pod_info = []\n first_pod = None\n \n for item in timeseries:\n\ - \ labels = item[\"labels\"]\n namespace = labels.get(\"namespace\"\ - , \"\")\n pod = labels.get(\"pod\", \"\")\n \n unique_key\ - \ = (namespace, pod)\n \n if unique_key not in seen:\n \ - \ seen.add(unique_key)\n info = {\n \"\ - namespace\": namespace,\n \"pod\": pod\n }\n \ - \ if first_pod is None:\n first_pod = json.dumps(info)\n\ - \ else:\n pod_info.append(json.dumps(info))\n \ - \ \n return {\n \"first\": first_pod,\n \"monitor\"\ - : pod_info,\n }" - code_language: python3 - desc: '' - outputs: - first: - children: null - type: string - monitor: - children: null - type: array[string] - selected: false - title: ' get pod info array' - type: code - variables: - - value_selector: - - '1741597274153' - - output - variable: arg - height: 53 - id: '1741509454645' - position: - x: 1545 - y: 628 - positionAbsolute: - x: 1545 - y: 628 + isInIteration: false + sourceType: tool + targetType: code + id: 1744878296971-source-1744901266309-target selected: false - sourcePosition: right - targetPosition: left + source: '1744878296971' + sourceHandle: source + target: '1744901266309' + targetHandle: target type: custom - width: 243 + zIndex: 0 - data: - context: - enabled: false - variable_selector: [] - desc: '' - model: - completion_params: - temperature: 0.6 - mode: chat - name: deepseek-chat - provider: langgenius/deepseek/deepseek - prompt_template: - - id: 169877de-aa4a-44db-90ef-2cc8f68882c4 - role: system - text: 你是可观测性领域的智能助手,协助用户分析解决问题。 - - id: c9d29276-dfa5-4042-b1c6-562260f9cc7c - role: user - text: '# 目的 - - 基于服务层级聚合Pod信息并输出服务级概览报告。需确保各Pod具体运行状态的可视化呈现清晰直观。删除所有与futex锁相关的优化建议。 - - - # 注意事项 - - 1. 结论中若CPU相关告警事件综合影响系数较高,应优先指导处理CPU资源瓶颈 - - 2. 采用树状层级化输出格式,服务级汇总指标与Pod明细指标需通过缩进形成清晰继承关系 - - 3. 核心黄金指标(CPU、epoll、net、other/runq)命名严格保持原始形态 - - 4. 故障归因分析需保持单一维度,禁止构建跨维度关联(如GC与锁竞争) - - 5. 当网络栈耗时(net)异常增高但往返时延(RTT)处于基线范围时,需重点提示检查下游依赖服务 - - 6. 如果网络耗时增加,但 RTT 正常,建议用户检查下游问题 - - - # 结论数据源 - - {{#17430594012660.output#}} - - {{#1741497176064.output#}} - - ' + isInIteration: false + sourceType: code + targetType: llm + id: 1744901266309-source-1744878372490-target + selected: false + source: '1744901266309' + sourceHandle: source + target: '1744878372490' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: tool + targetType: code + id: 17448784142480-source-17449013347930-target + selected: false + source: '17448784142480' + sourceHandle: source + target: '17449013347930' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: if-else + id: 1742807803325-source-1742453019576-target + selected: false + source: '1742807803325' + sourceHandle: source + target: '1742453019576' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: question-classifier + targetType: tool + id: 17430590082510-1744901730034-1744878298576-target + selected: false + source: '17430590082510' + sourceHandle: '1744901730034' + target: '1744878298576' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: question-classifier + targetType: code + id: 17430590082510-1744901735377-1744878352649-target + selected: false + source: '17430590082510' + sourceHandle: '1744901735377' + target: '1744878352649' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: llm + id: 17449013347930-source-17448784398000-target + selected: false + source: '17449013347930' + sourceHandle: source + target: '17448784398000' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: code + id: 17443388433580-source-1744957721512-target + source: '17443388433580' + sourceHandle: source + target: '1744957721512' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: llm + id: 1744957721512-source-17430596469370-target + source: '1744957721512' + sourceHandle: source + target: '17430596469370' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: code + id: 17443388438160-source-17449580284790-target + source: '17443388438160' + sourceHandle: source + target: '17449580284790' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: llm + id: 17449580284790-source-17430596469370-target + source: '17449580284790' + sourceHandle: source + target: '17430596469370' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: llm + id: 1744290470304-source-17430596469370-target + source: '1744290470304' + sourceHandle: source + target: '17430596469370' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: code + id: 17443388443000-source-17449581781070-target + source: '17443388443000' + sourceHandle: source + target: '17449581781070' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + sourceType: code + targetType: llm + id: 17449581781070-source-17430596469370-target + source: '17449581781070' + sourceHandle: source + target: '17430596469370' + targetHandle: target + type: custom + zIndex: 0 + nodes: + - data: + desc: '' selected: false - title: summary data - type: llm - variables: [] - vision: - enabled: false - height: 89 - id: '1741512806512' + title: start + type: start + variables: + - label: startTime + max_length: 48 + options: [] + required: true + type: number + variable: startTime + - label: endTime + max_length: 48 + options: [] + required: true + type: number + variable: endTime + - label: params + max_length: 9999999 + options: [] + required: false + type: paragraph + variable: params + - label: nodeName + max_length: 256 + options: [] + required: false + type: text-input + variable: nodeName + - label: nodeIp + max_length: 48 + options: [] + required: false + type: text-input + variable: nodeIp + height: 194 + id: '1741227526517' position: - x: 11884 - y: 916 + x: 30 + y: 444 positionAbsolute: - x: 11884 - y: 916 + x: 30 + y: 444 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: desc: '' + error_handle_mode: terminated + height: 1408 + is_parallel: false + iterator_selector: + - '1741509454645' + - monitor + output_selector: + - '1742470455066' + - output + output_type: array[string] + parallel_nums: 10 selected: false - template: 服务名和节点均为空,无法分析根因。 - title: unsupport alert - type: template-transform - variables: [] - height: 53 - id: '1741592094819' + start_node_id: 1741497176064start + title: iteration + type: iteration + width: 6503.360125927147 + height: 1408 + id: '1741497176064' position: - x: 939 - y: 444 + x: 5481 + y: 904 positionAbsolute: - x: 939 - y: 444 + x: 5481 + y: 904 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 6503 + zIndex: 1 - data: desc: '' - outputs: - - value_selector: - - '1741592094819' - - output - variable: output + isInIteration: true selected: false - title: End - type: end - height: 89 - id: '1741592144815' + title: '' + type: iteration-start + draggable: false + height: 48 + id: 1741497176064start + parentId: '1741497176064' position: - x: 1242 - y: 444 + x: 24 + y: 68 positionAbsolute: - x: 1242 - y: 444 + x: 5505 + y: 972 + selectable: false selected: false sourcePosition: right targetPosition: left - type: custom - width: 243 + type: custom-iteration-start + width: 44 + zIndex: 1002 - data: + code: "\ndef main(arg1: str) -> dict:\n data = json.loads(arg1)\n return\ + \ {\n \"pod\": data.get(\"pod\", \"\"),\n \"namespace\": data.get(\"\ + namespace\", \"\")\n }\n" + code_language: python3 desc: '' - is_team_authorization: true - output_schema: null - paramSchemas: - - auto_generate: null - default: null - form: llm - human_description: - en_US: Specified service name - ja_JP: Specified service name - pt_BR: Specified service name - zh_Hans: 指定的服务名 - label: - en_US: service_name - ja_JP: service_name - pt_BR: service_name - zh_Hans: service_name - llm_description: Specified service name - max: null + isInIteration: true + iteration_id: '1741497176064' + outputs: + namespace: + children: null + type: string + pod: + children: null + type: string + selected: false + title: get instance info + type: code + variables: + - value_selector: + - '1741497176064' + - item + variable: arg1 + height: 54 + id: '1741497181784' + parentId: '1741497176064' + position: + x: 247.70722025930104 + y: 65 + positionAbsolute: + x: 5728.707220259301 + y: 969 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + desc: '' + isInIteration: true + is_team_authorization: true + iteration_id: '1741497176064' + output_schema: null + paramSchemas: + - auto_generate: null + default: null + form: llm + human_description: + en_US: Specified pod name + ja_JP: Specified pod name + pt_BR: Specified pod name + zh_Hans: 指定的容器POD名称 + label: + en_US: pod + ja_JP: pod + pt_BR: pod + zh_Hans: pod + llm_description: Specified pod name + max: null min: null - name: service_name + name: pod + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: Specified namespace + ja_JP: Specified namespace + pt_BR: Specified namespace + zh_Hans: 指定的容器所在Namespace + label: + en_US: namespace + ja_JP: namespace + pt_BR: namespace + zh_Hans: namespace + llm_description: Specified namespace + max: null + min: null + name: namespace options: [] placeholder: null precision: null @@ -1943,381 +1786,321 @@ workflow: type: number params: endTime: '' - service_name: '' + namespace: '' + pod: '' startTime: '' provider_id: apo_select provider_name: apo_select provider_type: builtin selected: false - title: query pod info by service + title: Container CPU throttling duration (when using Containerd container + runtime, statistics by container and Pod) tool_configurations: {} - tool_label: 列出该服务下的所有实例 - tool_name: originx_service_instance + tool_label: 容器CPU节流时长(使用Containerd容器运行时,按容器和Pod统计) + tool_name: 容器CPU节流时长(使用Containerd容器运行时,按容器和Pod统计) tool_parameters: endTime: type: variable value: - '1741227526517' - endTime - service_name: + namespace: type: mixed - value: '{{#1742807803325.service#}}' + value: '{{#1741497181784.namespace#}}' + pod: + type: mixed + value: '{{#1741497181784.pod#}}' startTime: type: variable value: - '1741227526517' - startTime type: tool - height: 53 - id: '1741597223833' + height: 54 + id: '1741502699500' + parentId: '1741497176064' position: - x: 939 - y: 536 + x: 844.0068118865438 + y: 800.2561082923241 positionAbsolute: - x: 939 - y: 536 + x: 6325.006811886544 + y: 1704.2561082923241 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 + zIndex: 1002 - data: desc: '' - output_type: string - selected: false - title: summary instance - type: variable-aggregator - variables: - - - '1741597223833' - - text - - - '1741599658821' - - result - - - '1742629595400' + outputs: + - value_selector: + - '1741512806512' - text - height: 150 - id: '1741597274153' + variable: text + - value_selector: [] + variable: '' + selected: false + title: End + type: end + height: 90 + id: '1741502839759' position: - x: 1242 - y: 596.5 + x: 12347 + y: 904 positionAbsolute: - x: 1242 - y: 596.5 + x: 12347 + y: 904 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: - code: "\ndef main(arg1: str, arg2: str) -> dict:\n data = {\n \"\ - data\": {\n \"timeseries\": [\n {\n \ - \ \"labels\": {\n \"namespace\": arg2,\n\ - \ \"pod\": arg1,\n }\n \ - \ }\n ]\n }\n }\n return {\n \"result\"\ - : json.dumps(data),\n }\n" + context: + enabled: false + variable_selector: [] + desc: '' + isInIteration: true + iteration_id: '1741497176064' + model: + completion_params: + temperature: 0.6 + mode: chat + name: deepseek-chat + provider: langgenius/deepseek/deepseek + prompt_template: + - id: cd35fff4-a037-4e72-af99-2ff8299fc5d2 + role: system + text: 你是可观测性领域的智能助手,协助用户分析解决问题。 + - id: 2b2ad468-efcd-4bcc-aeb9-d53d2f2770ad + role: user + text: "# 分析目标 \n基于跨资源类别的异常线程数据,判断应用实例 {{#1741497181784.pod#}}是否受告警事件 {{#1742807803325.alertName#}}\ + \ 影响,并进行初步根因定位。 \n\n# 输入数据规范 \n- **分资源类别数据**:线程延迟及关联的北极星指标(如网络类延迟对应网络RTT)。\ + \ \n- **异常数据点计数**:延迟超过同类型线程历史均值20%的实例数,需标注线程延迟均值。 \n\n# 分析规则 \n1. **主异常方向判定**\ + \ \n - 标注异常线程的延迟均值。 \n - 识别延迟增幅最显著的资源类型(如延迟从毫秒级跃升至秒级)。 \n - **线程数量优先原则**:若某资源类型(如网络/epoll)的异常线程数量最多,则归因至该类别。\ + \ \n *示例:网络/epoll类异常线程4条 vs CPU类2条 → 归因为网络问题。* \n\n2. **误判规避** \ + \ \n - 若CPU或运行队列(RunQ)延迟突增且无并发网络/epoll异常,优先归因为CPU资源争用。 \n\n3. **决胜优先级(降序)**\ + \ \n `CPU > 网络 > Epoll > 文件 > RunQ` \n\n# 根因归因与建议 \n- **CPU延迟突增**:检查代码过度嵌套问题,确认后执行版本回滚。\ + \ \n- **CPU抢占过高**:排查同节点进程的资源争用。 \n- **网络/Epoll延迟突增**: \n - 若RTT异常\ + \ → 网络链路问题; \n - 若RTT正常 → 下游服务延迟问题。 \n- **文件操作延迟升高**:审计文件句柄泄漏。 \n\n\ + # 输出格式 \n**应用实例**: {{#1741497181784.pod#}}\n**北极星指标趋势摘要** \n- 按指标维度描述显著变化,若无异常标注\"\ + 未观测到显著偏离\"。 \n\n**初步根因结论** \n- 明确告警事件对应用的影响状态。 \n- 基于北极星指标与分析规则输出单一归因结论。\ + \ \n *注:若网络/Epoll类异常但RTT正常,归因为下游服务延迟。* \n\n# 输入数据 \n- CPU:{{#1744342920172.result#}}\n\ + - 网络:{{#1744342846753.result#}}\n- 文件:{{#1744342890395.result#}} \n- Epoll:{{#1744342800777.result#}}\ + \ \n- RunQ:{{#1744342609856.result#}} \n\n# 输出准则 \n- 使用简洁的非技术表述,避免歧义。\ + \ \n- 结论需聚焦可执行建议(如\"检查代码嵌套\"而非\"可能存在性能问题\")。" + selected: false + title: llm analysis root cause + type: llm + variables: [] + vision: + enabled: false + height: 90 + id: '1741506766037' + parentId: '1741497176064' + position: + x: 3100.843852684704 + y: 616.9747534648541 + positionAbsolute: + x: 8581.843852684704 + y: 1520.974753464854 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\ndef main(arg: str) -> dict:\n data = json.loads(arg)\n\ + \ timeseries = data[\"data\"][\"timeseries\"]\n \n seen = set()\n\ + \ pod_info = []\n first_pod = None\n \n for item in timeseries:\n\ + \ labels = item[\"labels\"]\n namespace = labels.get(\"namespace\"\ + , \"\")\n pod = labels.get(\"pod\", \"\")\n \n unique_key\ + \ = (namespace, pod)\n \n if unique_key not in seen:\n \ + \ seen.add(unique_key)\n info = {\n \"\ + namespace\": namespace,\n \"pod\": pod\n }\n \ + \ if first_pod is None:\n first_pod = json.dumps(info)\n\ + \ else:\n pod_info.append(json.dumps(info))\n \ + \ \n return {\n \"first\": first_pod,\n \"monitor\"\ + : pod_info,\n }" code_language: python3 desc: '' outputs: - result: + first: children: null type: string + monitor: + children: null + type: array[string] selected: false - title: get pod info + title: ' get pod info array' type: code variables: - value_selector: - - '1742807803325' - - pod - variable: arg1 - - value_selector: - - '1742807803325' - - namespace - variable: arg2 - height: 53 - id: '1741599658821' + - '1741597274153' + - output + variable: arg + height: 54 + id: '1741509454645' position: - x: 939 + x: 1545 y: 628 positionAbsolute: - x: 939 + x: 1545 y: 628 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - - data: - cases: - - case_id: 'true' - conditions: - - comparison_operator: not empty - id: 22866ae7-2bfa-47e9-9c19-d31e8b63001f - value: '' - varType: string - variable_selector: - - '1742807803325' - - pod - - comparison_operator: not empty - id: add89997-787b-4d71-a187-3e9f8e8f25a3 - value: '' - varType: string - variable_selector: - - '1742807803325' - - namespace - id: 'true' - logical_operator: and - - case_id: 2949ad86-bdc0-4b1d-bd84-6e01f41915eb - conditions: - - comparison_operator: not empty - id: 4dbbe62a-669f-4549-9028-c4ccd6c8175c - value: '' - varType: string - variable_selector: - - '1742807803325' - - service - id: 2949ad86-bdc0-4b1d-bd84-6e01f41915eb - logical_operator: and - - case_id: 74dc1f70-729f-47e1-a6c8-9f0a1d2a6ad9 - conditions: - - comparison_operator: not empty - id: a2f849d6-0c4f-40f0-9c91-f37cf81eb3f5 - value: '' - varType: string - variable_selector: - - '1741227526517' - - nodeName - id: 74dc1f70-729f-47e1-a6c8-9f0a1d2a6ad9 - logical_operator: and - desc: '' - selected: false - title: alert instance info - type: if-else - height: 247 - id: '1742453019576' - position: - x: 636 - y: 444 - positionAbsolute: - x: 636 - y: 444 - selected: false - sourcePosition: right - targetPosition: left - type: custom - width: 243 - - data: - classes: - - id: '1742462167200' - name: 'container - - memory' - - id: '1742463856746' - name: 'event type - - container was killed' - - id: '1742464231696' - name: unknow - desc: '' - instruction: You are an intelligent alert classification assistant. Your task - is to classify the given alert events. - instructions: '' - isInIteration: true - iteration_id: '1741497176064' - model: - completion_params: - temperature: 0.7 - mode: chat - name: deepseek-chat - provider: langgenius/deepseek/deepseek - query_variable_selector: - - '1742807803325' - - alertName - selected: false - title: alert event - topics: [] - type: question-classifier - vision: - enabled: false - height: 203 - id: '1742461755094' - parentId: '1741497176064' - position: - x: 76.97060259216505 - y: 363.9210031388636 - positionAbsolute: - x: 5339.42687987225 - y: 1279.9210031388636 - selected: false - sourcePosition: right - targetPosition: left - type: custom - width: 243 - zIndex: 1002 + width: 244 - data: context: enabled: false variable_selector: [] desc: '' - isInIteration: true - iteration_id: '1741497176064' model: completion_params: - temperature: 0.7 + temperature: 0.6 mode: chat name: deepseek-chat provider: langgenius/deepseek/deepseek prompt_template: - - id: 25386b5d-4509-477b-bdab-d835cd12b104 + - id: 169877de-aa4a-44db-90ef-2cc8f68882c4 role: system text: 你是可观测性领域的智能助手,协助用户分析解决问题。 - - id: 5f74b2ce-6208-423b-aa3b-f01f2cafb89b + - id: c9d29276-dfa5-4042-b1c6-562260f9cc7c role: user - text: "# 目的\n\n当前容器:{{#1741497181784.pod#}}\n\n由于内存增高产生告警,需分析原因并提供解决方案。\n\ - \n# 输出要求\n\n展示内存指标数据的趋势(使用简洁语言描述,便于快速理解)。 \n提供相关建议,包括: \n- 检查代码是否存在内存泄漏。\ - \ \n- 通过日志分析问题根因等实用方法。 \n\n# 数据来源\n\n容器内存数据:{{#1743059090157.text#}}" + text: '# 目的 + + 基于服务层级聚合Pod信息并输出服务级概览报告。需确保各Pod具体运行状态的可视化呈现清晰直观。 + + 同时在原有的结论基础上,针对异常线程最多的根因方向给出具体执行解决问题的建议和与问题相关的需要执行的命令。 + + 比如: + + 根因为CPU:使用top -H -p 查看占用,pidstat -t -p 1查看使用率等命令 + + 根因为NET:ss -s查看全局socket状态,ss -tunap查看所有TCP/UDP连接,nstat -z 查看重传等命令 + + 根因为EPOLL:strace -e epoll_wait,epoll_pwait -tt -T -p 追踪epoll调用耗时,perf + trace -e epoll:*查看epoll相关事件等命令 + + 根因为RUNQ:dstat -l 1 查看系统负载,mpstat -P ALL 1查看CPU的%idle和%util等命令 + + 根因为FILE:iotop -o -p 查看进程级磁盘IO,strace -e trace=file -tt -T -p 查看文件相关的系统调用等命令 + + + # 注意事项 + + 1. 结论中若CPU相关告警事件综合影响系数较高,应优先指导处理CPU资源瓶颈 + + 2. 采用树状层级化输出格式,服务级汇总指标与Pod明细指标需通过缩进形成清晰继承关系 + + 3. 核心黄金指标(CPU、epoll、net、other/runq)命名严格保持原始形态 + + 4. 故障归因分析需保持单一维度,禁止构建跨维度关联(如GC与锁竞争) + + 5. 当网络栈耗时(net)异常增高但往返时延(RTT)处于基线范围时,需重点提示检查下游依赖服务 + + 6. 如果网络耗时增加,但 RTT 正常,建议用户检查下游问题 + + + # 结论数据源 + + {{#17430594012660.output#}} + + {{#1741497176064.output#}} + + ' selected: false - title: memory anlysis + title: summary data type: llm variables: [] vision: enabled: false - height: 89 - id: '1742468584059' - parentId: '1741497176064' + height: 90 + id: '1741512806512' position: - x: 1003.8883599154669 - y: 136.40198127761005 + x: 12044 + y: 904 positionAbsolute: - x: 6266.3446371955515 - y: 1052.40198127761 + x: 12044 + y: 904 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: - context: - enabled: false - variable_selector: [] desc: '' - isInIteration: true - iteration_id: '1741497176064' - model: - completion_params: - temperature: 0.7 - mode: chat - name: deepseek-chat - provider: langgenius/deepseek/deepseek - prompt_template: - - id: 893c9a67-e9e3-4c2c-8ed2-6301998d555b - role: system - text: 你是可观测性领域的智能助手,协助用户分析解决问题。 - - id: ccfb4eb1-f253-415d-9a60-9ecab7bd6af4 - role: user - text: 此告警事件属于event类型,影响 {{#1741497181784.pod#}}, 请给出一些建议。 selected: false - title: LLM 4 - type: llm + template: 服务名和节点均为空,无法分析根因。 + title: unsupport alert + type: template-transform variables: [] - vision: - enabled: false - height: 89 - id: '1742468652489' - parentId: '1741497176064' + height: 54 + id: '1741592094819' position: - x: 535.2725275358107 - y: 237.25999906213883 + x: 939 + y: 444 positionAbsolute: - x: 5797.728804815895 - y: 1153.2599990621388 + x: 939 + y: 444 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: desc: '' - isInIteration: true - iteration_id: '1741497176064' - output_type: string + outputs: + - value_selector: + - '1741592094819' + - output + variable: output selected: false - title: summary - type: variable-aggregator - variables: - - - '1741506766037' - - text - - - '1742468584059' - - text - - - '1742468652489' - - text - height: 150 - id: '1742470455066' - parentId: '1741497176064' + title: End + type: end + height: 90 + id: '1741592144815' position: - x: 6244.360125927147 - y: 216.75503228886544 + x: 1242 + y: 444 positionAbsolute: - x: 11506.81640320723 - y: 1132.7550322888656 + x: 1242 + y: 444 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: desc: '' - isInIteration: true is_team_authorization: true - iteration_id: '1741497176064' output_schema: null paramSchemas: - auto_generate: null default: null form: llm human_description: - en_US: Specified pod name - ja_JP: Specified pod name - pt_BR: Specified pod name - zh_Hans: 指定的容器POD名称 - label: - en_US: pod - ja_JP: pod - pt_BR: pod - zh_Hans: pod - llm_description: Specified pod name - max: null - min: null - name: pod - options: [] - placeholder: null - precision: null - required: false - scope: null - template: null - type: string - - auto_generate: null - default: null - form: llm - human_description: - en_US: Specified namespace - ja_JP: Specified namespace - pt_BR: Specified namespace - zh_Hans: 指定的容器所在Namespace + en_US: Specified service name + ja_JP: Specified service name + pt_BR: Specified service name + zh_Hans: 指定的服务名 label: - en_US: namespace - ja_JP: namespace - pt_BR: namespace - zh_Hans: namespace - llm_description: Specified namespace + en_US: service_name + ja_JP: service_name + pt_BR: service_name + zh_Hans: service_name + llm_description: Specified service name max: null min: null - name: namespace + name: service_name options: [] placeholder: null precision: null - required: false + required: true scope: null template: null type: string @@ -2371,202 +2154,339 @@ workflow: type: number params: endTime: '' - namespace: '' - pod: '' + service_name: '' startTime: '' provider_id: apo_select provider_name: apo_select provider_type: builtin selected: false - title: Query container rtt + title: query pod info by service tool_configurations: {} - tool_label: Query container rtt - tool_name: 查询容器网络与下游RTT + tool_label: 列出该服务下的所有实例 + tool_name: originx_service_instance tool_parameters: endTime: type: variable value: - '1741227526517' - endTime - namespace: - type: mixed - value: '{{#1741497181784.namespace#}}' - pod: + service_name: type: mixed - value: '{{#1741497181784.pod#}}' + value: '{{#1742807803325.service#}}' startTime: type: variable value: - '1741227526517' - startTime type: tool - height: 53 - id: '1742547917612' - parentId: '1741497176064' + height: 54 + id: '1741597223833' position: - x: 837.913049217997 - y: 567.6173152328563 + x: 939 + y: 536 positionAbsolute: - x: 6100.369326498081 - y: 1483.6173152328563 + x: 939 + y: 536 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: desc: '' - is_team_authorization: true - output_schema: null - paramSchemas: - - auto_generate: null - default: null - form: llm - human_description: - en_US: Specified node name - ja_JP: Specified node name - pt_BR: Specified pod name - zh_Hans: 指定的主机名称 - label: - en_US: node_name - ja_JP: node_name - pt_BR: node_name - zh_Hans: node_name - llm_description: Specified pod name - max: null - min: null - name: node_name - options: [] - placeholder: null - precision: null - required: true - scope: null - template: null - type: string - - auto_generate: null - default: null - form: llm - human_description: - en_US: Specified Process ID - ja_JP: Specified Process ID - pt_BR: Specified Process ID - zh_Hans: 指定的进程ID - label: - en_US: pid - ja_JP: pid - pt_BR: pid - zh_Hans: pid - llm_description: Specified Process ID - max: null - min: null - name: pid - options: [] - placeholder: null - precision: null - required: false - scope: null - template: null - type: string - - auto_generate: null - default: null - form: llm - human_description: - en_US: Data query start time - ja_JP: Data query start time - pt_BR: Data query start time - zh_Hans: 开始时间 (微秒) - label: - en_US: startTime - ja_JP: startTime - pt_BR: startTime - zh_Hans: startTime - llm_description: Data query start time - max: null - min: null - name: startTime - options: [] - placeholder: null - precision: null - required: true - scope: null - template: null - type: number - - auto_generate: null - default: null - form: llm - human_description: - en_US: Data query end time - ja_JP: Data query end time - pt_BR: Data query end time - zh_Hans: 结束时间 (微秒) - label: - en_US: endTime - ja_JP: endTime - pt_BR: endTime - zh_Hans: endTime - llm_description: Data query start time - max: null - min: null - name: endTime - options: [] - placeholder: null - precision: null - required: true - scope: null - template: null - type: number - params: - endTime: '' - node_name: '' - pid: '' - startTime: '' - provider_id: apo_select - provider_name: apo_select - provider_type: builtin + output_type: string selected: false - title: get pod info in node - tool_configurations: {} - tool_label: Thread Polaris Metrics Process All monitor - tool_name: originx_service_monitor - tool_parameters: - endTime: - type: variable - value: - - '1741227526517' - - endTime - node_name: - type: mixed - value: '{{#1741227526517.nodeName#}}' - startTime: - type: variable - value: - - '1741227526517' - - startTime - type: tool - height: 53 - id: '1742629595400' + title: summary instance + type: variable-aggregator + variables: + - - '1741597223833' + - text + - - '1741599658821' + - result + - - '1742629595400' + - text + height: 152 + id: '1741597274153' + position: + x: 1242 + y: 596.5 + positionAbsolute: + x: 1242 + y: 596.5 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "\ndef main(arg1: str, arg2: str) -> dict:\n data = {\n \"\ + data\": {\n \"timeseries\": [\n {\n \ + \ \"labels\": {\n \"namespace\": arg2,\n\ + \ \"pod\": arg1,\n }\n \ + \ }\n ]\n }\n }\n return {\n \"result\"\ + : json.dumps(data),\n }\n" + code_language: python3 + desc: '' + outputs: + result: + children: null + type: string + selected: false + title: get pod info + type: code + variables: + - value_selector: + - '1742807803325' + - pod + variable: arg1 + - value_selector: + - '1742807803325' + - namespace + variable: arg2 + height: 54 + id: '1741599658821' position: x: 939 - y: 720 + y: 628 positionAbsolute: x: 939 - y: 720 + y: 628 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + cases: + - case_id: 'true' + conditions: + - comparison_operator: not empty + id: 22866ae7-2bfa-47e9-9c19-d31e8b63001f + value: '' + varType: string + variable_selector: + - '1742807803325' + - pod + - comparison_operator: not empty + id: add89997-787b-4d71-a187-3e9f8e8f25a3 + value: '' + varType: string + variable_selector: + - '1742807803325' + - namespace + id: 'true' + logical_operator: and + - case_id: 2949ad86-bdc0-4b1d-bd84-6e01f41915eb + conditions: + - comparison_operator: not empty + id: 4dbbe62a-669f-4549-9028-c4ccd6c8175c + value: '' + varType: string + variable_selector: + - '1742807803325' + - service + id: 2949ad86-bdc0-4b1d-bd84-6e01f41915eb + logical_operator: and + - case_id: 74dc1f70-729f-47e1-a6c8-9f0a1d2a6ad9 + conditions: + - comparison_operator: not empty + id: a2f849d6-0c4f-40f0-9c91-f37cf81eb3f5 + value: '' + varType: string + variable_selector: + - '1741227526517' + - nodeName + id: 74dc1f70-729f-47e1-a6c8-9f0a1d2a6ad9 + logical_operator: and + desc: '' + selected: false + title: alert instance info + type: if-else + height: 248 + id: '1742453019576' + position: + x: 636 + y: 444 + positionAbsolute: + x: 636 + y: 444 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: + classes: + - id: '1742462167200' + name: 'container + + memory' + - id: '1742463856746' + name: 'event type + + container was killed' + - id: '1742464231696' + name: unknow desc: '' + instruction: You are an intelligent alert classification assistant. Your task + is to classify the given alert events. + instructions: '' isInIteration: true - is_team_authorization: true iteration_id: '1741497176064' - output_schema: null - paramSchemas: - - auto_generate: null - default: null - form: llm + model: + completion_params: + temperature: 0.7 + mode: chat + name: deepseek-chat + provider: langgenius/deepseek/deepseek + query_variable_selector: + - '1742807803325' + - alertName + selected: false + title: alert event + topics: [] + type: question-classifier + vision: + enabled: false + height: 204 + id: '1742461755094' + parentId: '1741497176064' + position: + x: 76.97060259216505 + y: 363.9210031388636 + positionAbsolute: + x: 5557.970602592165 + y: 1267.9210031388636 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + context: + enabled: false + variable_selector: [] + desc: '' + isInIteration: true + iteration_id: '1741497176064' + model: + completion_params: + temperature: 0.7 + mode: chat + name: deepseek-chat + provider: langgenius/deepseek/deepseek + prompt_template: + - id: 25386b5d-4509-477b-bdab-d835cd12b104 + role: system + text: 你是可观测性领域的智能助手,协助用户分析解决问题。 + - id: 5f74b2ce-6208-423b-aa3b-f01f2cafb89b + role: user + text: "# 目的\n\n当前容器:{{#1741497181784.pod#}}\n\n由于内存增高产生告警,需分析原因并提供解决方案。\n\ + \n# 输出要求\n\n展示内存指标数据的趋势(使用简洁语言描述,便于快速理解)。 \n提供相关建议,包括: \n- 检查代码是否存在内存泄漏。\ + \ \n- 通过日志分析问题根因等实用方法。 \n\n# 数据来源\n\n容器内存数据:{{#1743059090157.text#}}" + selected: false + title: memory anlysis + type: llm + variables: [] + vision: + enabled: false + height: 90 + id: '1742468584059' + parentId: '1741497176064' + position: + x: 1003.8883599154669 + y: 136.40198127761005 + positionAbsolute: + x: 6484.888359915467 + y: 1040.40198127761 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + context: + enabled: false + variable_selector: [] + desc: '' + isInIteration: true + iteration_id: '1741497176064' + model: + completion_params: + temperature: 0.7 + mode: chat + name: deepseek-chat + provider: langgenius/deepseek/deepseek + prompt_template: + - id: 893c9a67-e9e3-4c2c-8ed2-6301998d555b + role: system + text: 你是可观测性领域的智能助手,协助用户分析解决问题。 + - id: ccfb4eb1-f253-415d-9a60-9ecab7bd6af4 + role: user + text: 此告警事件属于event类型,影响 {{#1741497181784.pod#}}, 请给出一些建议。 + selected: false + title: LLM 4 + type: llm + variables: [] + vision: + enabled: false + height: 90 + id: '1742468652489' + parentId: '1741497176064' + position: + x: 535.2725275358107 + y: 237.25999906213883 + positionAbsolute: + x: 6016.272527535811 + y: 1141.2599990621388 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + desc: '' + isInIteration: true + iteration_id: '1741497176064' + output_type: string + selected: false + title: summary + type: variable-aggregator + variables: + - - '1741506766037' + - text + - - '1742468584059' + - text + - - '1742468652489' + - text + height: 152 + id: '1742470455066' + parentId: '1741497176064' + position: + x: 6244.360125927147 + y: 216.75503228886544 + positionAbsolute: + x: 11725.360125927147 + y: 1120.7550322888656 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + desc: '' + isInIteration: true + is_team_authorization: true + iteration_id: '1741497176064' + output_schema: null + paramSchemas: + - auto_generate: null + default: null + form: llm human_description: en_US: Specified pod name ja_JP: Specified pod name @@ -2691,97 +2611,47 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 - id: '1742798505742' + height: 54 + id: '1742547917612' parentId: '1741497176064' position: - x: 838.2707565556775 - y: 677.0915533804653 + x: 837.913049217997 + y: 567.6173152328563 positionAbsolute: - x: 6100.727033835762 - y: 1593.0915533804653 + x: 6318.9130492179975 + y: 1471.6173152328563 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - - data: - code: " \ndef get_value(data, keys): \n for key in keys: \n value = data.get(key)\ - \ \n if value is not None: \n return value \n return \"\" \n\n\ - import json \n\ndef main(arg: str) -> dict:\n data = json.loads(arg) \n\ - \ return { \n \"alertName\": get_value(data, [\"alertName\"]), \n\ - \ \"service\": get_value(data, [\"svc_name\", \"service\"]), \n \"endpoint\"\ - : get_value(data,[\"endpoint\", \"content_key\"]), \n \"pod\": get_value(data,[\"\ - pod\", \"src_pod\", \"pod_name\"]), \n \"namespace\": get_value(data,[\"\ - namespace\", \"src_namespace\"]), \n}" - code_language: python3 - desc: '' - outputs: - alertName: - children: null - type: string - endpoint: - children: null - type: string - namespace: - children: null - type: string - pod: - children: null - type: string - service: - children: null - type: string - selected: false - title: get alert event label info - type: code - variables: - - value_selector: - - '1741227526517' - - params - variable: arg - height: 53 - id: '1742807803325' - position: - x: 333 - y: 444 - positionAbsolute: - x: 333 - y: 444 - selected: false - sourcePosition: right - targetPosition: left - type: custom - width: 243 - data: desc: '' - isInIteration: true is_team_authorization: true - iteration_id: '1741497176064' output_schema: null paramSchemas: - auto_generate: null default: null form: llm human_description: - en_US: cAdvisor job name - ja_JP: cAdvisor job name - pt_BR: cAdvisor job name - zh_Hans: cAdvisor任务名称 + en_US: Specified node name + ja_JP: Specified node name + pt_BR: Specified pod name + zh_Hans: 指定的主机名称 label: - en_US: cAdvisor job name - ja_JP: cAdvisor job name - pt_BR: cAdvisor job name - zh_Hans: cAdvisor任务名称 - llm_description: cAdvisor job name + en_US: node_name + ja_JP: node_name + pt_BR: node_name + zh_Hans: node_name + llm_description: Specified pod name max: null min: null - name: cadvisor_job_name + name: node_name options: [] placeholder: null precision: null - required: false + required: true scope: null template: null type: string @@ -2789,19 +2659,19 @@ workflow: default: null form: llm human_description: - en_US: Namespace - ja_JP: Namespace - pt_BR: Namespace - zh_Hans: 命名空间 + en_US: Specified Process ID + ja_JP: Specified Process ID + pt_BR: Specified Process ID + zh_Hans: 指定的进程ID label: - en_US: Namespace - ja_JP: Namespace - pt_BR: Namespace - zh_Hans: 命名空间 - llm_description: Namespace + en_US: pid + ja_JP: pid + pt_BR: pid + zh_Hans: pid + llm_description: Specified Process ID max: null min: null - name: namespace + name: pid options: [] placeholder: null precision: null @@ -2813,23 +2683,119 @@ workflow: default: null form: llm human_description: - en_US: Pod name - ja_JP: Pod name - pt_BR: Pod name - zh_Hans: Pod名称 + en_US: Data query start time + ja_JP: Data query start time + pt_BR: Data query start time + zh_Hans: 开始时间 (微秒) label: - en_US: Pod name - ja_JP: Pod name - pt_BR: Pod name - zh_Hans: Pod名称 - llm_description: Pod name + en_US: startTime + ja_JP: startTime + pt_BR: startTime + zh_Hans: startTime + llm_description: Data query start time max: null min: null - name: pod + name: startTime options: [] placeholder: null precision: null - required: false + required: true + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query end time + ja_JP: Data query end time + pt_BR: Data query end time + zh_Hans: 结束时间 (微秒) + label: + en_US: endTime + ja_JP: endTime + pt_BR: endTime + zh_Hans: endTime + llm_description: Data query start time + max: null + min: null + name: endTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + params: + endTime: '' + node_name: '' + pid: '' + startTime: '' + provider_id: apo_select + provider_name: apo_select + provider_type: builtin + selected: false + title: get pod info in node + tool_configurations: {} + tool_label: Thread Polaris Metrics Process All monitor + tool_name: originx_service_monitor + tool_parameters: + endTime: + type: variable + value: + - '1741227526517' + - endTime + node_name: + type: mixed + value: '{{#1741227526517.nodeName#}}' + startTime: + type: variable + value: + - '1741227526517' + - startTime + type: tool + height: 54 + id: '1742629595400' + position: + x: 939 + y: 720 + positionAbsolute: + x: 939 + y: 720 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + desc: '' + isInIteration: true + is_team_authorization: true + iteration_id: '1741497176064' + output_schema: null + paramSchemas: + - auto_generate: null + default: null + form: llm + human_description: + en_US: Specified pod name + ja_JP: Specified pod name + pt_BR: Specified pod name + zh_Hans: 指定的容器POD名称 + label: + en_US: pod + ja_JP: pod + pt_BR: pod + zh_Hans: pod + llm_description: Specified pod name + max: null + min: null + name: pod + options: [] + placeholder: null + precision: null + required: false scope: null template: null type: string @@ -2837,16 +2803,40 @@ workflow: default: null form: llm human_description: - en_US: Data query start time(Microsecond) - ja_JP: Data query start time(Microsecond) - pt_BR: Data query start time(Microsecond) + en_US: Specified namespace + ja_JP: Specified namespace + pt_BR: Specified namespace + zh_Hans: 指定的容器所在Namespace + label: + en_US: namespace + ja_JP: namespace + pt_BR: namespace + zh_Hans: namespace + llm_description: Specified namespace + max: null + min: null + name: namespace + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query start time + ja_JP: Data query start time + pt_BR: Data query start time zh_Hans: 开始时间 (微秒) label: en_US: startTime ja_JP: startTime pt_BR: startTime zh_Hans: startTime - llm_description: Data query start time(Microsecond) + llm_description: Data query start time max: null min: null name: startTime @@ -2861,16 +2851,16 @@ workflow: default: null form: llm human_description: - en_US: Data query end time(Microsecond) - ja_JP: Data query end time(Microsecond) - pt_BR: Data query end time(Microsecond) + en_US: Data query end time + ja_JP: Data query end time + pt_BR: Data query end time zh_Hans: 结束时间 (微秒) label: en_US: endTime ja_JP: endTime pt_BR: endTime zh_Hans: endTime - llm_description: Data query end time(Microsecond) + llm_description: Data query start time max: null min: null name: endTime @@ -2882,7 +2872,6 @@ workflow: template: null type: number params: - cadvisor_job_name: '' endTime: '' namespace: '' pod: '' @@ -2891,12 +2880,10 @@ workflow: provider_name: apo_select provider_type: builtin selected: false - title: Container CPU usage rate (Containerd runtime, aggregated by container - and Pod) + title: Query container rtt tool_configurations: {} - tool_label: Container CPU usage rate (Containerd runtime, aggregated by container - and Pod) - tool_name: 容器CPU使用率(使用Containerd容器运行时,按容器和Pod统计) + tool_label: Query container rtt + tool_name: 查询容器网络与下游RTT tool_parameters: endTime: type: variable @@ -2915,21 +2902,69 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 - id: '1742980228913' + height: 54 + id: '1742798505742' parentId: '1741497176064' position: - x: 817.3433448742644 - y: 386.77445077825655 + x: 838.2707565556775 + y: 677.0915533804653 positionAbsolute: - x: 6079.799622154349 - y: 1302.7744507782566 + x: 6319.2707565556775 + y: 1581.0915533804653 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 + - data: + code: " \ndef get_value(data, keys): \n for key in keys: \n value = data.get(key)\ + \ \n if value is not None: \n return value \n return \"\" \n\n\ + import json \n\ndef main(arg: str) -> dict:\n data = json.loads(arg) \n\ + \ return { \n \"alertName\": get_value(data, [\"alertName\"]), \n\ + \ \"service\": get_value(data, [\"svc_name\", \"service\"]), \n \"endpoint\"\ + : get_value(data,[\"endpoint\", \"content_key\"]), \n \"pod\": get_value(data,[\"\ + pod\", \"src_pod\", \"pod_name\"]), \n \"namespace\": get_value(data,[\"\ + namespace\", \"src_namespace\"]), \n}" + code_language: python3 + desc: '' + outputs: + alertName: + children: null + type: string + endpoint: + children: null + type: string + namespace: + children: null + type: string + pod: + children: null + type: string + service: + children: null + type: string + selected: false + title: get alert event label info + type: code + variables: + - value_selector: + - '1741227526517' + - params + variable: arg + height: 54 + id: '1742807803325' + position: + x: 333 + y: 444 + positionAbsolute: + x: 333 + y: 444 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 - data: desc: '' isInIteration: true @@ -3067,12 +3102,12 @@ workflow: provider_name: apo_select provider_type: builtin selected: false - title: Container disk read time per second (Containerd runtime, aggregated - by container and Pod) + title: Container CPU usage rate (Containerd runtime, aggregated by container + and Pod) tool_configurations: {} - tool_label: Container disk read time per second (Containerd runtime, aggregated - by container and Pod) - tool_name: 容器磁盘读取耗时每秒(使用Containerd,按Pod和容器统计) + tool_label: Container CPU usage rate (Containerd runtime, aggregated by container + and Pod) + tool_name: 容器CPU使用率(使用Containerd容器运行时,按容器和Pod统计) tool_parameters: endTime: type: variable @@ -3091,20 +3126,20 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 - id: '1742980318484' + height: 54 + id: '1742980228913' parentId: '1741497176064' position: - x: 829.6661633624499 - y: 478.0639330308825 + x: 817.3433448742644 + y: 386.77445077825655 positionAbsolute: - x: 6092.1224406425345 - y: 1394.0639330308825 + x: 6298.343344874264 + y: 1290.7744507782566 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -3117,19 +3152,19 @@ workflow: default: null form: llm human_description: - en_US: Pod name - ja_JP: Pod name - pt_BR: Pod name - zh_Hans: Pod名称 + en_US: cAdvisor job name + ja_JP: cAdvisor job name + pt_BR: cAdvisor job name + zh_Hans: cAdvisor任务名称 label: - en_US: Pod name - ja_JP: Pod name - pt_BR: Pod name - zh_Hans: Pod名称 - llm_description: Pod name + en_US: cAdvisor job name + ja_JP: cAdvisor job name + pt_BR: cAdvisor job name + zh_Hans: cAdvisor任务名称 + llm_description: cAdvisor job name max: null min: null - name: pod + name: cadvisor_job_name options: [] placeholder: null precision: null @@ -3141,31 +3176,207 @@ workflow: default: null form: llm human_description: - en_US: Data query start time(Microsecond) - ja_JP: Data query start time(Microsecond) - pt_BR: Data query start time - zh_Hans: 开始时间 (微秒) + en_US: Namespace + ja_JP: Namespace + pt_BR: Namespace + zh_Hans: 命名空间 label: - en_US: startTime - ja_JP: startTime - pt_BR: startTime - zh_Hans: startTime - llm_description: Data query start time(Microsecond) + en_US: Namespace + ja_JP: Namespace + pt_BR: Namespace + zh_Hans: 命名空间 + llm_description: Namespace max: null min: null - name: startTime + name: namespace options: [] placeholder: null precision: null - required: true + required: false scope: null template: null - type: number + type: string - auto_generate: null default: null form: llm human_description: - en_US: Data query end time(Microsecond) + en_US: Pod name + ja_JP: Pod name + pt_BR: Pod name + zh_Hans: Pod名称 + label: + en_US: Pod name + ja_JP: Pod name + pt_BR: Pod name + zh_Hans: Pod名称 + llm_description: Pod name + max: null + min: null + name: pod + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query start time(Microsecond) + ja_JP: Data query start time(Microsecond) + pt_BR: Data query start time(Microsecond) + zh_Hans: 开始时间 (微秒) + label: + en_US: startTime + ja_JP: startTime + pt_BR: startTime + zh_Hans: startTime + llm_description: Data query start time(Microsecond) + max: null + min: null + name: startTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query end time(Microsecond) + ja_JP: Data query end time(Microsecond) + pt_BR: Data query end time(Microsecond) + zh_Hans: 结束时间 (微秒) + label: + en_US: endTime + ja_JP: endTime + pt_BR: endTime + zh_Hans: endTime + llm_description: Data query end time(Microsecond) + max: null + min: null + name: endTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + params: + cadvisor_job_name: '' + endTime: '' + namespace: '' + pod: '' + startTime: '' + provider_id: apo_select + provider_name: apo_select + provider_type: builtin + selected: false + title: Container disk read time per second (Containerd runtime, aggregated + by container and Pod) + tool_configurations: {} + tool_label: Container disk read time per second (Containerd runtime, aggregated + by container and Pod) + tool_name: 容器磁盘读取耗时每秒(使用Containerd,按Pod和容器统计) + tool_parameters: + endTime: + type: variable + value: + - '1741227526517' + - endTime + namespace: + type: mixed + value: '{{#1741497181784.namespace#}}' + pod: + type: mixed + value: '{{#1741497181784.pod#}}' + startTime: + type: variable + value: + - '1741227526517' + - startTime + type: tool + height: 54 + id: '1742980318484' + parentId: '1741497176064' + position: + x: 829.6661633624499 + y: 478.0639330308825 + positionAbsolute: + x: 6310.66616336245 + y: 1382.0639330308825 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + desc: '' + isInIteration: true + is_team_authorization: true + iteration_id: '1741497176064' + output_schema: null + paramSchemas: + - auto_generate: null + default: null + form: llm + human_description: + en_US: Pod name + ja_JP: Pod name + pt_BR: Pod name + zh_Hans: Pod名称 + label: + en_US: Pod name + ja_JP: Pod name + pt_BR: Pod name + zh_Hans: Pod名称 + llm_description: Pod name + max: null + min: null + name: pod + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query start time(Microsecond) + ja_JP: Data query start time(Microsecond) + pt_BR: Data query start time + zh_Hans: 开始时间 (微秒) + label: + en_US: startTime + ja_JP: startTime + pt_BR: startTime + zh_Hans: startTime + llm_description: Data query start time(Microsecond) + max: null + min: null + name: startTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query end time(Microsecond) ja_JP: Data query end time(Microsecond) pt_BR: Data query end time(Microsecond) zh_Hans: 结束时间 (微秒) @@ -3212,20 +3423,20 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '1742980720651' parentId: '1741497176064' position: x: 514.1495817191098 y: 381.2394319656114 positionAbsolute: - x: 5776.605858999194 - y: 1297.2394319656114 + x: 5995.14958171911 + y: 1285.2394319656114 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -3333,20 +3544,20 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '1742980748320' parentId: '1741497176064' position: x: 502.3624003022592 y: 478.98654261939305 positionAbsolute: - x: 5764.818677582343 - y: 1394.986542619393 + x: 5983.36240030226 + y: 1382.986542619393 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -3454,20 +3665,20 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '1742980780865' parentId: '1741497176064' position: x: 507.1563167397626 y: 565.5281468443313 positionAbsolute: - x: 5769.612594019847 - y: 1481.5281468443313 + x: 5988.156316739763 + y: 1469.5281468443313 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -3575,20 +3786,20 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '1742980837261' parentId: '1741497176064' position: x: 513.7505744250511 y: 675.3340235425774 positionAbsolute: - x: 5776.206851705136 - y: 1591.3340235425774 + x: 5994.750574425051 + y: 1579.3340235425774 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -3696,20 +3907,20 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '1742980885557' parentId: '1741497176064' position: x: 519.4457802875913 y: 800.367276200539 positionAbsolute: - x: 5781.902057567676 - y: 1716.367276200539 + x: 6000.445780287591 + y: 1704.367276200539 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "\ndef main(arg1: str) -> dict:\n data = json.loads(arg1)\n return\ @@ -3734,7 +3945,7 @@ workflow: - '1741509454645' - first variable: arg1 - height: 53 + height: 54 id: '17430589567120' position: x: 1848 @@ -3746,7 +3957,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: classes: @@ -3759,7 +3970,11 @@ workflow: container was killed' - id: '1742464231696' - name: unknow type + name: 请求延时类相关或者未知类型 + - id: '1744901730034' + name: 请求错误率 + - id: '1744901735377' + name: 日志错误率 desc: '' instruction: 你是一个智能告警分类助手。你的任务是对给定的警报事件进行分类。 instructions: '' @@ -3780,7 +3995,7 @@ workflow: type: question-classifier vision: enabled: false - height: 203 + height: 276 id: '17430590082510' position: x: 2151 @@ -3792,7 +4007,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -3953,20 +4168,20 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '1743059090157' parentId: '1741497176064' position: x: 544.0386199547656 y: 125.67116500938346 positionAbsolute: - x: 5806.49489723485 - y: 1041.6711650093835 + x: 6025.038619954766 + y: 1029.6711650093835 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -4127,7 +4342,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430593614550' position: x: 4272 @@ -4139,7 +4354,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: context: @@ -4160,16 +4375,38 @@ workflow: text: 你是可观测性领域的智能助手,协助用户分析解决问题。 - id: 5f74b2ce-6208-423b-aa3b-f01f2cafb89b role: user - text: "# 目的\n\n当前容器:{{#17430589567120.pod#}}\n\n由于内存增高产生告警,需分析原因并提供解决方案。\n\ - \n# 输出要求\n\n展示内存指标数据的趋势(使用简洁语言描述,便于快速理解)。 \n提供相关建议,包括: \n- 检查代码是否存在内存泄漏。\ - \ \n- 通过日志分析问题根因等实用方法。 \n\n# 数据来源\n\n容器内存数据:{{#17430593614550.text#}}" + text: '# 目的 + + + 当前容器:{{#17430589567120.pod#}} + + + 由于内存增高产生告警,需分析原因并提供解决方案。 + + + # 输出要求 + + + 展示内存指标数据的趋势(使用简洁语言描述,便于快速理解)。 + + 提供相关建议,包括: + + - 检查代码是否存在内存泄漏。 + + - 通过日志分析问题根因等实用方法。 + + + # 数据来源 + + + 容器内存数据:{{#17430593614550.text#}}' selected: false title: memory analysis type: llm variables: [] vision: enabled: false - height: 89 + height: 90 id: '17430593816310' position: x: 4575 @@ -4181,7 +4418,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -4198,19 +4435,19 @@ workflow: - text - - '17430596469370' - text - height: 150 + height: 152 id: '17430594012660' position: - x: 4890 - y: 908 + x: 5178 + y: 904 positionAbsolute: - x: 4890 - y: 908 + x: 5178 + y: 904 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: context: @@ -4240,7 +4477,7 @@ workflow: variables: [] vision: enabled: false - height: 89 + height: 90 id: '17430594510140' position: x: 4575 @@ -4252,7 +4489,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -4360,7 +4597,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430595109950' position: x: 2757 @@ -4372,7 +4609,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -4534,7 +4771,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430595158080' position: x: 3060 @@ -4546,7 +4783,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: context: @@ -4567,41 +4804,41 @@ workflow: text: 你是可观测性领域的智能助手,协助用户分析解决问题。 - id: 2b2ad468-efcd-4bcc-aeb9-d53d2f2770ad role: user - text: "# 分析目标 \n基于跨资源类别的异常线程数据,判断应用实例 {{#17430589567120.pod#}} 是否受告警事件\ - \ {{#1742807803325.alertName#}} 影响,并进行初步根因定位。 \n\n# 输入数据规范 \n- **分资源类别数据**:线程延迟及关联的北极星指标(如网络类延迟对应网络RTT)。\ - \ \n- **异常数据点计数**:延迟超过同类型线程历史均值20%的实例数,需标注线程延迟均值。 \n\n# 分析规则 \n1. **主异常方向判定**\ - \ \n - 标注异常线程的延迟均值。 \n - 识别延迟增幅最显著的资源类型(如延迟从毫秒级跃升至秒级)。 \n - **线程数量优先原则**:若某资源类型(如网络/epoll)的异常线程数量最多,则归因至该类别。\ - \ \n *示例:网络/epoll类异常线程4条 vs CPU类2条 → 归因为网络问题。* \n\n2. **误判规避** \ - \ \n - 若CPU或运行队列(RunQ)延迟突增且无并发网络/epoll异常,优先归因为CPU资源争用。 \n\n3. **决胜优先级(降序)**\ - \ \n `CPU > 网络 > Epoll > 文件 > RunQ` \n\n# 根因归因与建议 \n- **CPU延迟突增**:检查代码过度嵌套问题,确认后执行版本回滚。\ - \ \n- **CPU抢占过高**:排查同节点进程的资源争用。 \n- **网络/Epoll延迟突增**: \n - 若RTT异常\ - \ → 网络链路问题; \n - 若RTT正常 → 下游服务延迟问题。 \n- **文件操作延迟升高**:审计文件句柄泄漏。 \n\n\ - # 输出格式 \n**应用实例**: {{#17430589567120.pod#}} \n**北极星指标趋势摘要** \n- 按指标维度描述显著变化,若无异常标注\"\ - 未观测到显著偏离\"。 \n\n**初步根因结论** \n- 明确告警事件对应用的影响状态。 \n- 基于北极星指标与分析规则输出单一归因结论。\ - \ \n *注:若网络/Epoll类异常但RTT正常,归因为下游服务延迟。* \n\n# 输入数据 \n- CPU:{{#17443388433580.result#}}\ - \ \n- 网络:{{#17443388438160.result#}} \n- 文件:{{#17443388421360.result#}}\ - \ \n- Epoll:{{#1744290470304.result#}} \n- RunQ:{{#17443388443000.result#}}\ - \ \n\n# 输出准则 \n- 使用简洁的非技术表述,避免歧义。 \n- 结论需聚焦可执行建议(如\"检查代码嵌套\"而非\"可能存在性能问题\"\ - )。" + text: "# 分析目标\n基于跨资源类别的异常线程数据,判断应用实例{{#17430589567120.pod#}}是否受告警事件{{#1742807803325.alertName#}}影响,并进行初步根因定位。\n\ + \n# 输入数据规范\n- **分资源类别数据**:线程延迟及关联的北极星指标(如网络类延迟加上对应的异常网络RTT数据,不要忽略RTT数据)。\n\ + - **异常数据点计数**:延迟超过同类型线程历史均值20%的实例数,需标注线程延迟均值。\n\n# 分析规则\n1. **主异常方向判定**\n\ + - 标注异常线程的延迟均值。\n- 识别延迟增幅最显著的资源类型(如延迟从毫秒级跃升至秒级)。\n- **线程数量优先原则**:若某资源类型(如网络/epoll)的异常线程数量最多,则归因至该类别。\n\ + *示例:网络/epoll类异常线程4条 vs CPU类2条 → 归因为网络问题。*\n\n2. **误判规避**\n- 若CPU或运行队列(RunQ)延迟突增且无并发网络/epoll异常,优先归因为CPU资源争用。\n\ + \n3. **决胜优先级(降序)**\n'CPU > 网络 > Epoll > 文件 > RunQ'\n\n# 根因归因与建议\n- **CPU延迟突增**:检查代码过度嵌套问题,确认后执行版本回滚。\n\ + - **CPU抢占过高**:排查同节点进程的资源争用。\n- **网络/Epoll延迟突增**:\n- 若RTT异常 → 网络链路问题;\n\ + - 若RTT正常 → 下游服务延迟问题。\n- **文件操作延迟升高**:审计文件句柄泄漏。\n\n# 输出格式\n**应用实例**:{{#17430589567120.pod#}}\ + \ \n**北极星指标趋势摘要**\n- 按指标维度描述显著变化,若无异常标注\"未观测到显著偏离\"。\n\n**初步根因结论**\n\ + - 明确告警事件对应用的影响状态。\n- 基于北极星指标与分析规则输出单一归因结论。\n*注:若网络/Epoll类异常但RTT正常,归因为下游服务延迟。*\n\ + 注: 如果对应方向有报告,需展示报告链接,可点击\n\n# 输入数据\n-cpu:{{#17443388433580.result#}} \ + \ \n- net:{{#17443388438160.result#}} \n- file:{{#17443388421360.result#}}\ + \ \n- epoll:{{#1744290470304.result#}} \n- RunQ:{{#17443388443000.result#}}\n\ + \n#报告链接(说明是报告即可,不用关联线程)\ncpu {{#1744957721512.result#}} \nnetwork方向 {{#17449580284790.result#}}\n\ + runq方向 {{#17449581781070.result#}}\n\n# 输出准则\n- 使用简洁的非技术表述,避免歧义。\n- 结论需聚焦可执行建议(如\"\ + 检查代码嵌套\"而非\"可能存在性能问题\")。" selected: false title: llm analysis root cause type: llm variables: [] vision: enabled: false - height: 89 + height: 90 id: '17430596469370' position: - x: 4575 - y: 904 + x: 4900.586223530482 + y: 863.2515699329357 positionAbsolute: - x: 4575 - y: 904 + x: 4900.586223530482 + y: 863.2515699329357 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -4709,7 +4946,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430597987060' position: x: 2757 @@ -4721,7 +4958,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -4886,7 +5123,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430598152780' position: x: 3060 @@ -4898,7 +5135,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -5006,7 +5243,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430598907140' position: x: 2454 @@ -5018,7 +5255,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -5154,7 +5391,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430598942980' position: x: 2757 @@ -5166,7 +5403,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -5274,7 +5511,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430610599980' position: x: 2454 @@ -5286,7 +5523,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -5422,7 +5659,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430610640640' position: x: 2757 @@ -5434,7 +5671,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -5542,7 +5779,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430610719970' position: x: 2757 @@ -5554,7 +5791,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -5690,7 +5927,7 @@ workflow: - '1741227526517' - startTime type: tool - height: 53 + height: 54 id: '17430610756270' position: x: 3060 @@ -5702,7 +5939,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -5836,7 +6073,7 @@ workflow: type: mixed value: cpu type: tool - height: 53 + height: 54 id: '1743149089034' position: x: 3363 @@ -5848,7 +6085,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: desc: '' is_team_authorization: true @@ -5981,7 +6218,7 @@ workflow: type: mixed value: file type: tool - height: 53 + height: 54 id: '17431493591600' position: x: 3363 @@ -5993,7 +6230,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: desc: '' is_team_authorization: true @@ -6126,7 +6363,7 @@ workflow: type: mixed value: net type: tool - height: 53 + height: 54 id: '17431493623970' position: x: 3363 @@ -6138,7 +6375,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: desc: '' is_team_authorization: true @@ -6271,7 +6508,7 @@ workflow: type: mixed value: epoll type: tool - height: 53 + height: 54 id: '17431493655610' position: x: 3363 @@ -6283,7 +6520,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: desc: '' is_team_authorization: true @@ -6416,7 +6653,7 @@ workflow: type: mixed value: runq type: tool - height: 53 + height: 54 id: '17431493682530' position: x: 3363 @@ -6428,7 +6665,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: desc: '' isInIteration: true @@ -6563,20 +6800,20 @@ workflow: type: mixed value: cpu type: tool - height: 53 + height: 54 id: '1743149606391' parentId: '1741497176064' position: x: 1174.8261261305506 y: 378.0882500223738 positionAbsolute: - x: 6437.282403410635 - y: 1294.0882500223738 + x: 6655.826126130551 + y: 1282.0882500223738 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -6712,20 +6949,20 @@ workflow: type: mixed value: net type: tool - height: 53 + height: 54 id: '1743150910103' parentId: '1741497176064' position: x: 1483.3553424127522 y: 574.6824876720586 positionAbsolute: - x: 6745.811619692837 - y: 1490.6824876720586 + x: 6964.355342412752 + y: 1478.6824876720586 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -6861,20 +7098,20 @@ workflow: type: mixed value: file type: tool - height: 53 + height: 54 id: '1743152043813' parentId: '1741497176064' position: x: 1156.6165679157248 y: 475.1754672301147 positionAbsolute: - x: 6419.072845195809 - y: 1391.1754672301147 + x: 6637.616567915725 + y: 1379.1754672301147 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -7010,20 +7247,20 @@ workflow: type: mixed value: epoll type: tool - height: 53 + height: 54 id: '1743152103020' parentId: '1741497176064' position: x: 1463.2885127206846 y: 673.674875984598 positionAbsolute: - x: 6725.744790000769 - y: 1589.674875984598 + x: 6944.288512720685 + y: 1577.674875984598 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: desc: '' @@ -7159,20 +7396,20 @@ workflow: type: mixed value: runq type: tool - height: 53 + height: 54 id: '1743152169252' parentId: '1741497176064' position: x: 1141.3922062676138 y: 805.1816882600981 positionAbsolute: - x: 6403.848483547698 - y: 1721.181688260098 + x: 6622.392206267614 + y: 1709.181688260098 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ @@ -7209,7 +7446,7 @@ workflow: - '1743149089034' - text variable: avg_json - height: 53 + height: 54 id: '1744185676164' position: x: 3666 @@ -7221,7 +7458,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ \ timeseries = data.get('data', {}).get('timeseries', [])\n \n \ @@ -7257,7 +7494,7 @@ workflow: - '17431493591600' - text variable: avg_json - height: 53 + height: 54 id: '17441926182400' position: x: 3666 @@ -7269,7 +7506,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ \ timeseries = data.get('data', {}).get('timeseries', [])\n \n \ @@ -7305,7 +7542,7 @@ workflow: - '17431493623970' - text variable: avg_json - height: 53 + height: 54 id: '17441926217060' position: x: 3666 @@ -7317,7 +7554,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ \ timeseries = data.get('data', {}).get('timeseries', [])\n \n \ @@ -7353,7 +7590,7 @@ workflow: - '17431493655610' - text variable: avg_json - height: 53 + height: 54 id: '17441926243210' position: x: 3666 @@ -7365,7 +7602,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ \ timeseries = data.get('data', {}).get('timeseries', [])\n \n \ @@ -7401,7 +7638,7 @@ workflow: - '17431493682530' - text variable: avg_json - height: 53 + height: 54 id: '17441926286390' position: x: 3666 @@ -7413,7 +7650,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ \ timeseries = data.get('data', {}).get('timeseries', [])\n \n \ @@ -7451,20 +7688,20 @@ workflow: - '1743149606391' - text variable: avg_json - height: 53 + height: 54 id: '1744206107700' parentId: '1741497176064' position: x: 1488.8192785924111 y: 373.13327676062727 positionAbsolute: - x: 6751.275555872496 - y: 1289.1332767606273 + x: 6969.819278592411 + y: 1277.1332767606273 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ @@ -7503,20 +7740,20 @@ workflow: - '1743152043813' - text variable: avg_json - height: 53 + height: 54 id: '1744206199211' parentId: '1741497176064' position: x: 1487.2988857309501 y: 474.870200391917 positionAbsolute: - x: 6749.755163011035 - y: 1390.870200391917 + x: 6968.29888573095 + y: 1378.870200391917 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ @@ -7555,20 +7792,20 @@ workflow: - '1743150910103' - text variable: avg_json - height: 53 + height: 54 id: '1744206249528' parentId: '1741497176064' position: x: 1789.9724165332118 y: 575.9896449249493 positionAbsolute: - x: 7052.428693813296 - y: 1491.9896449249493 + x: 7270.972416533212 + y: 1479.9896449249493 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ @@ -7607,20 +7844,20 @@ workflow: - '1743152103020' - text variable: avg_json - height: 53 + height: 54 id: '1744206286177' parentId: '1741497176064' position: x: 1789.4007677668633 y: 673.2187341513443 positionAbsolute: - x: 7051.857045046948 - y: 1589.2187341513443 + x: 7270.400767766863 + y: 1577.2187341513443 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "def main(data_json: str, avg_json: str) -> dict:\n data = json.loads(data_json)\n\ @@ -7659,20 +7896,20 @@ workflow: - '1743152169252' - text variable: avg_json - height: 53 + height: 54 id: '1744206314118' parentId: '1741497176064' position: x: 1465.3401312758333 y: 804.853363697991 positionAbsolute: - x: 6727.796408555918 - y: 1720.853363697991 + x: 6946.340131275833 + y: 1708.853363697991 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "def main(data_json: str) -> dict:\n timeseries = json.loads(data_json).get('data',\ @@ -7701,7 +7938,7 @@ workflow: - '17430610640640' - text variable: data_json - height: 53 + height: 54 id: '1744255998058' position: x: 3060 @@ -7713,7 +7950,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "def main(data_json: str) -> dict:\n timeseries = json.loads(data_json).get('data',\ \ {}).get('timeseries', [])\n \n normal = 0.05\n filtered =\ @@ -7741,7 +7978,7 @@ workflow: - '17430598942980' - text variable: data_json - height: 53 + height: 54 id: '17442560822670' position: x: 3060 @@ -7753,7 +7990,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='epoll')\ \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ @@ -7836,7 +8073,7 @@ workflow: - '1744255998058' - result variable: proof_json - height: 53 + height: 54 id: '1744287265980' position: x: 3969 @@ -7848,7 +8085,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ @@ -7882,7 +8119,7 @@ workflow: - '1744287265980' - result variable: data_json - height: 53 + height: 54 id: '1744290470304' position: x: 4272 @@ -7894,7 +8131,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='runq')\ \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ @@ -7984,7 +8221,7 @@ workflow: - '17430610756270' - text variable: proof_json - height: 53 + height: 54 id: '17443356883380' position: x: 3969 @@ -7996,7 +8233,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='net')\ \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ @@ -8079,7 +8316,7 @@ workflow: - '17442560822670' - result variable: proof_json - height: 53 + height: 54 id: '17443357536900' position: x: 3969 @@ -8091,7 +8328,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='file')\ \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ @@ -8181,7 +8418,7 @@ workflow: - '17430598152780' - text variable: proof_json - height: 53 + height: 54 id: '17443357893440' position: x: 3969 @@ -8193,7 +8430,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='cpu')\ \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ @@ -8283,7 +8520,7 @@ workflow: - '17430595158080' - text variable: proof_json - height: 53 + height: 54 id: '17443357899060' position: x: 3969 @@ -8295,7 +8532,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ @@ -8329,7 +8566,7 @@ workflow: - '17443357893440' - result variable: data_json - height: 53 + height: 54 id: '17443388421360' position: x: 4272 @@ -8341,7 +8578,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ @@ -8375,7 +8612,7 @@ workflow: - '17443357899060' - result variable: data_json - height: 53 + height: 54 id: '17443388433580' position: x: 4272 @@ -8387,7 +8624,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ @@ -8421,7 +8658,7 @@ workflow: - '17443357536900' - result variable: data_json - height: 53 + height: 54 id: '17443388438160' position: x: 4272 @@ -8433,7 +8670,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ @@ -8467,7 +8704,7 @@ workflow: - '17443356883380' - result variable: data_json - height: 53 + height: 54 id: '17443388443000' position: x: 4272 @@ -8479,7 +8716,7 @@ workflow: sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 - data: code: "def main(data_json: str) -> dict:\n timeseries = json.loads(data_json).get('data',\ \ {}).get('timeseries', [])\n \n normal = 0.05\n filtered =\ @@ -8509,20 +8746,20 @@ workflow: - '1742798505742' - text variable: data_json - height: 53 + height: 54 id: '1744342068305' parentId: '1741497176064' position: x: 1163.2885127206846 y: 673.674875984598 positionAbsolute: - x: 6425.744790000769 - y: 1589.674875984598 - selected: true + x: 6644.288512720685 + y: 1577.674875984598 + selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "def main(data_json: str) -> dict:\n timeseries = json.loads(data_json).get('data',\ @@ -8553,20 +8790,20 @@ workflow: - '1742547917612' - text variable: data_json - height: 53 + height: 54 id: '1744342138380' parentId: '1741497176064' position: x: 1183.3553424127522 y: 574.6824876720586 positionAbsolute: - x: 6445.811619692837 - y: 1490.6824876720586 + x: 6664.355342412752 + y: 1478.6824876720586 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 + width: 244 zIndex: 1002 - data: code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='cpu')\ @@ -8641,700 +8878,1879 @@ workflow: \ abnormal threads. \" + related_str\n " code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. cpu analyze + type: code + variables: + - value_selector: + - '1744206107700' + - result + variable: data_json + - value_selector: + - '1742980228913' + - text + variable: proof_json + height: 54 + id: '1744342244843' + parentId: '1741497176064' + position: + x: 1791.8192785924111 + y: 371.13327676062727 + positionAbsolute: + x: 7272.819278592411 + y: 1275.1332767606273 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='file')\ + \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ + \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ + \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ + \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ + \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ + \ -> dict:\n if len(abnormal_data) == 0:\n return {}\n \n \ + \ if type == 'net' or type == 'epoll':\n return net_analyze(abnormal_data=abnormal_data,\ + \ proof_data=proof_data)\n elif type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ + \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ + \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ + \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ + \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ + \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ + \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ + \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ + \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ + \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ + \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ + \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ + \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ + \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ + \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ + \ chart = data['chart']\n tid = data[id_key]\n \n \ + \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ + \ if not common_ts:\n continue\n \n sorted_ts\ + \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ + \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ + \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ + \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ + \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ + \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n related = []\n for data in abnormal_data:\n chart = data.get('chart',\ + \ {})\n\n tid = data.get('tid', '')\n if tid == '':\n \ + \ continue\n\n # find which downstream pod is related to this\ + \ thread\n res = find_similar_charts(proof_data, chart, id_key='dst_pod')\n\ + \n related.append({\n 'tid': tid,\n 'relatedItems':\ + \ res,\n })\n \n return {\n 'abnormalAnalysis': analysis,\n\ + \ 'related': related\n }\n\ndef runq_analyze(abnormal_data: list)\ + \ -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n return {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ + \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ + \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ + \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ + \ \n values = sorted(chart.values(), reverse=True)\n \ + \ spikes = values[:abnormal_count]\n analysis.append({\n \ + \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ + \ 'spikes': spikes,\n 'avg': data['avg'],\n \ + \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ + \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ + \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ + \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ + \n if analysis == {}:\n good_str = f\"Thread consumption on {type}\ + \ is normal.\"\n return 'No abnormal data was observed. ' + good_str\n\ + \ \n item_type = ''\n item_str = ''\n if type == 'net' or type\ + \ == 'epoll':\n item_type = 'downstream pods'\n for item in\ + \ analysis.get(\"related\", []):\n tid = item.get(\"tid\", \"\ + \")\n if tid == '':\n continue\n\n \ + \ pods = item.get(\"relatedItems\", [])\n pods = item.get(\"\ + relatedItems\", [])\n if len(pods) == 0 :\n continue\n\ + \ \n pods_str = \", \".join(pods)\n item_str +=\ + \ f\"thread {tid}: affected by {pods_str}. The RTT between them raised about\ + \ 20% over 20% time samples.\"\n \n \n else:\n\ + \ item_type = 'threads'\n\n threads = analysis.get(\"related\"\ + , [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ + \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ + \ = ''\n if len(item_str) > 0:\n related_str = f\"The most related\ + \ {item_type} are {item_str}. \"\n return f\"There are {len(analysis['abnormalAnalysis'])}\ + \ abnormal threads. \" + related_str\n " + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. file analyze + type: code + variables: + - value_selector: + - '1744206199211' + - result + variable: data_json + - value_selector: + - '1742980318484' + - text + variable: proof_json + height: 54 + id: '1744342309386' + parentId: '1741497176064' + position: + x: 1790.2988857309501 + y: 474.870200391917 + positionAbsolute: + x: 7271.29888573095 + y: 1378.870200391917 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='net')\ + \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ + \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ + \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ + \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ + \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ + \ -> dict:\n \n if type == 'net' or type == 'epoll':\n return\ + \ net_analyze(abnormal_data=abnormal_data, proof_data=proof_data)\n elif\ + \ type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ + \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ + \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ + \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ + \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ + \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ + \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ + \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ + \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ + \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ + \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ + \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ + \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ + \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ + \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ + \ chart = data['chart']\n tid = data[id_key]\n \n \ + \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ + \ if not common_ts:\n continue\n \n sorted_ts\ + \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ + \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ + \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ + \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ + \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ + \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n related = []\n for data in proof_data:\n dst_pod = data.get('dst_pod',\ + \ '')\n if dst_pod == '':\n continue\n\n related.append(dst_pod)\n\ + \ \n return {\n 'abnormalAnalysis': analysis,\n 'related':\ + \ related\n }\n\ndef runq_analyze(abnormal_data: list) -> dict:\n \ + \ analysis = get_analysis_data(abnormal_data=abnormal_data)\n\n return\ + \ {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ + \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ + \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ + \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ + \ \n values = sorted(chart.values(), reverse=True)\n \ + \ spikes = values[:abnormal_count]\n analysis.append({\n \ + \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ + \ 'spikes': spikes,\n 'avg': data['avg'],\n \ + \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ + \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ + \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ + \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ + \n \n item_type = ''\n item_str = ''\n if type == 'net' or type\ + \ == 'epoll':\n pods = analysis.get(\"related\", [])\n if\ + \ len(pods) > 0 :\n pods_str = \", \".join(pods)\n \ + \ item_str += f\"{pods_str}. The RTT raised about 20% than 0.05s over 20%\ + \ time samples. \"\n else:\n item_str = 'The RTT is normal.'\n\ + \ else:\n item_type = 'threads'\n\n threads = analysis.get(\"\ + related\", [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ + \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ + \ = ''\n if len(item_str) > 0:\n if type == 'net' or type == 'epoll':\n\ + \ related_str = item_str\n else:\n related_str\ + \ = f\"The most related {item_type} are {item_str} \"\n \n return\ + \ f\"There are {len(analysis['abnormalAnalysis'])} abnormal threads. \"\ + \ + related_str\n " + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. net analyze + type: code + variables: + - value_selector: + - '1744206249528' + - result + variable: data_json + - value_selector: + - '1744342138380' + - result + variable: proof_json + height: 54 + id: '1744342372882' + parentId: '1741497176064' + position: + x: 2092.972416533212 + y: 575.9896449249493 + positionAbsolute: + x: 7573.972416533212 + y: 1479.9896449249493 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='epoll')\ + \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ + \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ + \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ + \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ + \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ + \ -> dict:\n \n if type == 'net' or type == 'epoll':\n return\ + \ net_analyze(abnormal_data=abnormal_data, proof_data=proof_data)\n elif\ + \ type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ + \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ + \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ + \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ + \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ + \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ + \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ + \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ + \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ + \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ + \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ + \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ + \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ + \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ + \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ + \ chart = data['chart']\n tid = data[id_key]\n \n \ + \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ + \ if not common_ts:\n continue\n \n sorted_ts\ + \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ + \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ + \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ + \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ + \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ + \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n related = []\n for data in proof_data:\n dst_pod = data.get('dst_pod',\ + \ '')\n if dst_pod == '':\n continue\n\n related.append(dst_pod)\n\ + \ \n return {\n 'abnormalAnalysis': analysis,\n 'related':\ + \ related\n }\n\ndef runq_analyze(abnormal_data: list) -> dict:\n \ + \ analysis = get_analysis_data(abnormal_data=abnormal_data)\n\n return\ + \ {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ + \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ + \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ + \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ + \ \n values = sorted(chart.values(), reverse=True)\n \ + \ spikes = values[:abnormal_count]\n analysis.append({\n \ + \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ + \ 'spikes': spikes,\n 'avg': data['avg'],\n \ + \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ + \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ + \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ + \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ + \n \n item_type = ''\n item_str = ''\n if type == 'net' or type\ + \ == 'epoll':\n pods = analysis.get(\"related\", [])\n if\ + \ len(pods) > 0 :\n pods_str = \", \".join(pods)\n \ + \ item_str += f\"{pods_str}. The RTT raised about 20% than 0.05s over 20%\ + \ time samples. \"\n else:\n item_str = 'The RTT is normal.'\n\ + \ else:\n item_type = 'threads'\n\n threads = analysis.get(\"\ + related\", [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ + \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ + \ = ''\n if len(item_str) > 0:\n if type == 'net' or type == 'epoll':\n\ + \ related_str = item_str\n else:\n related_str\ + \ = f\"The most related {item_type} are {item_str} \"\n \n return\ + \ f\"There are {len(analysis['abnormalAnalysis'])} abnormal threads. \"\ + \ + related_str\n " + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. epoll analyze + type: code + variables: + - value_selector: + - '1744206286177' + - result + variable: data_json + - value_selector: + - '1744342068305' + - result + variable: proof_json + height: 54 + id: '1744342426374' + parentId: '1741497176064' + position: + x: 2092.4007677668633 + y: 673.2187341513443 + positionAbsolute: + x: 7573.400767766863 + y: 1577.2187341513443 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='cpu')\ + \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ + \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ + \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ + \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ + \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ + \ -> dict:\n if len(abnormal_data) == 0:\n return {}\n \n \ + \ if type == 'net' or type == 'epoll':\n return net_analyze(abnormal_data=abnormal_data,\ + \ proof_data=proof_data)\n elif type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ + \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ + \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ + \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ + \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ + \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ + \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ + \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ + \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ + \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ + \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ + \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ + \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ + \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ + \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ + \ chart = data['chart']\n tid = data[id_key]\n \n \ + \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ + \ if not common_ts:\n continue\n \n sorted_ts\ + \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ + \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ + \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ + \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ + \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ + \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n related = []\n for data in abnormal_data:\n chart = data.get('chart',\ + \ {})\n\n tid = data.get('tid', '')\n if tid == '':\n \ + \ continue\n\n # find which downstream pod is related to this\ + \ thread\n res = find_similar_charts(proof_data, chart, id_key='dst_pod')\n\ + \n related.append({\n 'tid': tid,\n 'relatedItems':\ + \ res,\n })\n \n return {\n 'abnormalAnalysis': analysis,\n\ + \ 'related': related\n }\n\ndef runq_analyze(abnormal_data: list)\ + \ -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ + \n return {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ + \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ + \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ + \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ + \ \n values = sorted(chart.values(), reverse=True)\n \ + \ spikes = values[:abnormal_count]\n analysis.append({\n \ + \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ + \ 'spikes': spikes,\n 'avg': data['avg'],\n \ + \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ + \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ + \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ + \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ + \n if analysis == {}:\n good_str = f\"Thread consumption on {type}\ + \ is normal.\"\n return 'No abnormal data was observed. ' + good_str\n\ + \ \n item_type = ''\n item_str = ''\n if type == 'net' or type\ + \ == 'epoll':\n item_type = 'downstream pods'\n for item in\ + \ analysis.get(\"related\", []):\n tid = item.get(\"tid\", \"\ + \")\n if tid == '':\n continue\n\n \ + \ pods = item.get(\"relatedItems\", [])\n pods = item.get(\"\ + relatedItems\", [])\n if len(pods) == 0 :\n continue\n\ + \ \n pods_str = \", \".join(pods)\n item_str +=\ + \ f\"thread {tid}: affected by {pods_str}. The RTT between them raised about\ + \ 20% over 20% time samples.\"\n \n \n else:\n\ + \ item_type = 'threads'\n\n threads = analysis.get(\"related\"\ + , [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ + \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ + \ = ''\n if len(item_str) > 0:\n related_str = f\"The most related\ + \ {item_type} are {item_str}. \"\n return f\"There are {len(analysis['abnormalAnalysis'])}\ + \ abnormal threads. \" + related_str\n " + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. runq analyze + type: code + variables: + - value_selector: + - '1744206314118' + - result + variable: data_json + - value_selector: + - '17430610756270' + - text + variable: proof_json + height: 54 + id: '1744342478278' + parentId: '1741497176064' + position: + x: 1768.3401312758333 + y: 804.853363697991 + positionAbsolute: + x: 7249.340131275833 + y: 1708.853363697991 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ + \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ + \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ + \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ + \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ + \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ + \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ + \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ + ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ + ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ + \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ + \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ + \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ + \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ + \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ + ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ + \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ + \ 'runq')\n return {\"result\": markdown}" + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. to template runq + type: code + variables: + - value_selector: + - '1744342478278' + - result + variable: data_json + height: 54 + id: '1744342609856' + parentId: '1741497176064' + position: + x: 2075.340131275834 + y: 804.853363697991 + positionAbsolute: + x: 7556.340131275834 + y: 1708.853363697991 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ + \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ + \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ + \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ + \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ + \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ + \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ + \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ + ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ + ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ + \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ + \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ + \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ + \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ + \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ + ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ + \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ + \ 'net')\n return {\"result\": markdown}" + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. to template epoll + type: code + variables: + - value_selector: + - '1744342426374' + - result + variable: data_json + height: 54 + id: '1744342800777' + parentId: '1741497176064' + position: + x: 2396.2516631873705 + y: 671.1408594726702 + positionAbsolute: + x: 7877.2516631873705 + y: 1575.1408594726702 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ + \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ + \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ + \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ + \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ + \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ + \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ + \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ + ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ + ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ + \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ + \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ + \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ + \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ + \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ + ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ + \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ + \ 'net')\n return {\"result\": markdown}" + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. to template net + type: code + variables: + - value_selector: + - '1744342372882' + - result + variable: data_json + height: 54 + id: '1744342846753' + parentId: '1741497176064' + position: + x: 2397.0575190597137 + y: 576.2913937324586 + positionAbsolute: + x: 7878.057519059714 + y: 1480.2913937324586 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ + \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ + \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ + \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ + \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ + \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ + \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ + \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ + ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ + ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ + \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ + \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ + \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ + \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ + \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ + ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ + \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ + \ 'net')\n return {\"result\": markdown}" + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. to template file + type: code + variables: + - value_selector: + - '1744342309386' + - result + variable: data_json + height: 54 + id: '1744342890395' + parentId: '1741497176064' + position: + x: 2298.297497136483 + y: 475.73311174167225 + positionAbsolute: + x: 7779.297497136483 + y: 1379.7331117416722 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ + \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ + \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ + \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ + \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ + \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ + \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ + \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ + ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ + ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ + \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ + \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ + \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ + \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ + \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ + ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ + \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ + \ 'net')\n return {\"result\": markdown}" + code_language: python3 + desc: '' + isInIteration: true + iteration_id: '1741497176064' + outputs: + result: + children: null + type: string + selected: false + title: it. to template cpu + type: code + variables: + - value_selector: + - '1744342244843' + - result + variable: data_json + height: 54 + id: '1744342920172' + parentId: '1741497176064' + position: + x: 2291.073286368818 + y: 355.5973351437458 + positionAbsolute: + x: 7772.073286368818 + y: 1259.5973351437458 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + desc: '' + is_team_authorization: true + output_schema: null + paramSchemas: + - auto_generate: null + default: null + form: llm + human_description: + en_US: Query condition(clickhouse where clause). + ja_JP: Query condition(clickhouse where clause). + pt_BR: Query condition(clickhouse where clause). + zh_Hans: 查询条件(clickhouse where子句) + label: + en_US: Query condition(clickhouse where clause). + ja_JP: Query condition(clickhouse where clause). + pt_BR: Query condition(clickhouse where clause). + zh_Hans: 查询条件(clickhouse where子句) + llm_description: Query condition(clickhouse where clause). + max: null + min: null + name: query + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: Start timestamp in microseconds + ja_JP: Start timestamp in microseconds + pt_BR: Start timestamp in microseconds + zh_Hans: 查询开始时间(微秒) + label: + en_US: Start Time + ja_JP: Start Time + pt_BR: Start Time + zh_Hans: 开始时间 + llm_description: Microsecond timestamp for start time + max: null + min: null + name: startTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: End timestamp in microseconds + ja_JP: End timestamp in microseconds + pt_BR: End timestamp in microseconds + zh_Hans: 查询结束时间(微秒) + label: + en_US: End Time + ja_JP: End Time + pt_BR: End Time + zh_Hans: 结束时间 + llm_description: Microsecond timestamp for end time + max: null + min: null + name: endTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + params: + endTime: '' + query: '' + startTime: '' + provider_id: apo_select + provider_name: apo_select + provider_type: builtin + selected: false + title: 查询全量日志 + tool_configurations: {} + tool_label: 查询全量日志 + tool_name: 查询全量日志 + tool_parameters: + endTime: + type: variable + value: + - '1741227526517' + - endTime + pageNum: + type: constant + value: 1 + pageSize: + type: constant + value: 999 + query: + type: mixed + value: '{{#1744878352649.query#}}' + startTime: + type: variable + value: + - '1741227526517' + - startTime + type: tool + height: 54 + id: '1744878296971' + position: + x: 3363 + y: 1400 + positionAbsolute: + x: 3363 + y: 1400 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + desc: '' + is_team_authorization: true + output_schema: null + paramSchemas: + - auto_generate: null + default: null + form: llm + human_description: + en_US: service name + ja_JP: service name + pt_BR: service name + zh_Hans: 服务名 + label: + en_US: service + ja_JP: service + pt_BR: service + zh_Hans: service + llm_description: service name + max: null + min: null + name: service + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: namespace + ja_JP: namespace + pt_BR: namespace + zh_Hans: 命名空间 + label: + en_US: namespace + ja_JP: namespace + pt_BR: namespace + zh_Hans: namespace + llm_description: namespace + max: null + min: null + name: namespace + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: endpoint name + ja_JP: endpoint name + pt_BR: endpoint name + zh_Hans: 服务端点 + label: + en_US: endpoint + ja_JP: endpoint + pt_BR: endpoint + zh_Hans: endpoint + llm_description: endpoint name + max: null + min: null + name: endpoint + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: instance name + ja_JP: instance name + pt_BR: instance name + zh_Hans: 实例名 + label: + en_US: instance + ja_JP: instance + pt_BR: instance + zh_Hans: instance + llm_description: instance name + max: null + min: null + name: instance + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: node name + ja_JP: node name + pt_BR: node name + zh_Hans: 服务端点 + label: + en_US: nodeName + ja_JP: nodeName + pt_BR: nodeName + zh_Hans: 主机名 + llm_description: node name + max: null + min: null + name: nodeName + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: containerId + ja_JP: containerId + pt_BR: containerId + zh_Hans: 服务端点 + label: + en_US: containerId + ja_JP: containerId + pt_BR: containerId + zh_Hans: 容器id + llm_description: node name + max: null + min: null + name: containerId + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: process id + ja_JP: process id + pt_BR: process id + zh_Hans: 进程id + label: + en_US: pid + ja_JP: pid + pt_BR: pid + zh_Hans: 进程id + llm_description: process id + max: null + min: null + name: pid + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: trace id + ja_JP: trace id + pt_BR: trace id + zh_Hans: trace id + label: + en_US: traceId + ja_JP: traceId + pt_BR: traceId + zh_Hans: traceId + llm_description: trace id + max: null + min: null + name: traceId + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: page number + ja_JP: page number + pt_BR: page number + zh_Hans: 页码 + label: + en_US: pageNum + ja_JP: pageNum + pt_BR: pageNum + zh_Hans: 页码 + llm_description: page number + max: null + min: null + name: pageNum + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: page size + ja_JP: page size + pt_BR: page size + zh_Hans: 每页条数 + label: + en_US: pageSize + ja_JP: pageSize + pt_BR: pageSize + zh_Hans: 每页条数 + llm_description: page size + max: null + min: null + name: pageSize + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query start time(Microsecond) + ja_JP: Data query start time(Microsecond) + pt_BR: Data query start time(Microsecond) + zh_Hans: 开始时间 (微秒) + label: + en_US: startTime + ja_JP: startTime + pt_BR: startTime + zh_Hans: startTime + llm_description: Data query start time(Microsecond) + max: null + min: null + name: startTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: Data query end time(Microsecond) + ja_JP: Data query end time(Microsecond) + pt_BR: Data query end time(Microsecond) + zh_Hans: 结束时间 (微秒) + label: + en_US: endTime + ja_JP: endTime + pt_BR: endTime + zh_Hans: endTime + llm_description: Data query end time(Microsecond) + max: null + min: null + name: endTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + params: + containerId: '' + endTime: '' + endpoint: '' + instance: '' + namespace: '' + nodeName: '' + pageNum: '' + pageSize: '' + pid: '' + service: '' + startTime: '' + traceId: '' + provider_id: apo_select + provider_name: apo_select + provider_type: builtin + selected: false + title: 查询链路数据 + tool_configurations: {} + tool_label: 查询链路数据 + tool_name: 查询链路数据 + tool_parameters: + endTime: + type: variable + value: + - '1741227526517' + - endTime + instance: + type: mixed + value: '{{#1742807803325.pod#}}' + namespace: + type: mixed + value: '' + pageNum: + type: constant + value: 1 + pageSize: + type: constant + value: 100 + service: + type: mixed + value: '{{#1742807803325.service#}}' + startTime: + type: variable + value: + - '1741227526517' + - startTime + type: tool + height: 54 + id: '1744878298576' + position: + x: 2454 + y: 1272 + positionAbsolute: + x: 2454 + y: 1272 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "\ndef main(pod: str, service: str) -> dict:\n query = ''\n if\ + \ len(pod) > 0 and len(service) > 0:\n query = f\"\"\"k8s_pod_name='{pod}'\ + \ and container_name='{service}'\"\"\"\n elif len(pod) > 0:\n \ + \ query = f\"\"\"k8s_pod_name='{pod}'\"\"\"\n elif len(service) > 0:\n\ + \ query = f\"\"\"container_name='{service}'\"\"\"\n\n return {\n\ + \ \"query\": query,\n }" + code_language: python3 + desc: '' outputs: - result: + query: children: null type: string selected: false - title: it. cpu analyze + title: 获取查询条件 type: code variables: - value_selector: - - '1744206107700' - - result - variable: data_json + - '1742807803325' + - pod + variable: pod - value_selector: - - '1742980228913' - - text - variable: proof_json - height: 53 - id: '1744342244843' - parentId: '1741497176064' + - '1742807803325' + - service + variable: service + height: 54 + id: '1744878352649' position: - x: 1791.8192785924111 - y: 371.13327676062727 + x: 3060 + y: 1400 positionAbsolute: - x: 7054.275555872496 - y: 1287.1332767606273 + x: 3060 + y: 1400 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: - code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='file')\ - \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ - \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ - \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ - \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ - \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ - \ -> dict:\n if len(abnormal_data) == 0:\n return {}\n \n \ - \ if type == 'net' or type == 'epoll':\n return net_analyze(abnormal_data=abnormal_data,\ - \ proof_data=proof_data)\n elif type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ - \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ - \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ - \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ - \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ - \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ - \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ - \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ - \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ - \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ - \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ - \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ - \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ - \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ - \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ - \ chart = data['chart']\n tid = data[id_key]\n \n \ - \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ - \ if not common_ts:\n continue\n \n sorted_ts\ - \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ - \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ - \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ - \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ - \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ - \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n related = []\n for data in abnormal_data:\n chart = data.get('chart',\ - \ {})\n\n tid = data.get('tid', '')\n if tid == '':\n \ - \ continue\n\n # find which downstream pod is related to this\ - \ thread\n res = find_similar_charts(proof_data, chart, id_key='dst_pod')\n\ - \n related.append({\n 'tid': tid,\n 'relatedItems':\ - \ res,\n })\n \n return {\n 'abnormalAnalysis': analysis,\n\ - \ 'related': related\n }\n\ndef runq_analyze(abnormal_data: list)\ - \ -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n return {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ - \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ - \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ - \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ - \ \n values = sorted(chart.values(), reverse=True)\n \ - \ spikes = values[:abnormal_count]\n analysis.append({\n \ - \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ - \ 'spikes': spikes,\n 'avg': data['avg'],\n \ - \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ - \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ - \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ - \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ - \n if analysis == {}:\n good_str = f\"Thread consumption on {type}\ - \ is normal.\"\n return 'No abnormal data was observed. ' + good_str\n\ - \ \n item_type = ''\n item_str = ''\n if type == 'net' or type\ - \ == 'epoll':\n item_type = 'downstream pods'\n for item in\ - \ analysis.get(\"related\", []):\n tid = item.get(\"tid\", \"\ - \")\n if tid == '':\n continue\n\n \ - \ pods = item.get(\"relatedItems\", [])\n pods = item.get(\"\ - relatedItems\", [])\n if len(pods) == 0 :\n continue\n\ - \ \n pods_str = \", \".join(pods)\n item_str +=\ - \ f\"thread {tid}: affected by {pods_str}. The RTT between them raised about\ - \ 20% over 20% time samples.\"\n \n \n else:\n\ - \ item_type = 'threads'\n\n threads = analysis.get(\"related\"\ - , [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ - \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ - \ = ''\n if len(item_str) > 0:\n related_str = f\"The most related\ - \ {item_type} are {item_str}. \"\n return f\"There are {len(analysis['abnormalAnalysis'])}\ - \ abnormal threads. \" + related_str\n " + context: + enabled: false + variable_selector: [] + desc: '' + model: + completion_params: + temperature: 0.7 + mode: chat + name: deepseek-chat + provider: langgenius/deepseek/deepseek + prompt_template: + - id: 615ece65-9193-4bad-a375-d51b67dbaafc + role: system + text: 你是一位可观测性领域智能助手,帮助用户分析解决问题。 + - id: cc52b45f-d678-4d55-98d1-7ed48f7b4091 + role: user + text: '# 目的 + + 根据异常pod,结合相关日志,分析告警{{#1742807803325.alertName#}} + + 产生的原因和对{{#1742807803325.service#}} + + 或{{#1742807803325.pod#}} + + 的影响。最后结合日志和常见排查思路为用户提出解决方案。 + + + + # 数据 + + 数据包括: + + 1. 对应时间内的error日志{{#1744901266309.logs#}} + + + # 分析思路 + + 结合异常pods和日志,找到出错pods和对应的错误内容,分析错误原因。如果没有异常日志则提示用户主动查看相关时间内的日志内容,并提醒用户配置日志解析规则。 + + + # 注意事项 + + - 时间单位均为微秒 + + + # 输出内容 + + 告警{{#1742807803325.alertName#}} + + 产生的原因是xx,对 + + {{#1742807803325.pod#}}或{{#1742807803325.service#}} + + 的影响是:哪些pods出错,具体日志为xx。 + + + 解决方案:xx' + selected: false + title: 总结日志错误 + type: llm + variables: [] + vision: + enabled: false + height: 90 + id: '1744878372490' + position: + x: 3969 + y: 1382 + positionAbsolute: + x: 3969 + y: 1382 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "import json\n\ndef main(data_json: str):\n data = json.loads(data_json).get('data',\ + \ {}).get('list', [])\n\n error_instances = []\n for trace in data:\ + \ \n if not trace.get(\"flags\", {}).get('is_error', False):\n \ + \ continue\n\n error_instances.append({\n 'endpoint':\ + \ trace.get('endpoint'),\n 'instance': trace.get('instanceId'),\n\ + \ 'tid': trace.get('tid'),\n 'service': trace.get('service')\n\ + \ })\n error_instances = error_instances[:30]\n return {\n\ + \ 'errorInstances': error_instances,\n }\n" code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' outputs: - result: + errorInstances: children: null - type: string + type: array[object] selected: false - title: it. file analyze + title: 提取故障链路相关端点 type: code variables: - value_selector: - - '1744206199211' - - result - variable: data_json - - value_selector: - - '1742980318484' + - '1744878298576' - text - variable: proof_json - height: 53 - id: '1744342309386' - parentId: '1741497176064' + variable: data_json + height: 54 + id: '1744878389686' position: - x: 1790.2988857309501 - y: 474.870200391917 + x: 2757 + y: 1272 positionAbsolute: - x: 7052.755163011035 - y: 1390.870200391917 + x: 2757 + y: 1272 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: - code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='net')\ - \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ - \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ - \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ - \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ - \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ - \ -> dict:\n \n if type == 'net' or type == 'epoll':\n return\ - \ net_analyze(abnormal_data=abnormal_data, proof_data=proof_data)\n elif\ - \ type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ - \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ - \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ - \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ - \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ - \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ - \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ - \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ - \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ - \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ - \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ - \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ - \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ - \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ - \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ - \ chart = data['chart']\n tid = data[id_key]\n \n \ - \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ - \ if not common_ts:\n continue\n \n sorted_ts\ - \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ - \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ - \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ - \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ - \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ - \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n related = []\n for data in proof_data:\n dst_pod = data.get('dst_pod',\ - \ '')\n if dst_pod == '':\n continue\n\n related.append(dst_pod)\n\ - \ \n return {\n 'abnormalAnalysis': analysis,\n 'related':\ - \ related\n }\n\ndef runq_analyze(abnormal_data: list) -> dict:\n \ - \ analysis = get_analysis_data(abnormal_data=abnormal_data)\n\n return\ - \ {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ - \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ - \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ - \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ - \ \n values = sorted(chart.values(), reverse=True)\n \ - \ spikes = values[:abnormal_count]\n analysis.append({\n \ - \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ - \ 'spikes': spikes,\n 'avg': data['avg'],\n \ - \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ - \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ - \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ - \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ - \n \n item_type = ''\n item_str = ''\n if type == 'net' or type\ - \ == 'epoll':\n pods = analysis.get(\"related\", [])\n if\ - \ len(pods) > 0 :\n pods_str = \", \".join(pods)\n \ - \ item_str += f\"{pods_str}. The RTT raised about 20% than 0.05s over 20%\ - \ time samples. \"\n else:\n item_str = 'The RTT is normal.'\n\ - \ else:\n item_type = 'threads'\n\n threads = analysis.get(\"\ - related\", [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ - \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ - \ = ''\n if len(item_str) > 0:\n if type == 'net' or type == 'epoll':\n\ - \ related_str = item_str\n else:\n related_str\ - \ = f\"The most related {item_type} are {item_str} \"\n \n return\ - \ f\"There are {len(analysis['abnormalAnalysis'])} abnormal threads. \"\ - \ + related_str\n " - code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' - outputs: - result: - children: null - type: string + is_team_authorization: true + output_schema: null + paramSchemas: + - auto_generate: null + default: null + form: llm + human_description: + en_US: Query condition(clickhouse where clause). + ja_JP: Query condition(clickhouse where clause). + pt_BR: Query condition(clickhouse where clause). + zh_Hans: 查询条件(clickhouse where子句) + label: + en_US: Query condition(clickhouse where clause). + ja_JP: Query condition(clickhouse where clause). + pt_BR: Query condition(clickhouse where clause). + zh_Hans: 查询条件(clickhouse where子句) + llm_description: Query condition(clickhouse where clause). + max: null + min: null + name: query + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: null + form: llm + human_description: + en_US: Start timestamp in microseconds + ja_JP: Start timestamp in microseconds + pt_BR: Start timestamp in microseconds + zh_Hans: 查询开始时间(微秒) + label: + en_US: Start Time + ja_JP: Start Time + pt_BR: Start Time + zh_Hans: 开始时间 + llm_description: Microsecond timestamp for start time + max: null + min: null + name: startTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + - auto_generate: null + default: null + form: llm + human_description: + en_US: End timestamp in microseconds + ja_JP: End timestamp in microseconds + pt_BR: End timestamp in microseconds + zh_Hans: 查询结束时间(微秒) + label: + en_US: End Time + ja_JP: End Time + pt_BR: End Time + zh_Hans: 结束时间 + llm_description: Microsecond timestamp for end time + max: null + min: null + name: endTime + options: [] + placeholder: null + precision: null + required: true + scope: null + template: null + type: number + params: + endTime: '' + query: '' + startTime: '' + provider_id: apo_select + provider_name: apo_select + provider_type: builtin selected: false - title: it. net analyze - type: code - variables: - - value_selector: - - '1744206249528' - - result - variable: data_json - - value_selector: - - '1744342138380' - - result - variable: proof_json - height: 53 - id: '1744342372882' - parentId: '1741497176064' + title: 查询全量日志 + tool_configurations: {} + tool_label: 查询全量日志 + tool_name: 查询全量日志 + tool_parameters: + endTime: + type: variable + value: + - '1741227526517' + - endTime + pageNum: + type: constant + value: 1 + pageSize: + type: constant + value: 999 + query: + type: mixed + value: '{{#17448784268890.query#}}' + startTime: + type: variable + value: + - '1741227526517' + - startTime + type: tool + height: 54 + id: '17448784142480' position: - x: 2092.972416533212 - y: 575.9896449249493 + x: 3363 + y: 1272 positionAbsolute: - x: 7355.428693813296 - y: 1491.9896449249493 + x: 3363 + y: 1272 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: - code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='epoll')\ - \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ - \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ - \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ - \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ - \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ - \ -> dict:\n \n if type == 'net' or type == 'epoll':\n return\ - \ net_analyze(abnormal_data=abnormal_data, proof_data=proof_data)\n elif\ - \ type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ - \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ - \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ - \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ - \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ - \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ - \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ - \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ - \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ - \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ - \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ - \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ - \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ - \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ - \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ - \ chart = data['chart']\n tid = data[id_key]\n \n \ - \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ - \ if not common_ts:\n continue\n \n sorted_ts\ - \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ - \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ - \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ - \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ - \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ - \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n related = []\n for data in proof_data:\n dst_pod = data.get('dst_pod',\ - \ '')\n if dst_pod == '':\n continue\n\n related.append(dst_pod)\n\ - \ \n return {\n 'abnormalAnalysis': analysis,\n 'related':\ - \ related\n }\n\ndef runq_analyze(abnormal_data: list) -> dict:\n \ - \ analysis = get_analysis_data(abnormal_data=abnormal_data)\n\n return\ - \ {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ - \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ - \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ - \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ - \ \n values = sorted(chart.values(), reverse=True)\n \ - \ spikes = values[:abnormal_count]\n analysis.append({\n \ - \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ - \ 'spikes': spikes,\n 'avg': data['avg'],\n \ - \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ - \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ - \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ - \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ - \n \n item_type = ''\n item_str = ''\n if type == 'net' or type\ - \ == 'epoll':\n pods = analysis.get(\"related\", [])\n if\ - \ len(pods) > 0 :\n pods_str = \", \".join(pods)\n \ - \ item_str += f\"{pods_str}. The RTT raised about 20% than 0.05s over 20%\ - \ time samples. \"\n else:\n item_str = 'The RTT is normal.'\n\ - \ else:\n item_type = 'threads'\n\n threads = analysis.get(\"\ - related\", [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ - \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ - \ = ''\n if len(item_str) > 0:\n if type == 'net' or type == 'epoll':\n\ - \ related_str = item_str\n else:\n related_str\ - \ = f\"The most related {item_type} are {item_str} \"\n \n return\ - \ f\"There are {len(analysis['abnormalAnalysis'])} abnormal threads. \"\ - \ + related_str\n " + code: "\ndef main(pod: str, service: str) -> dict:\n query = ''\n if\ + \ len(pod) > 0 and len(service) > 0:\n query = f\"\"\"k8s_pod_name='{pod}'\ + \ and container_name='{service}'\"\"\"\n elif len(pod) > 0:\n \ + \ query = f\"\"\"k8s_pod_name='{pod}'\"\"\"\n elif len(service) > 0:\n\ + \ query = f\"\"\"container_name='{service}'\"\"\"\n\n return {\n\ + \ \"query\": query,\n }" code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' outputs: - result: + query: children: null type: string selected: false - title: it. epoll analyze + title: 获取查询条件 type: code variables: - value_selector: - - '1744206286177' - - result - variable: data_json + - '1742807803325' + - pod + variable: pod - value_selector: - - '1744342068305' - - result - variable: proof_json - height: 53 - id: '1744342426374' - parentId: '1741497176064' + - '1742807803325' + - service + variable: service + height: 54 + id: '17448784268890' position: - x: 2092.4007677668633 - y: 673.2187341513443 + x: 3060 + y: 1272 positionAbsolute: - x: 7354.857045046948 - y: 1589.2187341513443 + x: 3060 + y: 1272 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 + - data: + context: + enabled: false + variable_selector: [] + desc: '' + model: + completion_params: + temperature: 0.7 + mode: chat + name: deepseek-chat + provider: langgenius/deepseek/deepseek + prompt_template: + - id: 615ece65-9193-4bad-a375-d51b67dbaafc + role: system + text: 你是一位可观测性领域智能助手,帮助用户分析解决问题。 + - id: 1701b6b3-dcd0-4d8b-80cc-62a96aef9170 + role: user + text: '# 目的 + + 根据异常pod,结合相关日志,分析告警{{#1742807803325.alertName#}} + + 产生的原因和对{{#1742807803325.pod#}} + + 或{{#1742807803325.service#}} + + 的影响。最后结合日志和常见排查思路为用户提出解决方案。 + + + + # 数据 + + 数据包括: + + 1. 对应时间内的error日志{{#17449013347930.logs#}} + + + 2. 对应时间段内链路相关的30个endpoints,service和instances组合{{#1744878389686.errorInstances#}} + + + 3. 产生告警的service{{#1742807803325.service#}}和endpoint{{#1742807803325.endpoint#}} + + + + # 分析思路 + + 结合异常pods和日志,找到出错pods和对应的错误内容,分析错误原因。如果没有异常日志则提示用户主动查看相关时间内的日志内容,并提醒用户配置日志解析规则。 + + + + # 注意事项 + + - 时间单位均为微秒 + + + # 输出内容 + + 告警{{#1742807803325.alertName#}} + + 产生的原因是xx,对 + + {{#1742807803325.service#}}或{{#1742807803325.pod#}} + + 的影响是:哪些pods出错,具体日志为xx。 + + + 解决方案:xx' + selected: false + title: 总结请求错误 + type: llm + variables: [] + vision: + enabled: false + height: 90 + id: '17448784398000' + position: + x: 3969 + y: 1254 + positionAbsolute: + x: 3969 + y: 1254 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 - data: - code: "import json\n\n\ndef main(data_json: str, proof_json: str, type='cpu')\ - \ -> dict:\n data = json.loads(data_json)\n proof_data = json.loads(proof_json)\n\ - \ \n analysis = analyze_abnormal_data(data, proof_data, type)\n \ - \ summary = get_summary(analysis=analysis, type=type)\n\n analysis['summary']\ - \ = summary\n return {\n \"result\": json.dumps(analysis)\n \ - \ }\n\ndef analyze_abnormal_data(abnormal_data, proof_data, type: str)\ - \ -> dict:\n if len(abnormal_data) == 0:\n return {}\n \n \ - \ if type == 'net' or type == 'epoll':\n return net_analyze(abnormal_data=abnormal_data,\ - \ proof_data=proof_data)\n elif type == 'runq':\n return runq_analyze(abnormal_data=abnormal_data)\n\ - \ else:\n return consistent_analyze(abnormal_data=abnormal_data,\ - \ proof_data=proof_data)\n\n\ndef consistent_analyze(abnormal_data: list,\ - \ proof_data: dict) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n timeseries = proof_data.get('data', {}).get('timeseries', [])\n \ - \ proof_chart = {}\n if len(timeseries) > 0:\n proof_chart =\ - \ timeseries[0].get('chart', {}).get('chartData', {})\n related_threads\ - \ = find_similar_charts(abnormal_data=abnormal_data, proof_chart=proof_chart,\ - \ id_key='tid') \n \n return {\n 'abnormalAnalysis': analysis,\n\ - \ 'related': related_threads\n }\n\ndef find_similar_charts(abnormal_data,\ - \ proof_chart, id_key: str, threshold=0.8):\n if not abnormal_data or\ - \ not proof_chart:\n return []\n\n def cosine_sim(a, b):\n \ - \ dot_product = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x**2\ - \ for x in a) ** 0.5\n norm_b = sum(y**2 for y in b) ** 0.5\n \ - \ if norm_a == 0 or norm_b == 0:\n return 0.0\n return\ - \ dot_product / (norm_a * norm_b)\n\n similar = []\n for data in abnormal_data:\n\ - \ chart = data['chart']\n tid = data[id_key]\n \n \ - \ common_ts = set(chart.keys()) & set(proof_chart.keys())\n \ - \ if not common_ts:\n continue\n \n sorted_ts\ - \ = sorted(common_ts)\n a = [chart[ts] for ts in sorted_ts]\n \ - \ b = [proof_chart[ts] for ts in sorted_ts]\n \n similarity\ - \ = cosine_sim(a, b)\n if similarity >= threshold:\n similar.append((tid,\ - \ similarity))\n \n similar.sort(key=lambda x: x[1], reverse=True)\n\ - \ return [item[0] for item in similar]\n\ndef net_analyze(abnormal_data:\ - \ list, proof_data: list) -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n related = []\n for data in abnormal_data:\n chart = data.get('chart',\ - \ {})\n\n tid = data.get('tid', '')\n if tid == '':\n \ - \ continue\n\n # find which downstream pod is related to this\ - \ thread\n res = find_similar_charts(proof_data, chart, id_key='dst_pod')\n\ - \n related.append({\n 'tid': tid,\n 'relatedItems':\ - \ res,\n })\n \n return {\n 'abnormalAnalysis': analysis,\n\ - \ 'related': related\n }\n\ndef runq_analyze(abnormal_data: list)\ - \ -> dict:\n analysis = get_analysis_data(abnormal_data=abnormal_data)\n\ - \n return {\n 'abnormalAnalysis': analysis,\n }\n\ndef get_analysis_data(abnormal_data):\n\ - \ \"\"\"Get abnormal count, chart, spikes, avg\"\"\"\n analysis =\ - \ []\n\n for data in abnormal_data:\n chart = data['chart']\n\ - \ abnormal_count = data['abnormalCount']\n unit = data['unit']\n\ - \ \n values = sorted(chart.values(), reverse=True)\n \ - \ spikes = values[:abnormal_count]\n analysis.append({\n \ - \ 'tid': data['tid'],\n 'abnormalCount': abnormal_count,\n\ - \ 'spikes': spikes,\n 'avg': data['avg'],\n \ - \ 'unit': unit\n })\n\n return analysis\n\ndef get_summary(analysis:\ - \ dict, type: str) -> str:\n \"\"\"\n 生成异常线程报告\n :param abnormal_count:\ - \ 异常线程数量 (int)\n :param related_items: 相关项列表 (list)\n :param item_type:\ - \ 相关项类型名称 (str, default: \"threads\")\n :return: 格式化报告字符串\n \"\"\"\ - \n if analysis == {}:\n good_str = f\"Thread consumption on {type}\ - \ is normal.\"\n return 'No abnormal data was observed. ' + good_str\n\ - \ \n item_type = ''\n item_str = ''\n if type == 'net' or type\ - \ == 'epoll':\n item_type = 'downstream pods'\n for item in\ - \ analysis.get(\"related\", []):\n tid = item.get(\"tid\", \"\ - \")\n if tid == '':\n continue\n\n \ - \ pods = item.get(\"relatedItems\", [])\n pods = item.get(\"\ - relatedItems\", [])\n if len(pods) == 0 :\n continue\n\ - \ \n pods_str = \", \".join(pods)\n item_str +=\ - \ f\"thread {tid}: affected by {pods_str}. The RTT between them raised about\ - \ 20% over 20% time samples.\"\n \n \n else:\n\ - \ item_type = 'threads'\n\n threads = analysis.get(\"related\"\ - , [])\n threads_str = \", \".join(threads)\n if len(threads_str)\ - \ > 0:\n item_str += threads_str + \". \"\n\n related_str\ - \ = ''\n if len(item_str) > 0:\n related_str = f\"The most related\ - \ {item_type} are {item_str}. \"\n return f\"There are {len(analysis['abnormalAnalysis'])}\ - \ abnormal threads. \" + related_str\n " - code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' - outputs: - result: - children: null - type: string + output_type: string selected: false - title: it. runq analyze - type: code + title: 变量聚合器 + type: variable-aggregator variables: - - value_selector: - - '1744206314118' - - result - variable: data_json - - value_selector: - - '17430610756270' + - - '1744878372490' - text - variable: proof_json - height: 53 - id: '1744342478278' - parentId: '1741497176064' + - - '17448784398000' + - text + height: 130 + id: '1744878466157' position: - x: 1768.3401312758333 - y: 804.853363697991 + x: 4272 + y: 1233.5 positionAbsolute: - x: 7030.796408555918 - y: 1720.853363697991 + x: 4272 + y: 1233.5 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 - data: - code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ - \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ - \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ - \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ - \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ - \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ - \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ - \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ - ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ - ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ - \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ - \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ - \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ - \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ - \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ - ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ - \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ - \ 'runq')\n return {\"result\": markdown}" + desc: '' + outputs: + - value_selector: + - '1744878466157' + - output + variable: output + selected: false + title: 结束 + type: end + height: 90 + id: '1744878476249' + position: + x: 4575 + y: 1272 + positionAbsolute: + x: 4575 + y: 1272 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "import re\nfrom typing import List\nimport json\n\nERROR_PATTERNS:\ + \ List[str] = [\n r\"\\bERROR\\b\",\n r\"\\bERR\\b\",\n r\"\\bEXCEPTION\\\ + b\",\n r\"\\bPANIC\\b\",\n r\"\\bFAIL(?:ED)?\\b\",\n r\"\\bWRONG\\\ + b\",\n r\"\\bFATAL\\b\",\n r\"\\b500\\b\",\n r\"\\b5\\d{2}\\b\"\ + ,\n]\n\nERROR_REGEXES = [re.compile(pat, re.IGNORECASE) for pat in ERROR_PATTERNS]\n\ + \ndef is_error_log(line: str) -> bool:\n if not isinstance(line, (str,\ + \ bytes)):\n return False\n for regex in ERROR_REGEXES:\n \ + \ if regex.search(line):\n return True\n return False\n\ + \ndef main(data_json: str) -> dict:\n logs = json.loads(data_json).get(\"\ + data\", {}).get(\"logs\", [])\n\n error_logs = []\n simplified_logs\ + \ = []\n for log in logs:\n source = log.get(\"tags\", {}).get(\"\ + source\", \"\")\n level = log.get(\"logFields\", {}).get(\"level\"\ + , \"\")\n content = log.get(\"content\", \"\")\n pod = log.get(\"\ + k8s_pod_name\", \"\")\n timestamp = log.get(\"timestamp\")\n\n \ + \ simplified_log = {\n \"content\": content,\n \ + \ \"level\": level,\n \"pod\": pod,\n \"timestamp\"\ + : timestamp,\n }\n\n if is_error_log(level) or source == \"\ + stderr\" or is_error_log(content):\n error_logs.append(simplified_log)\n\ + \ \n if len(simplified_logs) < 100:\n simplified_logs.append(simplified_log)\n\ + \n error_logs = error_logs[:100]\n if not error_logs:\n error_logs\ + \ = simplified_logs[:100]\n\n MAX_LENGTH = 80000 \n error_logs_str\ + \ = json.dumps(error_logs, separators=(\",\", \":\"))\n \n while len(error_logs_str)\ + \ > MAX_LENGTH and len(error_logs) > 0:\n remove_count = max(1, len(error_logs)\ + \ // 10)\n error_logs = error_logs[:-remove_count]\n error_logs_str\ + \ = json.dumps(error_logs, separators=(\",\", \":\"))\n\n return {\"\ + logs\": error_logs_str}" code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' outputs: - result: + logs: children: null type: string selected: false - title: it. to template runq + title: 提取错误日志 type: code variables: - value_selector: - - '1744342478278' - - result + - '1744878296971' + - text variable: data_json - height: 53 - id: '1744342609856' - parentId: '1741497176064' + height: 54 + id: '1744901266309' position: - x: 2071.3401312758333 - y: 804.853363697991 + x: 3666 + y: 1400 positionAbsolute: - x: 7333.796408555918 - y: 1720.853363697991 + x: 3666 + y: 1400 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 - - data: - code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ - \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ - \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ - \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ - \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ - \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ - \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ - \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ - ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ - ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ - \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ - \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ - \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ - \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ - \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ - ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ - \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ - \ 'net')\n return {\"result\": markdown}" + width: 244 + - data: + code: "import re\nfrom typing import List\nimport json\n\nERROR_PATTERNS:\ + \ List[str] = [\n r\"\\bERROR\\b\",\n r\"\\bERR\\b\",\n r\"\\bEXCEPTION\\\ + b\",\n r\"\\bPANIC\\b\",\n r\"\\bFAIL(?:ED)?\\b\",\n r\"\\bWRONG\\\ + b\",\n r\"\\bFATAL\\b\",\n r\"\\b500\\b\",\n r\"\\b5\\d{2}\\b\"\ + ,\n]\n\nERROR_REGEXES = [re.compile(pat, re.IGNORECASE) for pat in ERROR_PATTERNS]\n\ + \ndef is_error_log(line: str) -> bool:\n if not isinstance(line, (str,\ + \ bytes)):\n return False\n for regex in ERROR_REGEXES:\n \ + \ if regex.search(line):\n return True\n return False\n\ + \ndef main(data_json: str) -> dict:\n logs = json.loads(data_json).get(\"\ + data\", {}).get(\"logs\", [])\n\n error_logs = []\n simplified_logs\ + \ = []\n for log in logs:\n source = log.get(\"tags\", {}).get(\"\ + source\", \"\")\n level = log.get(\"logFields\", {}).get(\"level\"\ + , \"\")\n content = log.get(\"content\", \"\")\n pod = log.get(\"\ + k8s_pod_name\", \"\")\n timestamp = log.get(\"timestamp\")\n\n \ + \ simplified_log = {\n \"content\": content,\n \ + \ \"level\": level,\n \"pod\": pod,\n \"timestamp\"\ + : timestamp,\n }\n\n if is_error_log(level) or source == \"\ + stderr\" or is_error_log(content):\n error_logs.append(simplified_log)\n\ + \ \n if len(simplified_logs) < 100:\n simplified_logs.append(simplified_log)\n\ + \n error_logs = error_logs[:100]\n if not error_logs:\n error_logs\ + \ = simplified_logs[:100]\n\n MAX_LENGTH = 80000\n error_logs_str\ + \ = json.dumps(error_logs, separators=(\",\", \":\"))\n \n while len(error_logs_str)\ + \ > MAX_LENGTH and len(error_logs) > 0:\n remove_count = max(1, len(error_logs)\ + \ // 10)\n error_logs = error_logs[:-remove_count]\n error_logs_str\ + \ = json.dumps(error_logs, separators=(\",\", \":\"))\n\n return {\"\ + logs\": error_logs_str}" code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' outputs: - result: + logs: children: null type: string selected: false - title: it. to template epoll + title: 提取错误日志 type: code variables: - value_selector: - - '1744342426374' - - result + - '17448784142480' + - text variable: data_json - height: 53 - id: '1744342800777' - parentId: '1741497176064' + height: 54 + id: '17449013347930' position: - x: 2396.2516631873705 - y: 671.1408594726702 + x: 3666 + y: 1272 positionAbsolute: - x: 7658.707940467455 - y: 1587.1408594726702 + x: 3666 + y: 1272 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 - - data: - code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ - \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ - \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ - \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ - \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ - \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ - \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ - \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ - ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ - ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ - \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ - \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ - \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ - \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ - \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ - ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ - \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ - \ 'net')\n return {\"result\": markdown}" + width: 244 + - data: + code: "import requests\nimport json\n\nAPO_BACKEND = \"http://192.168.1.16:31363/api/alerts/anomaly-span/list\"\ + \n\n\ndef main(start: int, end: int, service: str, endpoint: str) -> dict:\n\ + \ reason = 'cpu_time'\n params = {\n \"contentKey\": endpoint,\n\ + \ \"currentPage\": 1,\n \"endTime\": end,\n \"pageSize\"\ + : 10,\n \"reason\": reason,\n \"service\": service,\n \ + \ \"startTime\": start,\n }\n\n resp = requests.post(url=APO_BACKEND,\ + \ json=params)\n\n res_list = []\n\n for item in resp.json()[\"list\"\ + ]:\n resurl = f\"http://kindling.myddns.me:31553/#/cause/report/{item['traceId']}/{item['spanId']}?mutatedType={reason}\"\ + \n res_list.append(resurl)\n if len(res_list) == 3:\n \ + \ break\n\n return {\"result\": json.dumps(res_list)}\n" code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' outputs: result: children: null type: string selected: false - title: it. to template net + title: CPU reports type: code variables: - value_selector: - - '1744342372882' - - result - variable: data_json - height: 53 - id: '1744342846753' - parentId: '1741497176064' + - '1741227526517' + - startTime + variable: start + - value_selector: + - '1741227526517' + - endTime + variable: end + - value_selector: + - '1742807803325' + - service + variable: service + - value_selector: + - '1742807803325' + - endpoint + variable: endpoint + height: 54 + id: '1744957721512' position: - x: 2397.0575190597137 - y: 576.2913937324586 + x: 4589.2145686280455 + y: 752.3779346341792 positionAbsolute: - x: 7659.513796339798 - y: 1492.2913937324586 + x: 4589.2145686280455 + y: 752.3779346341792 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 - - data: - code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ - \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ - \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ - \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ - \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ - \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ - \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ - \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ - ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ - ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ - \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ - \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ - \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ - \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ - \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ - ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ - \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ - \ 'net')\n return {\"result\": markdown}" + width: 244 + - data: + code: "import requests\nimport json\n\nAPO_BACKEND = \"http://192.168.1.16:31363/api/alerts/anomaly-span/list\"\ + \n\n\ndef main(start: int, end: int, service: str, endpoint: str) -> dict:\n\ + \ reason = 'network_time'\n params = {\n \"contentKey\": endpoint,\n\ + \ \"currentPage\": 1,\n \"endTime\": end,\n \"pageSize\"\ + : 10,\n \"reason\": reason,\n \"service\": service,\n \ + \ \"startTime\": start,\n }\n\n resp = requests.post(url=APO_BACKEND,\ + \ json=params)\n\n res_list = []\n\n for item in resp.json()[\"list\"\ + ]:\n resurl = f\"http://kindling.myddns.me:31553/#/cause/report/{item['traceId']}/{item['spanId']}?mutatedType={reason}\"\ + \n res_list.append(resurl)\n if len(res_list) == 3:\n \ + \ break\n\n return {\"result\": json.dumps(res_list)}\n" code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' outputs: result: children: null type: string selected: false - title: it. to template file + title: Net reports type: code variables: - value_selector: - - '1744342309386' - - result - variable: data_json - height: 53 - id: '1744342890395' - parentId: '1741497176064' + - '1741227526517' + - startTime + variable: start + - value_selector: + - '1741227526517' + - endTime + variable: end + - value_selector: + - '1742807803325' + - service + variable: service + - value_selector: + - '1742807803325' + - endpoint + variable: endpoint + height: 54 + id: '17449580284790' position: - x: 2298.297497136483 - y: 475.73311174167225 + x: 4564.5759830061 + y: 856.3076926945673 positionAbsolute: - x: 7560.7537744165675 - y: 1391.7331117416722 + x: 4564.5759830061 + y: 856.3076926945673 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 - - data: - code: "import json\n\ndef format_number(num):\n return \"{:,}\".format(num)\n\ - \ndef json_to_markdown(data, analysis_type):\n markdown = [\n \ - \ f\"# {analysis_type} Consumption Analysis Report\\n\",\n \"## Executive\ - \ Summary\",\n f\"The summary of {analysis_type} thread consumption\ - \ analysis.\\n\"\n ]\n \n if 'abnormalAnalysis' in data and data['abnormalAnalysis']:\n\ - \ markdown.append(\"## Abnormal Thread Details\")\n for analysis\ - \ in data['abnormalAnalysis']:\n unit = analysis['unit']\n \ - \ markdown.extend([\n f\"### Thread {analysis['tid']}\"\ - ,\n f\"- ​**​Abnormal data count​**​: {analysis['abnormalCount']}\"\ - ,\n f\"- ​**​Spike values​**​: ({unit})\",\n \ - \ *[f\" - {format_number(spike)}{unit}\" for spike in sorted(analysis['spikes'],\ - \ reverse=True)],\n f\"- ​**​Average consumption​**​: {format_number(round(analysis['avg'],\ - \ 2))}{unit}\",\n ])\n \n markdown.append(\"\ - \\n---\\n\") \n \n if 'summary' in data:\n markdown.extend([\n\ - \ \"## Final Conclusion\",\n f\"{data['summary']}\"\ - ,\n ])\n \n return \"\\n\".join(markdown)\n\ndef main(data_json:\ - \ str) -> dict:\n data = json.loads(data_json)\n markdown = json_to_markdown(data,\ - \ 'net')\n return {\"result\": markdown}" + width: 244 + - data: + code: "import requests\nimport json\n\nAPO_BACKEND = \"http://192.168.1.16:31363/api/alerts/anomaly-span/list\"\ + \n\n\ndef main(start: int, end: int, service: str, endpoint: str) -> dict:\n\ + \ reason = 'scheduling_time'\n params = {\n \"contentKey\"\ + : endpoint,\n \"currentPage\": 1,\n \"endTime\": end,\n \ + \ \"pageSize\": 10,\n \"reason\": reason,\n \"service\"\ + : service,\n \"startTime\": start,\n }\n\n resp = requests.post(url=APO_BACKEND,\ + \ json=params)\n\n res_list = []\n\n for item in resp.json()[\"list\"\ + ]:\n resurl = f\"http://kindling.myddns.me:31553/#/cause/report/{item['traceId']}/{item['spanId']}?mutatedType={reason}\"\ + \n res_list.append(resurl)\n if len(res_list) == 3:\n \ + \ break\n\n return {\"result\": json.dumps(res_list)}\n" code_language: python3 desc: '' - isInIteration: true - iteration_id: '1741497176064' outputs: result: children: null type: string selected: false - title: it. to template cpu + title: Runq REPORTS type: code variables: - value_selector: - - '1744342244843' - - result - variable: data_json - height: 53 - id: '1744342920172' - parentId: '1741497176064' + - '1741227526517' + - startTime + variable: start + - value_selector: + - '1741227526517' + - endTime + variable: end + - value_selector: + - '1742807803325' + - service + variable: service + - value_selector: + - '1742807803325' + - endpoint + variable: endpoint + height: 54 + id: '17449581781070' position: - x: 2291.073286368818 - y: 355.5973351437458 + x: 4589.2145686280455 + y: 962.4431384506418 positionAbsolute: - x: 7553.529563648903 - y: 1271.5973351437458 + x: 4589.2145686280455 + y: 962.4431384506418 selected: false sourcePosition: right targetPosition: left type: custom - width: 243 - zIndex: 1002 + width: 244 viewport: - x: -2615.891775110111 - y: -292.61647100919583 - zoom: 0.4425894832963618 + x: -1033.6008261970235 + y: 75.79460259114265 + zoom: 0.25