diff --git a/docker-compose/prometheus/promtheus/config/rules/cpu.yaml b/docker-compose/prometheus/promtheus/config/rules/cpu.yaml new file mode 100644 index 0000000..7c31f56 --- /dev/null +++ b/docker-compose/prometheus/promtheus/config/rules/cpu.yaml @@ -0,0 +1,12 @@ +groups: +- name: cpu + interval: 15s + rules: + - alert: HighCpuUsage + expr: 1 - avg(irate(node_cpu_seconds_total{mode="idle"}[1m])) by (hostname) > 0.8 + for: 30s + labels: + severity: warning + annotations: + summary: "主机 {{ $labels.hostname }} CPU 使用率过高" + description: "CPU 使用率: {{ $value | humanizePercentage }}" \ No newline at end of file diff --git a/docker-compose/prometheus/promtheus/config/rules/disk.yaml b/docker-compose/prometheus/promtheus/config/rules/disk.yaml new file mode 100644 index 0000000..f52ca8d --- /dev/null +++ b/docker-compose/prometheus/promtheus/config/rules/disk.yaml @@ -0,0 +1,23 @@ +groups: + - name: disk + interval: 15s + rules: + - alert: HighDiskUsageRoot + expr: (1 - node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) > 0.8 + for: 5m + labels: + severity: critical + alert_type: disk + annotations: + summary: "主机 {{ $labels.hostname }} 根分区磁盘使用率过高" + description: "根分区 / 当前使用率: {{ $value | humanizePercentage }}(阈值80%)" + + - alert: HighDiskUsageData + expr: (1 - node_filesystem_avail_bytes{mountpoint="/data"} / node_filesystem_size_bytes{mountpoint="/data"}) > 0.9 + for: 5m + labels: + severity: warning + alert_type: disk + annotations: + summary: "主机 {{ $labels.hostname }} /data 分区磁盘使用率过高" + description: "/data 分区当前使用率: {{ $value | humanizePercentage }}(阈值90%)" \ No newline at end of file diff --git a/docker-compose/prometheus/promtheus/config/rules/mem.yaml b/docker-compose/prometheus/promtheus/config/rules/mem.yaml new file mode 100644 index 0000000..789db41 --- /dev/null +++ b/docker-compose/prometheus/promtheus/config/rules/mem.yaml @@ -0,0 +1,14 @@ +groups: + - name: memory + interval: 15s + rules: + - alert: HighMemoryUsage + expr: 1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) > 0.8 + for: 2m + labels: + severity: warning + alert_type: memory + annotations: + summary: "主机 {{ $labels.hostname }} 内存使用率过高" + # 修正:移除不支持的函数,只保留百分比显示 + description: "当前内存使用率: {{ $value | humanizePercentage }}(阈值80%)" \ No newline at end of file diff --git a/docker-compose/prometheus/promtheus/config/rules/mysql.yaml b/docker-compose/prometheus/promtheus/config/rules/mysql.yaml new file mode 100644 index 0000000..4b9bb11 --- /dev/null +++ b/docker-compose/prometheus/promtheus/config/rules/mysql.yaml @@ -0,0 +1,147 @@ +groups: + - name: mysql + interval: 15s + rules: + - alert: MysqlDown + expr: mysql_up == 0 + for: 0m + labels: + severity: critical + annotations: + summary: MySQL down (instance {{ $labels.instance }}) + description: "MySQL instance is down on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlTooManyConnections(>80%) + expr: max_over_time(mysql_global_status_threads_connected[1m]) / mysql_global_variables_max_connections * 100 > 80 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL too many connections (> 80%) (instance {{ $labels.instance }}) + description: "More than 80% of MySQL connections are in use on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlHighPreparedStatementsUtilization(>80%) + expr: max_over_time(mysql_global_status_prepared_stmt_count[1m]) / mysql_global_variables_max_prepared_stmt_count * 100 > 80 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL high prepared statements utilization (> 80%) (instance {{ $labels.instance }}) + description: "High utilization of prepared statements (>80%) on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: MysqlHighPreparedStatementsUtilization(>80%) + expr: max_over_time(mysql_global_status_prepared_stmt_count[1m]) / mysql_global_variables_max_prepared_stmt_count * 100 > 80 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL high prepared statements utilization (> 80%) (instance {{ $labels.instance }}) + description: "High utilization of prepared statements (>80%) on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlHighThreadsRunning + expr: max_over_time(mysql_global_status_threads_running[1m]) / mysql_global_variables_max_connections * 100 > 60 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL high threads running (instance {{ $labels.instance }}) + description: "More than 60% of MySQL connections are in running state on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlSlaveIoThreadNotRunning + expr: ( mysql_slave_status_slave_io_running and ON (instance) mysql_slave_status_master_server_id > 0 ) == 0 + for: 0m + labels: + severity: critical + annotations: + summary: MySQL Slave IO thread not running (instance {{ $labels.instance }}) + description: "MySQL Slave IO thread not running on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlSlaveSqlThreadNotRunning + expr: ( mysql_slave_status_slave_sql_running and ON (instance) mysql_slave_status_master_server_id > 0) == 0 + for: 0m + labels: + severity: critical + annotations: + summary: MySQL Slave SQL thread not running (instance {{ $labels.instance }}) + description: "MySQL Slave SQL thread not running on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlSlaveReplicationLag + expr: ( (mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay) and ON (instance) mysql_slave_status_master_server_id > 0 ) > 30 + for: 1m + labels: + severity: critical + annotations: + summary: MySQL Slave replication lag (instance {{ $labels.instance }}) + description: "MySQL replication lag on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlSlowQueries + expr: increase(mysql_global_status_slow_queries[1m]) > 0 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL slow queries (instance {{ $labels.instance }}) + description: "MySQL server mysql has some new slow query.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlInnodbLogWaits + expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10 + for: 0m + labels: + severity: warning + annotations: + summary: MySQL InnoDB log waits (instance {{ $labels.instance }}) + description: "MySQL innodb log writes stalling\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlRestarted + expr: mysql_global_status_uptime < 60 + for: 0m + labels: + severity: info + annotations: + summary: MySQL restarted (instance {{ $labels.instance }}) + description: "MySQL has just been restarted, less than one minute ago on {{ $labels.instance }}.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlHighQps + expr: irate(mysql_global_status_questions[1m]) > 10000 + for: 2m + labels: + severity: info + annotations: + summary: MySQL High QPS (instance {{ $labels.instance }}) + description: "MySQL is being overload with unusual QPS (> 10k QPS).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlTooManyOpenFiles + expr: mysql_global_status_innodb_num_open_files / mysql_global_variables_open_files_limit * 100 > 75 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL too many open files (instance {{ $labels.instance }}) + description: "MySQL has too many open files, consider increase variables open_files_limit on {{ $labels.instance }}.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlInnodbForceRecoveryIsEnabled + expr: mysql_global_variables_innodb_force_recovery != 0 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL InnoDB Force Recovery is enabled (instance {{ $labels.instance }}) + description: "MySQL InnoDB force recovery is enabled on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: MysqlInnodbHistory_lenTooLong + expr: mysql_info_schema_innodb_metrics_transaction_trx_rseg_history_len > 50000 + for: 2m + labels: + severity: warning + annotations: + summary: MySQL InnoDB history_len too long (instance {{ $labels.instance }}) + description: "MySQL history_len (undo log) too long on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + + + + + + + + +