diff --git a/README.md b/README.md index f7a76edd14cc32d5cd63461d5a86991ce25613f0..4493d8df0a893f7152a6eb228457117efa488952 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ greatsql> GRANT ALL ON mcp.* TO mcp; java -version ``` -显示类似如下内容即安装成功: +显示类似如下内容即安装成功: ``` java version "21.0.1" 2023-10-17 @@ -113,7 +113,7 @@ java version "21.0.8" 2025-07-15 LTS ... ``` -#### 2.2、安装Maven 3.6+ +#### 2.2、安装Maven 3.6+ 打以Windows客户端环境为例,开 [Maven 官方下载链接](https://maven.apache.org/download.cgi) 下载Maven安装包。 @@ -254,35 +254,133 @@ HTTP Header处填写 *api-key* 参数,如:`Authorization=Bearer 7355608`。 ### 6、使用MCP服务 -使用该MCP服务工具的问答示例如下: +截止目前,支持以下这些MCP服务 -#### 6.1、列出服务器上所有可用的数据库 +| 工具名称 | 说明 | +| :--- | :--- | +| listDatabases | 列出服务器上所有可用的数据库 | +| listTables | 列出指定数据库中的所有表 | +| getTableRowCount | 获取指定表的数据行数 | +| executeQuery | 在指定数据库中执行SQL查询 | +| insertData | 向指定表插入数据 | +| updateData | 更新指定表的数据 | +| deleteData | 删除指定表的数据 | +| createDB | 创建数据库 | +| checkCriticalTransactions | 检查当前是否有活跃的大事务或长事务" | +| avgSQLRT | 计算SQL请求平均响应耗时 | +| trackNotableStats | 关注需要注意的数据库状态,包括线程危险状态和全局状态指标 | +| checkMGRStatus | 监控MGR集群状态 | +| findAbnormalMemoryIssue | 检查数据库中是否存在内存异常情况 | +| findImproperVars | 检查数据库系统参数配置是否合理 | +| monitorReplicationLag | 监控主从复制延迟 | -![使用示例1](./docs/images/使用示例1.png) +下面逐一演示这些方法的使用 -#### 6.2、列出指定数据库中的所有表 +#### 列出服务器上所有可用的数据库 -![使用示例2](./docs/images/使用示例2.png) +**提示词**:`列出所有的数据库` -#### 6.3、获取指定表的数据行数 +![使用示例:列出服务器上所有可用的数据库](./docs/images/使用示例1.png) -![使用示例3](./docs/images/使用示例3.png) +#### 列出指定数据库中的所有表 -#### 6.4、在指定数据库中执行SQL查询 +**提示词**:`列出greatsql里所有的表` -![使用示例4](./docs/images/使用示例4.png) +![使用示例:列出指定数据库中的所有表](./docs/images/使用示例2.png) -#### 6.5、向指定表插入数据 +#### 获取指定表的数据行数 -![使用示例5](./docs/images/使用示例5.png) +**提示词**:`users表中有多少行` -#### 6.6、更新指定表的数据 +![使用示例:获取指定表的数据行数](./docs/images/使用示例3.png) -![使用示例6](./docs/images/使用示例6.png) +#### 在指定数据库中执行SQL查询 -#### 6.7、删除指定表的数据 +**提示词**:`查询users表中年龄大于30的人` -![使用示例7](./docs/images/使用示例7.png) +![使用示例:在指定数据库中执行SQL查询](./docs/images/使用示例4.png) + +#### 向指定表插入数据 + +**提示词**:`向greatsql里的users表中插入一行数据 名字为lucy 年龄为18` + +![使用示例:向指定表插入数据](./docs/images/使用示例5.png) + +#### 更新指定表的数据 + +**提示词**:`把lucy的年龄改为20` + +![使用示例:更新指定表的数据](./docs/images/使用示例6.png) + +#### 删除指定表的数据 + +**提示词**:`删除lucy的数据` + +![使用示例:删除指定表的数据](./docs/images/使用示例7.png) + +#### 创建指定数据库 + +**提示词**:`请创建一个名为test的新数据库` + +![使用示例:创建指定数据库](./docs/images/使用示例8.png) + +> 前提是要有相应数据库的创建权限。 + +#### 检查大事务或长事务 + +**提示词**:`检查大事务或长事务` + +![使用示例:检查大事务或长事务](./docs/images/使用示例9.png) + +#### 计算SQL请求平均响应耗时 + +**提示词**:`帮忙检查SQL平均响应耗时` + +![使用示例:计算SQL请求平均响应耗时](./docs/images/使用示例10.png) + +#### 检查数据库状态,包括线程危险状态和全局状态指标 + +**提示词**:`检查数据库状态` + +![使用示例:检查数据库状态](./docs/images/使用示例11.png) + +#### 监控MGR集群状态 + +**提示词**:`监控MGR状态` + +这是MGR状态正常时 + +![使用示例:监控MGR集群状态,正常时](./docs/images/使用示例12.png) + +这是有个成员节点上待应用的事务队列超过阈值时 + +![使用示例:监控MGR集群状态,异常时](./docs/images/使用示例13.png) + +#### 检查数据库中是否存在内存异常情况 + +**提示词**:`检查数据库内存` + +有时候好像会抽风,没办法一次命中对应的MCP指令,可以多试几次 + +![使用示例:检查数据库中是否存在内存异常情况](./docs/images/使用示例14.png) + +也可以直接“指名道姓”地要求执行相应的MCP指令 + +![使用示例:检查数据库中是否存在内存异常情况](./docs/images/使用示例15.png) + +#### 检查不当配置参数 + +**提示词**:`检查系统参数` + +![使用示例: 检查不当配置参数](./docs/images/使用示例16.png) + +#### 监控主从复制延迟 + +**提示词**:`监控主从复制延迟` + +![使用示例:监控主从复制延迟](./docs/images/使用示例17.png) + +更多MCP指令,请自行进一步探索。 ## 适用场景与建议 @@ -354,7 +452,7 @@ index ea20c48..c89a38e 100644 "name", "listTables", "description", "列出指定数据库中的所有表", @@ -263,6 +277,15 @@ public class McpController { - + Object result = switch (name) { case "listDatabases" -> databaseService.listDatabases(); + @@ -388,7 +486,7 @@ index 
ea20c48..c89a38e 100644 @@ -327,4 +352,4 @@ public class McpController { ); } - + -} \ No newline at end of file +} @@ -399,7 +497,7 @@ index 0e212be..0bab7be 100644 @@ -41,6 +41,20 @@ public class DatabaseService { return databases; } - + + @Tool(name = "createDB", description = "创建一个新数据库") + public int createDB( @ToolParam(description = "数据库名称") String database) { + @@ -414,13 +512,13 @@ index 0e212be..0bab7be 100644 + } + return 0; + } - + @Tool(name = "listTables", description = "列出指定数据库中的所有表") public List listTables( @@ -273,4 +287,4 @@ public class DatabaseService { } } - + -} \ No newline at end of file +} diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21310.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21310.png" new file mode 100644 index 0000000000000000000000000000000000000000..c3fd88c6d838983ccc648cb4a018cf1e0d421dba Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21310.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21311.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21311.png" new file mode 100644 index 0000000000000000000000000000000000000000..575bf04dc500828eb55dba6725fd468c170b676a Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21311.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21312.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21312.png" new file mode 100644 index 0000000000000000000000000000000000000000..75e39a2f20e10b948436df700926dd488e55f476 Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21312.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21313.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21313.png" new file mode 100644 index 0000000000000000000000000000000000000000..b082d5ee4035469147972b18df0cf48beccfcb44 Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21313.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21314.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21314.png" new file mode 100644 index 0000000000000000000000000000000000000000..1f3967fb4dee950908cae13573437c52309e9894 Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21314.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21315.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21315.png" new file mode 100644 index 0000000000000000000000000000000000000000..66a4732afcada35c2d80df581436e03a216e8a55 Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21315.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21316.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21316.png" new file mode 100644 index 0000000000000000000000000000000000000000..3bd5f8b08db073c7c879658a4c139b7b5c58d994 Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21316.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21317.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21317.png" new file mode 100644 index 0000000000000000000000000000000000000000..4672de176da40f0c1bd36e956551a607f8d329ed Binary files /dev/null and 
"b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\21317.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\2138.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\2138.png" new file mode 100644 index 0000000000000000000000000000000000000000..678703a47c4d49080120fe51db96633564209965 Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\2138.png" differ diff --git "a/docs/images/\344\275\277\347\224\250\347\244\272\344\276\2139.png" "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\2139.png" new file mode 100644 index 0000000000000000000000000000000000000000..cae4f9218c130cf4c229c404c8b44d9b89e213f9 Binary files /dev/null and "b/docs/images/\344\275\277\347\224\250\347\244\272\344\276\2139.png" differ diff --git "a/docs/images/\350\277\234\347\250\213\351\205\215\347\275\256.png" "b/docs/images/\350\277\234\347\250\213\351\205\215\347\275\256.png" index 9919602b4e5342ffd7f437c2a7be44352e0a419a..baf8436f28bf63e4b5cff7ef0a218c394b183a7a 100644 Binary files "a/docs/images/\350\277\234\347\250\213\351\205\215\347\275\256.png" and "b/docs/images/\350\277\234\347\250\213\351\205\215\347\275\256.png" differ diff --git a/src/main/java/org/greatsql/greatsqlmcp/controller/McpController.java b/src/main/java/org/greatsql/greatsqlmcp/controller/McpController.java index 1de0e90656678a5ba976f2d8bde2c087d8cc0181..e9b62c984df2ebbdf378dcad25dd97d8d3f3f880 100644 --- a/src/main/java/org/greatsql/greatsqlmcp/controller/McpController.java +++ b/src/main/java/org/greatsql/greatsqlmcp/controller/McpController.java @@ -283,8 +283,8 @@ public class McpController { ) ), Map.of( - "name", "listNotableWaitEvents", - "description", "检查需要关注的数据库等待事件", + "name", "trackNotableStats", + "description", "关注需要注意的数据库状态,包括线程危险状态和全局状态指标", "inputSchema", Map.of( "type", "object", "properties", Map.of(), @@ -308,6 +308,24 @@ public class McpController { "properties", Map.of(), "required", new String[]{} ) + ), + Map.of( + "name", "findImproperVars", + "description", "检查数据库系统参数配置是否合理", + "inputSchema", Map.of( + "type", "object", + "properties", Map.of(), + "required", new String[]{} + ) + ), + Map.of( + "name", "monitorReplicationLag", + "description", "监控主从复制延迟", + "inputSchema", Map.of( + "type", "object", + "properties", Map.of(), + "required", new String[]{} + ) ) } ); @@ -389,11 +407,12 @@ public class McpController { } yield databaseService.createDB(databaseName); } + case "monitorReplicationLag" -> databaseService.monitorReplicationLag(); case "checkCriticalTransactions" -> { yield databaseService.checkCriticalTransactions(); } - case "listNotableWaitEvents" -> { - yield databaseService.listNotableWaitEvents(); + case "trackNotableStats" -> { + yield databaseService.trackNotableStats(); } case "checkMGRStatus" -> { yield databaseService.checkMGRStatus(); @@ -401,6 +420,9 @@ public class McpController { case "findAbnormalMemoryIssue" -> { yield databaseService.findAbnormalMemoryIssue(); } + case "findImproperVars" -> { + yield databaseService.findImproperVars(); + } default -> Map.of("error", "未知的工具: " + name); }; return Map.of( diff --git a/src/main/java/org/greatsql/greatsqlmcp/service/DatabaseService.java b/src/main/java/org/greatsql/greatsqlmcp/service/DatabaseService.java index f35195cddef7fa6ce88370235a5b453097d63676..7f80522ab9813f8c775092564ea46733abccabb2 100644 --- a/src/main/java/org/greatsql/greatsqlmcp/service/DatabaseService.java +++ b/src/main/java/org/greatsql/greatsqlmcp/service/DatabaseService.java @@ -9,8 +9,11 
@@ import org.springframework.stereotype.Service; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; +import java.sql.ResultSetMetaData; import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -48,7 +51,7 @@ public class DatabaseService { System.out.println("listTables called with database: " + database); List tables = new ArrayList<>(); - String sql = "SELECT table_name,table_schema,table_rows,create_time,table_comment FROM information_schema.tables WHERE table_schema=?"; + String sql = "SELECT TABLE_NAME, TABLE_SCHEMA, TABLE_ROWS, CREATE_TIME, TABLE_COMMENT FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA=?"; try (Connection conn = connectionService.getConnection(); PreparedStatement stmt = conn.prepareStatement(sql)) { @@ -97,7 +100,6 @@ public class DatabaseService { public List> executeQuery( @ToolParam(description = "数据库名称") String database, @ToolParam(description = "SQL查询语句") String query) { - List> results = new ArrayList<>(); try (Connection conn = connectionService.getConnection(database); @@ -122,6 +124,95 @@ public class DatabaseService { return results; } + public Map getSlaveStatus(Connection conn) throws SQLException { + Map slaveStatus = new HashMap<>(); + String sql = "SHOW SLAVE STATUS"; + + try (PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet rs = stmt.executeQuery()) { + + if (rs.next()) { + ResultSetMetaData metaData = rs.getMetaData(); + int columnCount = metaData.getColumnCount(); + + for (int i = 1; i <= columnCount; i++) { + String columnName = metaData.getColumnName(i); + String columnValue = rs.getString(i); + slaveStatus.put(columnName, columnValue); + } + + // 检查 Slave_IO_Running 和 Slave_SQL_Running 状态 + String ioRunning = slaveStatus.get("Slave_IO_Running"); + String sqlRunning = slaveStatus.get("Slave_SQL_Running"); + + if (!"Yes".equals(ioRunning) || !"Yes".equals(sqlRunning)) { + // 检查错误代码和错误信息 + String ioErrno = slaveStatus.get("Last_IO_Errno"); + String sqlErrno = slaveStatus.get("Last_SQL_Errno"); + String lastErrno = slaveStatus.get("Last_Errno"); + + String ioError = slaveStatus.get("Last_IO_Error"); + String sqlError = slaveStatus.get("Last_SQL_Error"); + String lastError = slaveStatus.get("Last_Error"); + + StringBuilder repairAdvice = new StringBuilder(); + + if (!"0".equals(ioErrno) && ioErrno != null && !ioErrno.isEmpty()) { + repairAdvice.append("IO 线程错误代码: ").append(ioErrno).append(", 错误信息: ").append(ioError).append("\n"); + } + + if (!"0".equals(sqlErrno) && sqlErrno != null && !sqlErrno.isEmpty()) { + repairAdvice.append("SQL 线程错误代码: ").append(sqlErrno).append(", 错误信息: ").append(sqlError).append("\n"); + } + + if (!"0".equals(lastErrno) && lastErrno != null && !lastErrno.isEmpty()) { + repairAdvice.append("最近错误代码: ").append(lastErrno).append(", 错误信息: ").append(lastError).append("\n"); + } + + if (repairAdvice.length() > 0) { + repairAdvice.append("修复建议: \n"); + repairAdvice.append("1. 检查主从服务器网络连接是否正常。\n"); + repairAdvice.append("2. 检查主从服务器的配置是否一致。\n"); + repairAdvice.append("3. 检查主从服务器的日志文件,确认是否有其他错误。\n"); + repairAdvice.append("4. 
根据错误信息,参考官方文档或社区解决方案进行修复。\n"); + + slaveStatus.put("RepairAdvice", repairAdvice.toString()); + } + } + + // 检查 secondsBehindMaster + String secondsBehindMaster = slaveStatus.get("Seconds_Behind_Master"); + if (secondsBehindMaster != null && !secondsBehindMaster.isEmpty()) { + if (secondsBehindMaster.matches("\\d+")) { + slaveStatus.put("SecondsBehindMasterAlert", "主从延迟: " + secondsBehindMaster + " 秒"); + } + } + + // 检查 GTID 差异 + String masterGtidSet = slaveStatus.get("Retrieved_Gtid_Set"); + String slaveGtidSet = slaveStatus.get("Executed_Gtid_Set"); + if (masterGtidSet != null && slaveGtidSet != null && !masterGtidSet.isEmpty() && !slaveGtidSet.isEmpty()) { + try { + // 计算 GTID 差异(简化逻辑,实际可能需要更复杂的解析) + long masterCount = masterGtidSet.split(",").length; + long slaveCount = slaveGtidSet.split(",").length; + long diff = Math.abs(masterCount - slaveCount); + + if (diff > 100) { + slaveStatus.put("GtidDiffAlert", "严重告警: GTID 差异超过阈值,当前差值: " + diff); + } else if (diff > 0) { + slaveStatus.put("GtidDiffAlert", "一般告警: GTID 存在差异,当前差值: " + diff); + } + } catch (Exception e) { + // 忽略解析异常 + } + } + } + } + + return slaveStatus; + } + @Tool(name = "insertData", description = "向指定表插入数据") public int insertData( @ToolParam(description = "数据库名称") String database, @@ -291,7 +382,7 @@ public class DatabaseService { @Tool(name = "checkCriticalTransactions", description = "检查当前是否有活跃的大事务或长事务") public List> checkCriticalTransactions() { List> results = new ArrayList<>(); - String sql = "SELECT * FROM information_schema.INNODB_TRX WHERE " + + String sql = "SELECT * FROM INFORMATION_SCHEMA.INNODB_TRX WHERE " + "trx_lock_structs >= 5 OR " + "trx_rows_locked >= 100 OR " + "trx_rows_modified >= 100 OR " + @@ -350,80 +441,154 @@ public class DatabaseService { } } - @Tool(name = "listNotableWaitEvents", description = "检查需要关注的数据库等待事件") - public Map listNotableWaitEvents() { + @Tool(name = "trackNotableStats", description = "关注需要注意的数据库状态,包括线程危险状态和全局状态指标") + public Map trackNotableStats() { Map results = new HashMap<>(); - try (Connection conn = connectionService.getConnection()) { - // 1. 检查行锁等待 - checkRowLockWaits(conn, results); - - // 2. 检查Buffer Pool等待 - checkBufferPoolWaits(conn, results); - - // 3. 检查Redo Log等待 - checkRedoLogWaits(conn, results); - - // 4. 
检查Undo Log清理 - checkUndoLogPurge(conn, results); - + // 检查线程危险状态 + try (Connection connection = connectionService.getConnection(); + Statement statement = connection.createStatement(); + ResultSet rs = statement.executeQuery("SELECT * FROM PERFORMANCE_SCHEMA.PROCESSLIST")) { + while (rs.next()) { + String state = rs.getString("State"); + if (state != null && (state.contains("converting HEAP to ondisk") + || state.contains("copy to tmp table") + || state.contains("Copying to group table") + || state.contains("Copying to tmp table") + || state.contains("Copying to tmp table on disk") + || state.contains("Creating sort index") + || state.contains("Creating tmp table") + || state.contains("Rolling back") + || state.contains("Sending data") + || state.contains("Sorting result") + || (state.contains("Waiting for") && !state.contains("Waiting for an event from Coordinator")))) { + results.put("ThreadStateWarning", "严重级潜在性能风险: 线程状态为 " + state); + } + } } catch (SQLException e) { - throw new RuntimeException("检查等待事件时出错:" + e.getMessage(), e); + results.put("ThreadStateError", "检查线程状态失败: " + e.getMessage()); } - return results; - } - - private void checkRowLockWaits(Connection conn, Map results) throws SQLException { - String sql = "SELECT variable_value FROM performance_schema.global_status " + - "WHERE variable_name = 'Innodb_row_lock_current_waits'"; - try (PreparedStatement stmt = conn.prepareStatement(sql); - ResultSet rs = stmt.executeQuery()) { - if (rs.next()) { - int value = rs.getInt(1); - if (value > 10) { - results.put("row_lock_wait", "严重级告警:当前有 " + value + " 个活跃的行锁等待,请DBA立即介入检查"); - } else if (value > 0) { - results.put("row_lock_wait", "一般级告警:当前有 " + value + " 个活跃的行锁等待,建议DBA检查"); + // 检查全局状态指标 + final String GLOBAL_STATUS_QUERY = "SELECT * FROM PERFORMANCE_SCHEMA.GLOBAL_STATUS"; + try (Connection connection = connectionService.getConnection(); + PreparedStatement statement = connection.prepareStatement(GLOBAL_STATUS_QUERY)) { + // 第一次查询 + ResultSet rs = statement.executeQuery(); + Map firstGlobalStatus = new HashMap<>(); + while (rs.next()) { + String variableName = rs.getString("VARIABLE_NAME"); + String valueStr = rs.getString("VARIABLE_VALUE"); + if (valueStr.matches("\\d+")) { + try { + firstGlobalStatus.put(variableName, Long.parseLong(valueStr)); + } catch (NumberFormatException e) { + // 如果数值过大,跳过解析 + firstGlobalStatus.put(variableName, 0L); + } } } - } - } - - private void checkBufferPoolWaits(Connection conn, Map results) throws SQLException { - String sql = "SELECT variable_value FROM performance_schema.global_status " + - "WHERE variable_name = 'Innodb_buffer_pool_wait_free'"; - try (PreparedStatement stmt = conn.prepareStatement(sql); - ResultSet rs = stmt.executeQuery()) { - if (rs.next()) { - int value = rs.getInt(1); - if (value > 10) { - results.put("buffer_pool_wait", "严重级告警:Buffer Pool等待事件 " + value + " 次,请立即调大innodb_buffer_pool_size并检查"); - } else if (value > 0) { - results.put("buffer_pool_wait", "一般级告警:Buffer Pool等待事件 " + value + " 次,建议调大innodb_buffer_pool_size"); + + // 暂停5秒 + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("暂停被中断", e); + } + + // 第二次查询 + rs = statement.executeQuery(); + Map secondGlobalStatus = new HashMap<>(); + while (rs.next()) { + String variableName = rs.getString("VARIABLE_NAME"); + String valueStr = rs.getString("VARIABLE_VALUE"); + if (valueStr.matches("\\d+")) { + try { + secondGlobalStatus.put(variableName, Long.parseLong(valueStr)); + } catch 
(NumberFormatException e) { + // 如果数值过大,跳过解析 + secondGlobalStatus.put(variableName, 0L); + } } } - } - } - - private void checkRedoLogWaits(Connection conn, Map results) throws SQLException { - String sql = "SELECT variable_value FROM performance_schema.global_status " + - "WHERE variable_name = 'Innodb_log_waits'"; - try (PreparedStatement stmt = conn.prepareStatement(sql); - ResultSet rs = stmt.executeQuery()) { - if (rs.next()) { - int value = rs.getInt(1); - if (value > 10) { - results.put("redo_log_wait", "严重级告警:Redo Log等待事件 " + value + " 次,请立即调大innodb_log_buffer_size并检查"); - } else if (value > 0) { - results.put("redo_log_wait", "一般级告警:Redo Log等待事件 " + value + " 次,建议调大innodb_log_buffer_size"); + + // 筛选关键指标并检查阈值 + List keyMetrics = Arrays.asList( + "Created_tmp_disk_tables", + "Innodb_buffer_pool_wait_free", + "Innodb_log_waits", + "Innodb_row_lock_current_waits", + "Handler_read_rnd", + "Handler_read_rnd_next", + "Handler_read_first", + "Handler_read_key", + "Handler_read_last", + "Handler_read_next", + "Handler_read_prev", + "Select_full_join", + "Select_scan", + "Sort_merge_passes" + ); + + // 计算Handler_read总和(第一次查询) + long firstHandlerReadTotal = firstGlobalStatus.getOrDefault("Handler_read_first", 0L) + + firstGlobalStatus.getOrDefault("Handler_read_key", 0L) + + firstGlobalStatus.getOrDefault("Handler_read_last", 0L) + + firstGlobalStatus.getOrDefault("Handler_read_next", 0L) + + firstGlobalStatus.getOrDefault("Handler_read_prev", 0L); + + // 计算Handler_read总和(第二次查询) + long secondHandlerReadTotal = secondGlobalStatus.getOrDefault("Handler_read_first", 0L) + + secondGlobalStatus.getOrDefault("Handler_read_key", 0L) + + secondGlobalStatus.getOrDefault("Handler_read_last", 0L) + + secondGlobalStatus.getOrDefault("Handler_read_next", 0L) + + secondGlobalStatus.getOrDefault("Handler_read_prev", 0L); + + // 检查每个关键指标 + for (String metric : keyMetrics) { + long firstValue = firstGlobalStatus.getOrDefault(metric, 0L); + long secondValue = secondGlobalStatus.getOrDefault(metric, 0L); + long value = secondValue - firstValue; + if (metric.equals("Created_tmp_disk_tables") || metric.equals("Innodb_buffer_pool_wait_free") || + metric.equals("Innodb_log_waits") || metric.equals("Innodb_row_lock_current_waits") || + metric.equals("Select_full_join") || metric.equals("Select_scan") || metric.equals("Sort_merge_passes")) { + if (value > 20) { + results.put(metric + "_Warning", "严重级潜在性能风险: " + metric + " = " + value); + } else if (value > 5) { + results.put(metric + "_Warning", "一般级潜在性能风险: " + metric + " = " + value); + } + } else if (metric.equals("Handler_read_rnd") || metric.equals("Handler_read_rnd_next")) { + long handlerReadRndTotal = secondGlobalStatus.getOrDefault("Handler_read_rnd", 0L) + + secondGlobalStatus.getOrDefault("Handler_read_rnd_next", 0L); + double ratio = (double) handlerReadRndTotal / secondHandlerReadTotal; + if (ratio > 0.4) { + results.put("HandlerReadRndRatio_Warning", "严重级潜在性能风险: Handler_read_rnd + Handler_read_rnd_next 占比 " + (ratio * 100) + "%"); + } else if (ratio > 0.2) { + results.put("HandlerReadRndRatio_Warning", "一般级潜在性能风险: Handler_read_rnd + Handler_read_rnd_next 占比 " + (ratio * 100) + "%"); + } } } + + } catch (SQLException e) { + results.put("GlobalStatusError", "检查全局状态失败: " + e.getMessage()); + } + + try (Connection conn = connectionService.getConnection()) { + // 检查Undo Log清理 + checkUndoLogPurge(conn, results); + + } catch (SQLException e) { + throw new RuntimeException("检查等待事件时出错:" + e.getMessage(), e); } + + return results; } + + private void 
checkUndoLogPurge(Connection conn, Map results) throws SQLException { - String sql = "SELECT COUNT, COMMENT FROM information_schema.INNODB_METRICS " + + String sql = "SELECT COUNT, COMMENT FROM INFORMATION_SCHEMA.INNODB_METRICS " + "WHERE NAME = 'trx_rseg_history_len'"; try (PreparedStatement stmt = conn.prepareStatement(sql); ResultSet rs = stmt.executeQuery()) { @@ -457,7 +622,7 @@ public class DatabaseService { } private void checkMGREnabled(Connection conn, Map results) throws SQLException { - String sql = "SELECT * FROM performance_schema.replication_group_members"; + String sql = "SELECT * FROM PERFORMANCE_SCHEMA.REPLICATION_GROUP_MEMBERS"; try (PreparedStatement stmt = conn.prepareStatement(sql); ResultSet rs = stmt.executeQuery()) { if (!rs.next()) { @@ -466,14 +631,18 @@ public class DatabaseService { } boolean hasOnlineMember = false; + int rowCount = 1; do { if ("ONLINE".equals(rs.getString("MEMBER_STATE"))) { hasOnlineMember = true; break; } + rowCount++; } while (rs.next()); - if (!hasOnlineMember) { + if (!hasOnlineMember && rowCount == 1 && "OFFLINE".equals(rs.getString("MEMBER_STATE"))) { + results.put("mgr_status", "提示:已启用group_replication plugin,是否要继续构建MGR服务"); + } else if (!hasOnlineMember) { results.put("mgr_status", "严重级告警:MGR已启用但无ONLINE状态的成员"); } else { results.put("mgr_status", "MGR运行正常"); @@ -482,25 +651,30 @@ public class DatabaseService { } private void checkMGRTransactionQueue(Connection conn, Map results) throws SQLException { - String sql = "SELECT MEMBER_ID as id, COUNT_TRANSACTIONS_IN_QUEUE as trx_tobe_certified, " + - "COUNT_TRANSACTIONS_REMOTE_IN_APPLIER_QUEUE as relaylog_tobe_applied " + - "FROM performance_schema.replication_group_member_stats"; + String sql = "SELECT s.MEMBER_ID as id, s.COUNT_TRANSACTIONS_IN_QUEUE as trx_tobe_certified, " + + "s.COUNT_TRANSACTIONS_REMOTE_IN_APPLIER_QUEUE as relaylog_tobe_applied, " + + "m.MEMBER_HOST as host, m.MEMBER_PORT as port " + + "FROM PERFORMANCE_SCHEMA.REPLICATION_GROUP_MEMBER_STATS s " + + "JOIN PERFORMANCE_SCHEMA.REPLICATION_GROUP_MEMBERS m ON s.MEMBER_ID = m.MEMBER_ID"; try (PreparedStatement stmt = conn.prepareStatement(sql); ResultSet rs = stmt.executeQuery()) { - if (rs.next()) { + while (rs.next()) { + String memberId = rs.getString("id"); + String host = rs.getString("host"); + int port = rs.getInt("port"); int trxToCertify = rs.getInt("trx_tobe_certified"); int relaylogToApply = rs.getInt("relaylog_tobe_applied"); if (trxToCertify > 100) { - results.put("mgr_trx_certify", "严重级告警:待认证事务队列大小 " + trxToCertify); + results.put("mgr_trx_certify_" + memberId, "严重级告警:节点 " + host + ":" + port + " (ID: " + memberId + ") 待认证事务队列大小 " + trxToCertify); } else if (trxToCertify > 10) { - results.put("mgr_trx_certify", "一般级关注:待认证事务队列大小 " + trxToCertify); + results.put("mgr_trx_certify_" + memberId, "一般级关注:节点 " + host + ":" + port + " (ID: " + memberId + ") 待认证事务队列大小 " + trxToCertify); } if (relaylogToApply > 100) { - results.put("mgr_relaylog_apply", "严重级告警:待回放事务队列大小 " + relaylogToApply); + results.put("mgr_relaylog_apply", "严重级告警:节点 " + host + ":" + port + " (ID: " + memberId + ") 待回放事务队列大小 " + relaylogToApply); } else if (relaylogToApply > 10) { - results.put("mgr_relaylog_apply", "一般级关注:待回放事务队列大小 " + relaylogToApply); + results.put("mgr_relaylog_apply", "一般级关注:节点 " + host + ":" + port + " (ID: " + memberId + ") 待回放事务队列大小 " + relaylogToApply); } } } @@ -551,11 +725,11 @@ public class DatabaseService { } private long getInnoDBBufferPoolSize(Connection conn) throws SQLException { - String sql = "SHOW VARIABLES LIKE 
'innodb_buffer_pool_size'"; + String sql = "SELECT * FROM PERFORMANCE_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_size'"; try (PreparedStatement stmt = conn.prepareStatement(sql); ResultSet rs = stmt.executeQuery()) { if (rs.next()) { - return rs.getLong("Value"); + return rs.getLong("VARIABLE_VALUE"); } } return 0; @@ -587,4 +761,336 @@ public class DatabaseService { } } } + + public Map findImproperVars() { + + + Map results = new HashMap<>(); + + try (Connection conn = connectionService.getConnection()) { + // 查询全局变量和状态 + Map vars = getGlobalVariables(conn); + Map stats = getGlobalStatus(conn); + + // 检查连接数配置 + checkMaxConnections(vars, stats, results); + + // 检查表缓存配置 + checkTableCaches(vars, stats, results); + + // 检查线程缓存配置 + checkThreadCache(vars, stats, results); + + // 检查临时表配置 + checkTempTables(vars, stats, results); + + // 检查 InnoDB 日志配置 + checkInnoDBLogs(vars, results); + + // 检查二进制日志和事务提交配置 + checkBinaryLogAndFlush(vars, results); + + // 检查InnoDB日志配置 + checkInnoDBLogs(vars, results); + + // 检查并行复制配置 + checkParallelReplication(vars, results); + + // 检查IO容量配置 + checkIOCapacity(vars, results); + + // 检查并发线程配置 + checkThreadConcurrency(vars, results); + + // 检查二进制日志格式 + checkBinlogFormat(vars, results); + + // 检查日志缓冲区配置 + checkLogBuffer(vars, results); + + // 检查其他推荐配置 + checkRecommendedSettings(vars, results); + + // 检查慢查询日志配置 + checkSlowQuerySettings(vars, results); + + // 检查缓冲池配置 + checkBufferPoolSize(vars, results); + + } catch (SQLException e) { + throw new RuntimeException("检查配置参数时出错:" + e.getMessage(), e); + } + + return results; + } + + private Map getGlobalVariables(Connection conn) throws SQLException { + Map vars = new HashMap<>(); + String sql = "SELECT * FROM PERFORMANCE_SCHEMA.GLOBAL_VARIABLES"; + + try (PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet rs = stmt.executeQuery()) { + while (rs.next()) { + vars.put(rs.getString("VARIABLE_NAME"), rs.getString("VARIABLE_VALUE")); + } + } + return vars; + } + + private Map getGlobalStatus(Connection conn) throws SQLException { + Map stats = new HashMap<>(); + String sql = "SELECT * FROM PERFORMANCE_SCHEMA.GLOBAL_STATUS"; + + try (PreparedStatement stmt = conn.prepareStatement(sql); + ResultSet rs = stmt.executeQuery()) { + while (rs.next()) { + stats.put(rs.getString("VARIABLE_NAME"), rs.getString("VARIABLE_VALUE")); + } + } + return stats; + } + + private void checkMaxConnections(Map vars, Map stats, Map results) { + if (vars.containsKey("max_connections") && stats.containsKey("Threads_connected")) { + int maxConn = Integer.parseInt(vars.get("max_connections")); + int currConn = Integer.parseInt(stats.get("Threads_connected")); + + if (currConn >= maxConn * 0.8) { + results.put("max_connections", "警告:当前连接数(" + currConn + ")已接近最大连接数限制(" + maxConn + "),建议考虑增加max_connections参数值"); + } + } + } + + private void checkTableCaches(Map vars, Map stats, Map results) { + // 检查table_open_cache + if (vars.containsKey("table_open_cache") && stats.containsKey("Open_tables") && stats.containsKey("Opened_tables")) { + int cacheSize = Integer.parseInt(vars.get("table_open_cache")); + int openTables = Integer.parseInt(stats.get("Open_tables")); + int openedTables = Integer.parseInt(stats.get("Opened_tables")); + + if (openTables >= cacheSize * 0.8 && openedTables > openTables * 10) { + results.put("table_open_cache", "警告:当前打开表数(" + openTables + ")接近缓存限制(" + cacheSize + ")且表打开次数(" + openedTables + ")较高,建议增加table_open_cache参数值"); + } + } + + // 检查table_definition_cache + if 
(vars.containsKey("table_definition_cache") && stats.containsKey("Open_table_definitions") && stats.containsKey("Opened_table_definitions")) { + int cacheSize = Integer.parseInt(vars.get("table_definition_cache")); + int openDefs = Integer.parseInt(stats.get("Open_table_definitions")); + int openedDefs = Integer.parseInt(stats.get("Opened_table_definitions")); + + if (openDefs >= cacheSize * 0.8 && openedDefs > openDefs * 10) { + results.put("table_definition_cache", "警告:当前打开表定义数(" + openDefs + ")接近缓存限制(" + cacheSize + ")且表定义打开次数(" + openedDefs + ")较高,建议增加table_definition_cache参数值"); + } + } + } + + private void checkThreadCache(Map vars, Map stats, Map results) { + if (vars.containsKey("thread_cache_size") && stats.containsKey("Threads_cached") && stats.containsKey("Threads_created")) { + int cacheSize = Integer.parseInt(vars.get("thread_cache_size")); + int cachedThreads = Integer.parseInt(stats.get("Threads_cached")); + int createdThreads = Integer.parseInt(stats.get("Threads_created")); + + if (cachedThreads <= cacheSize * 0.2 && createdThreads > cachedThreads * 10) { + results.put("thread_cache_size", "警告:线程缓存使用率低(" + cachedThreads + "/" + cacheSize + ")但线程创建次数高(" + createdThreads + "),建议增加thread_cache_size参数值"); + } + } + } + + private void checkTempTables(Map vars, Map stats, Map results) { + if (stats.containsKey("Created_tmp_disk_tables") && stats.containsKey("Created_tmp_tables")) { + int diskTmpTables = Integer.parseInt(stats.get("Created_tmp_disk_tables")); + int memTmpTables = Integer.parseInt(stats.get("Created_tmp_tables")); + + if (diskTmpTables > 100 || diskTmpTables > memTmpTables * 0.1) { + results.put("tmp_table_size", "警告:磁盘临时表创建次数高(" + diskTmpTables + "),建议增加tmp_table_size和max_heap_table_size参数值(至少96MB)"); + } + } + } + + private void checkBinaryLogAndFlush(Map vars, Map results) { + if (vars.containsKey("sync_binlog") && vars.get("sync_binlog").equals("0")) { + results.put("sync_binlog", "警告:sync_binlog参数设置为0,服务器掉电时可能丢失二进制日志数据,建议设置为1"); + } + + if (vars.containsKey("innodb_flush_log_at_trx_commit") && vars.get("innodb_flush_log_at_trx_commit").equals("0")) { + results.put("innodb_flush_log_at_trx_commit", "警告:innodb_flush_log_at_trx_commit参数设置为0,服务器掉电时可能丢失事务数据,建议设置为1"); + } + } + + private void checkInnoDBLogs(Map vars, Map results) { + // 检查redo日志大小 + long var1 = 0; + long var2 = 0; + if (vars.containsKey("innodb_log_file_size") && vars.containsKey("innodb_log_files_in_group")) { + var1 = Long.parseLong(vars.get("innodb_log_file_size")) * Long.parseLong(vars.get("innodb_log_files_in_group")); + } + if (vars.containsKey("innodb_redo_log_capacity")) { + var2 = Long.parseLong(vars.get("innodb_redo_log_capacity")); + } + + if (var1 < 2147483648L && var2 < 2147483648L) { // 2GB + results.put("innodb_redo_space", "InnoDB Redo 空间可能不够用,会影响性能"); + } + + // 检查日志缓冲区大小 + if (vars.containsKey("innodb_log_buffer_size")) { + long logBufferSize = Long.parseLong(vars.get("innodb_log_buffer_size")); + if (logBufferSize < 33554432) { // 32MB + results.put("innodb_log_buffer_size", "建议:innodb_log_buffer_size参数值(" + (logBufferSize / 1024 / 1024) + "MB)较小,建议设置为至少32MB"); + } + } + } + + private void checkParallelReplication(Map vars, Map results) { + String parallelType = vars.getOrDefault("slave_parallel_type", vars.getOrDefault("replica_parallel_type", "")); + if (!parallelType.equals("LOGICAL_CLOCK")) { + results.put("parallel_replication", "警告:并行复制类型设置为" + parallelType + ",建议设置为LOGICAL_CLOCK以获得更好的并行复制性能"); + } + } + + private void checkIOCapacity(Map vars, Map results) { + if 
(vars.containsKey("innodb_io_capacity")) { + int ioCapacity = Integer.parseInt(vars.get("innodb_io_capacity")); + if (ioCapacity < 10000) { + results.put("innodb_io_capacity", "警告:innodb_io_capacity参数值(" + ioCapacity + ")较低,建议设置为至少10000"); + } + } + + if (vars.containsKey("innodb_io_capacity_max")) { + int ioCapacityMax = Integer.parseInt(vars.get("innodb_io_capacity_max")); + if (ioCapacityMax < 10000) { + results.put("innodb_io_capacity_max", "警告:innodb_io_capacity_max参数值(" + ioCapacityMax + ")较低,建议设置为至少10000"); + } + } + } + + private void checkThreadConcurrency(Map vars, Map results) { + if (vars.containsKey("innodb_thread_concurrency") && !vars.get("innodb_thread_concurrency").equals("0")) { + results.put("innodb_thread_concurrency", "警告:innodb_thread_concurrency参数设置为" + vars.get("innodb_thread_concurrency") + ",建议设置为0以获得更好的并发性能"); + } + } + + private void checkBinlogFormat(Map vars, Map results) { + if (vars.containsKey("binlog_format") && !vars.get("binlog_format").equals("ROW")) { + results.put("binlog_format", "警告:binlog_format参数设置为" + vars.get("binlog_format") + ",建议设置为ROW以确保数据安全"); + } + } + + private void checkLogBuffer(Map vars, Map results) { + // 已在checkInnoDBLogs中检查 + } + + private void checkRecommendedSettings(Map vars, Map results) { + // 检查各种缓冲区大小 + checkBufferSize(vars, "sort_buffer_size", 4194304, results); // 4MB + checkBufferSize(vars, "join_buffer_size", 4194304, results); + checkBufferSize(vars, "read_rnd_buffer_size", 4194304, results); + checkBufferSize(vars, "read_buffer_size", 4194304, results); + + // 检查其他推荐设置 + checkSetting(vars, "innodb_open_files", "65534", results); + checkSetting(vars, "innodb_flush_method", "O_DIRECT", results); + checkSetting(vars, "innodb_use_fdatasync", "ON", results); + checkSetting(vars, "innodb_adaptive_hash_index", "OFF", results); + checkSetting(vars, "innodb_doublewrite_pages", "128", results); + } + + private void checkBufferSize(Map vars, String param, long minSize, Map results) { + if (vars.containsKey(param)) { + long size = Long.parseLong(vars.get(param)); + if (size < minSize) { + results.put(param, "建议:" + param + "参数值(" + (size / 1024 / 1024) + "MB)较小,建议设置为至少" + (minSize / 1024 / 1024) + "MB"); + } + } + } + + private void checkSetting(Map vars, String param, String recommended, Map results) { + if (vars.containsKey(param) && !vars.get(param).equals(recommended)) { + results.put(param, "建议:" + param + "参数设置为" + vars.get(param) + ",推荐设置为" + recommended); + } + } + + private void checkSlowQuerySettings(Map vars, Map results) { + if (vars.containsKey("long_query_time")) { + double longQueryTime = Double.parseDouble(vars.get("long_query_time")); + if (longQueryTime > 1.0) { + results.put("long_query_time", "建议:long_query_time参数值(" + longQueryTime + ")较大,建议设置为0.05-1.0之间"); + } + } + } + + private void checkBufferPoolSize(Map vars, Map results) { + if (vars.containsKey("innodb_buffer_pool_size")) { + long bufferPoolSize = Long.parseLong(vars.get("innodb_buffer_pool_size")); + if (bufferPoolSize < 2147483648L) { // 2GB + results.put("innodb_buffer_pool_size", "警告:innodb_buffer_pool_size参数值(" + (bufferPoolSize / 1024 / 1024) + "MB)较小,建议设置为至少2GB"); + } + } + } + + public Map monitorReplicationLag() { + Map results = new HashMap<>(); + + try (Connection conn = connectionService.getConnection()) { + Map slaveStatus = getSlaveStatus(conn); + + // 检查是否启用了主从复制 + if (slaveStatus.isEmpty() || + slaveStatus.get("Master_Host") == null || + slaveStatus.get("Master_Log_File") == null || + slaveStatus.get("Exec_Master_Log_Pos") == null || + 
slaveStatus.get("Retrieved_Gtid_Set") == null || + slaveStatus.get("Executed_Gtid_Set") == null) { + results.put("replication_status", "未启用主从复制"); + return results; + } + + // 检查 Slave_IO_Running 和 Slave_SQL_Running 状态 + if (!"Yes".equals(slaveStatus.get("Slave_IO_Running")) || + !"Yes".equals(slaveStatus.get("Slave_SQL_Running"))) { + results.put("replication_error", "严重级告警: Slave IO 或 SQL 线程异常"); + // 检查错误信息 + if (slaveStatus.get("Last_IO_Errno") != null || slaveStatus.get("Last_SQL_Errno") != null) { + results.put("io_error", slaveStatus.get("Last_IO_Error")); + results.put("sql_error", slaveStatus.get("Last_SQL_Error")); + } + return results; + } + + // 检查 Seconds_Behind_Master + int secondsBehindMaster = Integer.parseInt(slaveStatus.getOrDefault("Seconds_Behind_Master", "0")); + if (secondsBehindMaster > 0) { + if (secondsBehindMaster > 100) { + results.put("replication_lag", "严重级告警: 复制延迟超过100秒"); + } else { + results.put("replication_lag", "一般级告警: 复制延迟"); + } + } + + // 检查 Relay_Master_Log_File 和 Master_Log_File + if (!slaveStatus.get("Relay_Master_Log_File").equals(slaveStatus.get("Master_Log_File"))) { + results.put("replication_lag", "严重级告警: Relay日志落后于Master日志"); + } else { + // 检查 Exec_Master_Log_Pos 和 Read_Master_Log_Pos 差异 + int execPos = Integer.parseInt(slaveStatus.getOrDefault("Exec_Master_Log_Pos", "0")); + int readPos = Integer.parseInt(slaveStatus.getOrDefault("Read_Master_Log_Pos", "0")); + if (execPos < readPos) { + results.put("replication_lag", "一般级告警: SQL线程落后于IO线程"); + } + } + + // 检查 GTID 差异 + String retrievedGtid = slaveStatus.get("Retrieved_Gtid_Set"); + String executedGtid = slaveStatus.get("Executed_Gtid_Set"); + if (retrievedGtid != null && executedGtid != null && !retrievedGtid.equals(executedGtid)) { + results.put("replication_lag", "一般级告警: GTID执行落后"); + } + + } catch (Exception e) { + results.put("error", "监控主从复制延迟失败: " + e.getMessage()); + } + + return results; + } }