From 92267d3f803a6330116122f7c2dcc96d5cf68319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=9F=B3=E7=9F=B3?= Date: Wed, 3 Dec 2025 14:31:16 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=91=BD=E4=BB=A4?= =?UTF-8?q?=E8=A1=8C=E8=AF=AD=E5=8F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mcp_center/servers/rag/run.sh | 48 ++++++ mcp_center/servers/rag/src/cli.py | 82 +++++++++ mcp_center/servers/rag/src/cli/__init__.py | 0 mcp_center/servers/rag/src/cli/handle.py | 168 +++++++++++++++++++ mcp_center/servers/rag/src/cli/parse_args.py | 58 +++++++ 5 files changed, 356 insertions(+) create mode 100644 mcp_center/servers/rag/run.sh create mode 100644 mcp_center/servers/rag/src/cli.py create mode 100644 mcp_center/servers/rag/src/cli/__init__.py create mode 100644 mcp_center/servers/rag/src/cli/handle.py create mode 100644 mcp_center/servers/rag/src/cli/parse_args.py diff --git a/mcp_center/servers/rag/run.sh b/mcp_center/servers/rag/run.sh new file mode 100644 index 00000000..65d837b8 --- /dev/null +++ b/mcp_center/servers/rag/run.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# RAG 服务部署脚本 + +# 设置路径 +RAG_DIR="/usr/lib/euler-copilot-framework/mcp_center/servers/rag" +SERVICE_FILE="/usr/lib/euler-copilot-framework/mcp_center/service/rag.service" + +# 复制 service 文件 +if [ -f "$SERVICE_FILE" ]; then + cp "$SERVICE_FILE" /etc/systemd/system/ + echo "✅ Service 文件已复制" +else + echo "⚠️ 警告:未找到 service 文件:$SERVICE_FILE" +fi + +# 安装依赖 +if [ -f "$RAG_DIR/src/requirements.txt" ]; then + pip install -r "$RAG_DIR/src/requirements.txt" -i https://pypi.tuna.tsinghua.edu.cn/simple + echo "✅ 依赖安装完成" +fi + +# 重新加载 systemd +systemctl daemon-reload + +# 启用服务 +systemctl enable rag.service +echo "✅ 服务已启用" + +# 启动服务 +systemctl start rag.service +echo "✅ 服务已启动" + +# 查看服务状态 +systemctl status rag.service + +# 设置 CLI 工具权限并创建符号链接 +chmod +x "$RAG_DIR/src/cli.py" +rm -f /usr/local/bin/rag-server +ln -s "$RAG_DIR/src/cli.py" 
/usr/local/bin/rag-server +echo "✅ CLI 工具已安装:rag-server" + +echo "" +echo "安装完成!可以使用以下命令:" +echo " rag-server --help # 查看帮助" +echo " rag-server list_kb # 列出知识库" +echo " rag-server import_doc --file_paths /path/to/file.txt # 导入文档" + diff --git a/mcp_center/servers/rag/src/cli.py b/mcp_center/servers/rag/src/cli.py new file mode 100644 index 00000000..e89e0832 --- /dev/null +++ b/mcp_center/servers/rag/src/cli.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +""" +RAG Server CLI 工具 +用于直接调用 RAG 工具函数的命令行接口 +""" +import os +import sys + +# 从 systemd service 文件读取工作目录 +SERVICE_FILE = "/etc/systemd/system/rag.service" +PROJECT_ROOT = "/usr/lib/euler-copilot-framework/mcp_center" +if os.path.exists(SERVICE_FILE): + with open(SERVICE_FILE, "r") as f: + for line in f: + if line.strip().startswith("WorkingDirectory="): + PROJECT_ROOT = line.strip().split("=", 1)[1] + break + +# 添加项目根目录到 sys.path +if PROJECT_ROOT not in sys.path: + sys.path.insert(0, PROJECT_ROOT) + +# 添加当前目录到路径 +current_dir = os.path.dirname(os.path.abspath(__file__)) +if current_dir not in sys.path: + sys.path.insert(0, current_dir) + +# 导入 CLI 模块 +from cli.parse_args import parse_args +from cli.handle import ( + handle_create_kb, + handle_delete_kb, + handle_list_kb, + handle_select_kb, + handle_import_doc, + handle_list_doc, + handle_delete_doc, + handle_update_doc, + handle_search, + handle_export_db, + handle_import_db +) + +def main(): + """主函数""" + args = parse_args() + + if not args.command: + print("❌ 请指定命令,使用 --help 查看帮助") + sys.exit(1) + + success = False + + # 命令调度 + if args.command == "create_kb": + success = handle_create_kb(args) + elif args.command == "delete_kb": + success = handle_delete_kb(args) + elif args.command == "list_kb": + success = handle_list_kb(args) + elif args.command == "select_kb": + success = handle_select_kb(args) + elif args.command == "import_doc": + success = handle_import_doc(args) + elif args.command == "list_doc": + success = handle_list_doc(args) + elif args.command == 
"delete_doc": + success = handle_delete_doc(args) + elif args.command == "update_doc": + success = handle_update_doc(args) + elif args.command == "search": + success = handle_search(args) + elif args.command == "export_db": + success = handle_export_db(args) + elif args.command == "import_db": + success = handle_import_db(args) + + sys.exit(0 if success else 1) + +if __name__ == "__main__": + main() + diff --git a/mcp_center/servers/rag/src/cli/__init__.py b/mcp_center/servers/rag/src/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mcp_center/servers/rag/src/cli/handle.py b/mcp_center/servers/rag/src/cli/handle.py new file mode 100644 index 00000000..2872d4fd --- /dev/null +++ b/mcp_center/servers/rag/src/cli/handle.py @@ -0,0 +1,168 @@ +import os +import sys +import asyncio +import json +from typing import Dict, Any + +# 添加路径 +current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if current_dir not in sys.path: + sys.path.insert(0, current_dir) + +# 添加 mcp_center 目录到路径 +mcp_center_dir = os.path.abspath(os.path.join(current_dir, '../../..')) +if mcp_center_dir not in sys.path: + sys.path.insert(0, mcp_center_dir) + +from tool import ( + create_knowledge_base, + delete_knowledge_base, + list_knowledge_bases, + select_knowledge_base, + import_document, + search, + list_documents, + delete_document, + update_document, + export_database, + import_database +) + +def print_result(result: Dict[str, Any]): + """打印结果""" + if result.get("success"): + print(f"✅ {result.get('message', '操作成功')}") + if result.get("data"): + print(json.dumps(result["data"], ensure_ascii=False, indent=2)) + else: + print(f"❌ {result.get('message', '操作失败')}") + +def handle_create_kb(args): + """创建知识库""" + if not args.kb_name or not args.chunk_size: + print("❌ 缺少参数:--kb_name 和 --chunk_size 必填") + return False + + result = create_knowledge_base( + kb_name=args.kb_name, + chunk_size=args.chunk_size, + embedding_model=args.embedding_model, + 
embedding_endpoint=args.embedding_endpoint, + embedding_api_key=args.embedding_api_key + ) + print_result(result) + return result.get("success", False) + +def handle_delete_kb(args): + """删除知识库""" + if not args.kb_name: + print("❌ 缺少参数:--kb_name 必填") + return False + + result = delete_knowledge_base(args.kb_name) + print_result(result) + return result.get("success", False) + +def handle_list_kb(args): + """列出知识库""" + result = list_knowledge_bases() + print_result(result) + return result.get("success", False) + +def handle_select_kb(args): + """选择知识库""" + if not args.kb_name: + print("❌ 缺少参数:--kb_name 必填") + return False + + result = select_knowledge_base(args.kb_name) + print_result(result) + return result.get("success", False) + +async def handle_import_doc_async(args): + """导入文档(异步)""" + if not args.file_paths: + print("❌ 缺少参数:--file_paths 必填(文件路径列表)") + return False + + result = await import_document( + file_paths=args.file_paths, + chunk_size=args.chunk_size + ) + print_result(result) + return result.get("success", False) + +def handle_import_doc(args): + """导入文档(同步包装)""" + return asyncio.run(handle_import_doc_async(args)) + +def handle_list_doc(args): + """列出文档""" + result = list_documents() + print_result(result) + return result.get("success", False) + +def handle_delete_doc(args): + """删除文档""" + if not args.doc_name: + print("❌ 缺少参数:--doc_name 必填") + return False + + result = delete_document(args.doc_name) + print_result(result) + return result.get("success", False) + +async def handle_update_doc_async(args): + """更新文档(异步)""" + if not args.doc_name or not args.chunk_size: + print("❌ 缺少参数:--doc_name 和 --chunk_size 必填") + return False + + result = await update_document( + doc_name=args.doc_name, + chunk_size=args.chunk_size + ) + print_result(result) + return result.get("success", False) + +def handle_update_doc(args): + """更新文档(同步包装)""" + return asyncio.run(handle_update_doc_async(args)) + +async def handle_search_async(args): + """搜索(异步)""" + if not 
args.query: + print("❌ 缺少参数:--query 必填") + return False + + result = await search( + query=args.query, + top_k=args.top_k + ) + print_result(result) + return result.get("success", False) + +def handle_search(args): + """搜索(同步包装)""" + return asyncio.run(handle_search_async(args)) + +def handle_export_db(args): + """导出数据库""" + if not args.export_path: + print("❌ 缺少参数:--export_path 必填(绝对路径)") + return False + + result = export_database(args.export_path) + print_result(result) + return result.get("success", False) + +def handle_import_db(args): + """导入数据库""" + if not args.source_db_path: + print("❌ 缺少参数:--source_db_path 必填(绝对路径)") + return False + + result = import_database(args.source_db_path) + print_result(result) + return result.get("success", False) + diff --git a/mcp_center/servers/rag/src/cli/parse_args.py b/mcp_center/servers/rag/src/cli/parse_args.py new file mode 100644 index 00000000..ae4b3f72 --- /dev/null +++ b/mcp_center/servers/rag/src/cli/parse_args.py @@ -0,0 +1,58 @@ +import argparse + +def parse_args(): + """解析命令行参数""" + parser = argparse.ArgumentParser(description="rag-server 命令行工具") + subparsers = parser.add_subparsers(dest="command", help="可用命令") + + # 创建知识库 + create_kb_parser = subparsers.add_parser("create_kb", help="创建知识库") + create_kb_parser.add_argument("--kb_name", required=True, help="知识库名称") + create_kb_parser.add_argument("--chunk_size", type=int, required=True, help="chunk大小(token数)") + create_kb_parser.add_argument("--embedding_model", help="向量化模型名称") + create_kb_parser.add_argument("--embedding_endpoint", help="向量化服务端点URL") + create_kb_parser.add_argument("--embedding_api_key", help="向量化服务API Key") + + # 删除知识库 + delete_kb_parser = subparsers.add_parser("delete_kb", help="删除知识库") + delete_kb_parser.add_argument("--kb_name", required=True, help="知识库名称") + + # 列出知识库 + subparsers.add_parser("list_kb", help="列出所有知识库") + + # 选择知识库 + select_kb_parser = subparsers.add_parser("select_kb", help="选择知识库") + 
select_kb_parser.add_argument("--kb_name", required=True, help="知识库名称") + + # 导入文档 + import_doc_parser = subparsers.add_parser("import_doc", help="导入文档") + import_doc_parser.add_argument("--file_paths", nargs="+", required=True, help="文件路径列表(绝对路径)") + import_doc_parser.add_argument("--chunk_size", type=int, help="chunk大小(可选,默认使用知识库的chunk_size)") + + # 列出文档 + subparsers.add_parser("list_doc", help="列出文档") + + # 删除文档 + delete_doc_parser = subparsers.add_parser("delete_doc", help="删除文档") + delete_doc_parser.add_argument("--doc_name", required=True, help="文档名称") + + # 更新文档 + update_doc_parser = subparsers.add_parser("update_doc", help="更新文档") + update_doc_parser.add_argument("--doc_name", required=True, help="文档名称") + update_doc_parser.add_argument("--chunk_size", type=int, required=True, help="新的chunk大小(token数)") + + # 搜索 + search_parser = subparsers.add_parser("search", help="搜索文档") + search_parser.add_argument("--query", required=True, help="查询文本") + search_parser.add_argument("--top_k", type=int, help="返回数量(可选,默认5)") + + # 导出数据库 + export_db_parser = subparsers.add_parser("export_db", help="导出数据库") + export_db_parser.add_argument("--export_path", required=True, help="导出路径(绝对路径)") + + # 导入数据库 + import_db_parser = subparsers.add_parser("import_db", help="导入数据库") + import_db_parser.add_argument("--source_db_path", required=True, help="源数据库文件路径(绝对路径)") + + return parser.parse_args() + -- Gitee From ef32405953700d373a0293888a7ce15871330c2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=9F=B3=E7=9F=B3?= Date: Wed, 3 Dec 2025 15:30:12 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=8A=B6=E6=80=81?= =?UTF-8?q?=E4=BF=9D=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mcp_center/servers/rag/src/tool.py | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/mcp_center/servers/rag/src/tool.py b/mcp_center/servers/rag/src/tool.py index 06f0a20e..87243315 100644 --- 
a/mcp_center/servers/rag/src/tool.py +++ b/mcp_center/servers/rag/src/tool.py @@ -4,6 +4,7 @@ import uuid import shutil import logging import asyncio +import json from typing import Optional, Dict, Any, List current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -25,6 +26,35 @@ logger = logging.getLogger(__name__) _db_instance: Optional[Database] = None _db_path = os.path.join(current_dir, "database", "kb.db") _current_kb_id: Optional[str] = None +_state_file = os.path.join(current_dir, "database", "state.json") + + +def _load_state() -> None: + """从状态文件加载当前知识库ID(用于在不同进程间共享选择状态)""" + global _current_kb_id + try: + if os.path.exists(_state_file): + with open(_state_file, "r", encoding="utf-8") as f: + data = json.load(f) + _current_kb_id = data.get("current_kb_id") + except Exception as e: + logger.warning(f"[state] 加载当前知识库状态失败: {e}") + + +def _save_state() -> None: + """将当前知识库ID写入状态文件""" + try: + state_dir = os.path.dirname(_state_file) + if not os.path.exists(state_dir): + os.makedirs(state_dir, exist_ok=True) + with open(_state_file, "w", encoding="utf-8") as f: + json.dump({"current_kb_id": _current_kb_id}, f, ensure_ascii=False) + except Exception as e: + logger.warning(f"[state] 保存当前知识库状态失败: {e}") + + +# 模块导入时尝试加载之前保存的当前知识库状态 +_load_state() def _get_db() -> Database: @@ -203,6 +233,7 @@ def select_knowledge_base(kb_name: str) -> Dict[str, Any]: global _current_kb_id _current_kb_id = kb.id + _save_state() session = db.get_session() try: -- Gitee From 618b5d60ed705472a8227dbb364985cfa0a38d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E6=9F=B3=E7=9F=B3?= Date: Wed, 3 Dec 2025 15:40:19 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=9B=B4=E6=96=B0readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mcp_center/servers/rag/README.en.md | 113 ++++++++++++++++++++++++++++ mcp_center/servers/rag/README.md | 113 ++++++++++++++++++++++++++++ 2 files changed, 226 insertions(+) diff --git 
a/mcp_center/servers/rag/README.en.md b/mcp_center/servers/rag/README.en.md index 149e85f6..487a230c 100644 --- a/mcp_center/servers/rag/README.en.md +++ b/mcp_center/servers/rag/README.en.md @@ -21,3 +21,116 @@ This service is a comprehensive RAG (Retrieval-Augmented Generation) knowledge b | `export_database` | Exports the entire kb.db database file to the specified path | - `export_path`: Export path (absolute path, required) | Export result dictionary (including `source_path` source database path, `export_path` export path) | | `import_database` | Imports a .db database file and merges its contents into kb.db. Import will automatically handle name conflicts by adding timestamps to knowledge base and document names | - `source_db_path`: Source database file path (absolute path, required) | Import result dictionary (including `source_path` source database path, `imported_kb_count` number of imported knowledge bases, `imported_doc_count` number of imported documents) | + +## 3. `rag-server` CLI Usage Guide + +`rag-server` is a command-line wrapper around the core RAG tools, suitable for managing knowledge bases, importing documents, and performing searches directly from the terminal. After installation, you can run `rag-server <command> [options]` anywhere. + +### 3.1 Basic Usage + +- Show help: + +```bash +rag-server --help +``` + +- General format: + +```bash +rag-server <command> [--option value...] 
+``` + +### 3.2 Knowledge Base Management Commands + +- **Create knowledge base** + +```bash +rag-server create_kb --kb_name <name> --chunk_size <chunk_size> [--embedding_model model] [--embedding_endpoint URL] [--embedding_api_key KEY] +``` + +Description: +- `--kb_name`: Knowledge base name (required, must be unique) +- `--chunk_size`: Chunk size in tokens (required, e.g., 512, 1024) +- `--embedding_model`: Embedding model name (optional) +- `--embedding_endpoint`: Embedding service endpoint URL (optional) +- `--embedding_api_key`: Embedding service API Key (optional) + +- **Delete knowledge base** + +```bash +rag-server delete_kb --kb_name <name> +``` + +- **List knowledge bases** + +```bash +rag-server list_kb +``` + +- **Select current knowledge base** + +```bash +rag-server select_kb --kb_name <name> +``` + +Note: After selection, the current knowledge base ID is persisted to `database/state.json`, so different `rag-server` invocations will share the same selected KB state. + +### 3.3 Document Management Commands + +- **Import documents** + +```bash +rag-server import_doc --file_paths /abs/path/doc1.txt [/abs/path/doc2.txt ...] [--chunk_size <chunk_size>] +``` + +Description: +- `--file_paths`: One or more **absolute file paths** (required, at least one) +- `--chunk_size`: Override default chunk size (optional) +- You must call `select_kb` first to choose an active knowledge base. 
+ +- **List documents** + +```bash +rag-server list_doc +``` + +- **Delete document** + +```bash +rag-server delete_doc --doc_name <doc_name> +``` + +- **Update document chunk size and rebuild vectors** + +```bash +rag-server update_doc --doc_name <doc_name> --chunk_size <new_chunk_size> +``` + +### 3.4 Search Commands + +- **Search** + +```bash +rag-server search --query "<query>" [--top_k <top_k>] +``` + +Description: +- `--query`: Query text (required) +- `--top_k`: Number of results to return (optional, default from config, usually 5) + +### 3.5 Database Import/Export Commands + +- **Export database** + +```bash +rag-server export_db --export_path /abs/path/kb_export.db +``` + +- **Import database** + +```bash +rag-server import_db --source_db_path /abs/path/other_kb.db +``` + +Description: During import, name conflicts of knowledge bases and documents will be automatically resolved by appending timestamps. + diff --git a/mcp_center/servers/rag/README.md b/mcp_center/servers/rag/README.md index d5a6def4..ac743822 100644 --- a/mcp_center/servers/rag/README.md +++ b/mcp_center/servers/rag/README.md @@ -21,3 +21,116 @@ | `export_database` | 导出整个kb.db数据库文件到指定路径 | - `export_path`:导出路径(绝对路径,必填) | 导出结果字典(含`source_path`源数据库路径、`export_path`导出路径) | | `import_database` | 导入一个.db数据库文件,将其中的内容合并到kb.db中,导入时会自动处理重名冲突,为知识库和文档名称添加时间戳 | - `source_db_path`:源数据库文件路径(绝对路径,必填) | 导入结果字典(含`source_path`源数据库路径、`imported_kb_count`导入的知识库数量、`imported_doc_count`导入的文档数量) | + +## 三、`rag-server` 命令行使用指南 + +`rag-server` 是对 RAG 核心工具的命令行封装,适合在服务器上以 CLI 方式直接管理知识库、导入文档和执行检索。安装完成后,可直接在终端中使用 `rag-server <子命令> [参数]`。 + +### 1. 基本用法 + +- 查看帮助: + +```bash +rag-server --help +``` + +- 命令通用格式: + +```bash +rag-server <command> [--参数名 参数值...] +``` + +### 2. 
知识库管理相关命令 + +- **创建知识库** + +```bash +rag-server create_kb --kb_name <名称> --chunk_size <chunk_size> [--embedding_model 模型名] [--embedding_endpoint URL] [--embedding_api_key KEY] +``` + +说明: +- `--kb_name`:知识库名称(必填,必须唯一) +- `--chunk_size`:chunk 大小(必填,单位 token,例如 512、1024) +- `--embedding_model`:向量化模型名称(可选) +- `--embedding_endpoint`:向量化服务端点 URL(可选) +- `--embedding_api_key`:向量化服务 API Key(可选) + +- **删除知识库** + +```bash +rag-server delete_kb --kb_name <名称> +``` + +- **列出知识库** + +```bash +rag-server list_kb +``` + +- **选择当前知识库** + +```bash +rag-server select_kb --kb_name <名称> +``` + +说明:选择成功后,当前知识库 ID 会持久化到 `database/state.json` 中,不同次 `rag-server` 调用会自动复用该状态。 + +### 3. 文档管理相关命令 + +- **导入文档** + +```bash +rag-server import_doc --file_paths /abs/path/doc1.txt [/abs/path/doc2.txt ...] [--chunk_size <chunk_size>] +``` + +说明: +- `--file_paths`:一个或多个**绝对路径**的文件列表(必填,至少 1 个) +- `--chunk_size`:覆盖默认的 chunk 大小(可选) +- 在调用前需要先通过 `select_kb` 选择当前知识库 + +- **列出文档** + +```bash +rag-server list_doc +``` + +- **删除文档** + +```bash +rag-server delete_doc --doc_name <文档名称> +``` + +- **更新文档 chunk 大小并重建向量** + +```bash +rag-server update_doc --doc_name <文档名称> --chunk_size <新的chunk大小> +``` + +### 4. 检索相关命令 + +- **搜索** + +```bash +rag-server search --query "<查询文本>" [--top_k <返回数量>] +``` + +说明: +- `--query`:查询文本(必填) +- `--top_k`:返回结果数量(可选,默认从配置中读取,通常为 5) + +### 5. 数据库导入导出相关命令 + +- **导出数据库** + +```bash +rag-server export_db --export_path /abs/path/kb_export.db +``` + +- **导入数据库** + +```bash +rag-server import_db --source_db_path /abs/path/other_kb.db +``` + +说明:导入时会自动处理知识库和文档的重名冲突,为名称添加时间戳。 + -- Gitee