diff --git a/config/defaults.yaml b/config/defaults.yaml index 1ac46a9ec81e108a24b9008c48e18d52db444b03..a6ae9f621311bc6036f52af91e6c14755d92cdb5 100644 --- a/config/defaults.yaml +++ b/config/defaults.yaml @@ -83,8 +83,63 @@ common: &common value is the recommended value for the adjustable parameter, please provide reasonable specific values based on the above environment configuration information, carefully confirm whether each value can be used, avoid application startup failures after setting. Do not provide any response other than JSON, avoid adding comments. Please output in English. slow: + mem: + value: | + You are a memory summarization system, responsible for recording and preserving the complete interaction history between humans and the AI tuning expert. You will receive the expert’s tuning ideas and actual execution feedback from the past N steps. Your task is to generate a comprehensive summary of the tuning process, covering the expert’s tuning concepts and corresponding feedback details, ensuring there is no ambiguity. **The expert’s tuning ideas do not need to be fully preserved; only the most important core concepts and key feedback should be summarized.** + ### Overall Structure: + - **Overview (global metadata):** + - **Task Objective**: The tuning goal the expert is working to achieve. + - **Progress Status**: Current completion percentage, along with a summary of completed milestones or steps. + - **Tuning Operation Sequence (numbered steps)**: Record each iteration exactly once. + 1. **Iteration i:** + - Tuning Concept: + The main idea of this tuning iteration, including bottleneck analysis, underutilized resources, etc. Must record highly specific optimization strategies for the given scenario. + - Core Parameters: + Core parameters related to the tuning concept. Record the parameters modified during tuning and their specific values. + - Operation Result: + The performance result of this iteration.
+ - Key Findings: + Important conclusions drawn from feedback, including parameter effectiveness and reflections on the tuning method. For discrete parameters, if their importance is confirmed, they should be fixed in the key findings; for continuous parameters, the direction of adjustment should be recorded. + ### Example Template: + ``` + ## Expert Tuning History Summary + **Task Objective**: Increase MySQL QPS by 10% in the current environment. + **Progress Status**: 70% completed — achieved a 7% performance improvement. + 1. **Iteration 1:** + - Tuning Concept: Memory usage is far from the upper limit; adjust memory-related parameters to improve utilization. + - Core Parameters: {memory=50GB, xx=true} + - Operation Result: Performance result of this iteration is 60, improvement: 20.00% + - Key Findings: Increasing memory settings was correct; further increasing memory-related parameters is beneficial. + 2. **Iteration 2:** + - Tuning Concept: Increase shared_buffers to reduce memory cache misses. + - Core Parameters: {shared_buffers=20GB, maintenance_work_mem=2GB} + - Operation Result: Performance result of this iteration is 65, improvement: 30.00% + - Key Findings: Increasing `shared_buffers` improved performance, but memory bottleneck has not yet been reached. + ... (subsequent iterations continue to be recorded by number) + ``` recommender: - value: "" + value: | + You are a system application optimization expert. Your task is to leverage your extensive experience in optimization to provide relevant recommendations. + ### Overall Target: + - The performance metric for the scenario is **{self.performance_metric.name}**, which signifies: {self.performance_metric.value}. The goal of tuning optimization is to achieve {self.slo_goal:.2%}. 
+ - The system performance analysis report is as follows: {self.performance_analysis_report} + - The related parameters available for analysis and their descriptions are: {params_set_str} + """ + {long_mem} + {short_mem} + """ + ### Guidelines: + - **Clear**: Ensure that the answers are clear, concise, and directly address the question. + - **Cautious, Lazy-Type Modification**: Only output parameter changes expected to yield good results; for continuous parameters, the adjustment range should be between 0~50%, and for discrete/enumerated parameters, avoid extreme adjustment spans; do not adjust parameters that are analyzed in the context as having "no impact" on performance. + - **Logical**: Parameter adjustments should align with the tuning strategy, with each adjustment having a corresponding logic, and strictly adjust mutually exclusive parameters together. + - **Strict Formatting**: Must meet type and unit requirements; the default unit for numerical values is "bytes". If a unit follows the numerical value, it should be represented as a string (e.g., "512MB"). Parameter output must be in **JSON format**. + ### Example Template: + ``` + {{ + "tuning_concept": "The main bottleneck in the current scenario is IO, but the system's memory usage is only about 30% of the current capacity. The current memory parameter is set at 20G, so it is advisable to increase it appropriately to improve memory utilization and thereby accelerate performance ... (Additional tuning_concept)", + "PARAM": {{ + "memory": "30GB", + ... (Additional params) + }} + }} + ``` meta: {lang: en, version: 1} # ==================================================== nginx ===================================================== nginx: @@ -212,8 +267,61 @@ nginx: - Only output one JSON object, key is "adjustable parameter name", value is "recommended value". - Do not output any extra text, explanations, examples, code fences, or comments.
slow: + mem: + value: | + You are a memory summarization system that records and preserves the complete interaction history between a human and an AI tuning expert focused on Nginx performance optimization. You are provided with the expert's tuning ideas and actual execution feedback over the past N steps. Your task is to produce a comprehensive summary of the tuning process that includes the expert's core ideas and their corresponding feedback details to ensure there is no ambiguity. **The expert's ideas do not need to be stored verbatim; summarize only the most important core concepts and key feedback. Crucially, you must keep an entry for every interaction round; never discard, omit, or forget an iteration, as the full sequence is essential for accurate context preservation and future reasoning.** + ### Overall Structure: + - **Overview (Global Metadata):** + - **Task Objective**: The specific performance goal for Nginx (e.g., increase QPS, reduce latency, improve concurrency handling). + - **Progress Status**: Current completion percentage, achieved improvements, and milestones reached. + 1. **Iteration i**: + - Tuning Concept: + The main concept behind the tuning action, including identified bottlenecks (e.g., connection limits, CPU saturation, keep-alive inefficiency), underutilized resources, or scenario-specific strategies (e.g., static file serving, reverse proxy buffering). + - Core Params: + The key Nginx configuration parameters modified in this round. Include exact parameter names and values (e.g., worker_processes, worker_connections, keepalive_timeout, sendfile, tcp_nopush). + - Operation Result: + Quantitative performance outcome: e.g., QPS, P99 latency, error rate, CPU/memory usage before and after. + - Key Findings: + Insights derived from the results. For discrete parameters (e.g., on/off), fix their optimal value once confirmed. For continuous ones (e.g., buffer sizes), indicate the adjustment direction (increase/decrease).
Note any side effects (e.g., higher memory usage). + ### Example Template: + ``` + ## Summary of the expert's tuning history + **Task Objective**: Increase Nginx QPS by 20% under high concurrent load. + **Progress Status**: 70% completed - 7% improvement achieved. + 1. **Iteration 1**: + - Tuning Concept: Memory usage is far from its limit; memory-related parameters can be adjusted to raise utilization. + - Core Params: {memory=50GB, xx=true} + - Operation Result: QPS increased from 70507.0 → 129443.0 (+83.59% vs baseline, +83.59% vs prev). + - Key Finding: Increasing the memory setting was correct; raising memory-related parameters further is beneficial. + 2. **Iteration 2**: + - Tuning Concept: Increase shared_buffers to reduce memory cache misses. + - Core Params: {shared_buffers=20GB, maintenance_work_mem=2GB} + - Operation Result: QPS changed from 129443.0 → 129226.0 (−0.17% vs prev, +83.28% vs baseline). + - Key Finding: Increasing `shared_buffers` brought no further gain (a slight regression vs the previous round), suggesting memory is no longer the bottleneck. + ... (Additional numbered steps for subsequent actions) + ``` recommender: - value: "" + value: | + You are a system application optimization expert. Your task is to leverage your extensive experience in optimization to provide relevant recommendations. + ### Overall Target: + - The performance metric for the scenario is **{self.performance_metric.name}**, which signifies: {self.performance_metric.value}. The goal of tuning optimization is to achieve {self.slo_goal:.2%}. + - The system performance analysis report is as follows: {self.performance_analysis_report} + - The related parameters available for analysis and their descriptions are: {params_set_str} + """ + {long_mem} + {short_mem} + """ + ### Guidelines: + - **Clear**: Ensure that the answers are clear, concise, and directly address the question.
+ - **Cautious, Lazy-Type Modification**: Only output parameter changes expected to yield good results; for continuous parameters, the adjustment range should be between 0~50%, and for discrete/enumerated parameters, avoid extreme adjustment spans; do not adjust parameters that are analyzed in the context as having "no impact" on performance. + - **Logical**: Parameter adjustments should align with the tuning strategy, with each adjustment having a corresponding logic, and strictly adjust mutually exclusive parameters together. + - **Strict Formatting**: Must meet type and unit requirements; the default unit for numerical values is "bytes". If a unit follows the numerical value, it should be represented as a string (e.g., "512MB"). Parameter output must be in **JSON format**. + ### Example Template: + ``` + {{ + "tuning_concept": "The main bottleneck in the current scenario is IO, but the system's memory usage is only about 30% of the current capacity. The current memory parameter is set at 20G, so it is advisable to increase it appropriately to improve memory utilization and thereby accelerate performance ... (Additional tuning_concept)", + "PARAM": {{ + "memory": "30GB", + ...
(Additional params) + }} + }} + ``` meta: {lang: en, version: 1} # ==================================================== mysql ===================================================== mysql: diff --git a/config/defaults_zh.yaml b/config/defaults_zh.yaml index 6c1baced36f2d7d4ec23e2ba392af08cb76a8ab0..7df5d9b6f65eac83e3459de8ba8201e56a4217a1 100644 --- a/config/defaults_zh.yaml +++ b/config/defaults_zh.yaml @@ -82,8 +82,63 @@ common: &common value是可调参数的推荐取值,请根据上面的环境配置信息给出合理的具体取值,请仔细确认各个值是否可以被使用,避免设置后应用无法启动。 请勿给出除了json以外其他的回复,切勿增加注释。 slow: + mem: + value: | + 你是一个记忆摘要系统,负责记录并保存人类与 AI 调优专家之间的完整交互历史。你将获得专家在过去N个步骤中的调优思路和实际执行反馈。你的任务是生成一份全面的调优过程摘要,涵盖专家的调优理念和对应的反馈细节,确保没有任何歧义。 **专家的调优思路不需要完整保存;只需总结最重要的核心概念和关键反馈。** + ### 总体结构: + - **概览(全局元数据):** + - **任务目标**:专家正在努力实现的调优目标。 + - **进展状态**:当前完成百分比,以及已完成的具体里程碑或步骤摘要。 + - **调优操作序列(编号步骤)**: 每次迭代只记录一次。 + 1. **第i次迭代:** + - 调优理念: + 本次调优的主要思路,包括瓶颈分析、资源未充分利用等。需记录针对具体场景的高度特定的优化策略。 + - 核心参数: + 与调优理念相关的核心参数。记录调优过程中修改的核心参数及其具体数值。 + - 操作结果: + 本次迭代的性能结果。 + - 关键发现: + 从反馈中得出的重要结论,包括参数的有效性和对调优方法的反思。 对于离散类型的参数,如果其重要性被确认,应在关键发现中固定输出; 对于连续类型参数,应记录其应调整的方向。 + ### 示例模板: + ``` + ## 专家调优历史摘要 + **任务目标**: 在当前环境下将 MySQL 的 QPS 提高 10%。 + **进展状态**: 已完成 70% —— 已实现 7% 的性能提升。 + 1. **第 1 次迭代:** + - 调优理念:内存使用率远未达到上限,可调整与内存利用相关的参数以提升使用率。 + - 核心参数:{memory=50GB, xx=true} + - 操作结果:本次迭代性能结果为 60,提升幅度:20.00% + - 关键发现:增加内存设置是正确的,继续增大内存相关参数更有利。 + 2. **第 2 次迭代:** + - 调优理念:提升 shared_buffers 以减少内存缓存未命中。 + - 核心参数:{shared_buffers=20GB, maintenance_work_mem=2GB} + - 操作结果:本次迭代性能结果为 65,提升幅度:30.00% + - 关键发现:提升 `shared_buffers` 带来了性能提升,但尚未达到内存瓶颈。 + ...
(后续迭代按编号继续记录) + ``` recommender: - value: "" + value: | + 你是一位系统应用优化专家。你的任务是利用你在优化方面的丰富经验,提供相关的建议。 + ### 总体目标 + - 本场景的性能指标是 {self.performance_metric.name},其含义为:{self.performance_metric.value}。调优优化的目标是实现 {self.slo_goal:.2%}。 + - 系统性能分析报告如下: {self.performance_analysis_report} + - 可供分析的相关参数及其描述为:{params_set_str} + """ + {long_mem} + {short_mem} + """ + ### 优化指南 + - **清晰**:确保答案清楚、简洁,并直接回应问题。 + - **谨慎、懒惰型修改**:仅输出预期能带来良好效果的参数调整;对于连续型参数,调整范围应在 0~50% 之间;对于离散/枚举型参数,避免极端跨度调整;不要调整在上下文分析中被认定为“对性能无影响”的参数。 + - **逻辑性**:参数调整应符合调优策略,每个调整都应有相应的逻辑,并严格同时调整互斥参数。 + - **严格格式化**:必须符合类型和单位要求;数值的默认单位为 “bytes”。如果数值后跟单位,应以字符串表示(例如 "512MB")。参数输出必须为**JSON 格式**。 + ### 示例模板 + ``` + {{ + "tuning_concept": "当前场景的主要瓶颈在 IO,但系统的内存使用率仅约为当前容量的 30%。当前内存参数设置为 20G,建议适当增加以提升内存资源利用率,从而加速性能 ... (更多调优概念)", + "PARAM": {{ + "memory": "30GB", + ... (更多参数) + }} + }} + ``` meta: {lang: zh, version: 1} # ==================================================== nginx ===================================================== nginx: @@ -208,8 +263,61 @@ nginx: - 仅输出一个 JSON 对象,键为“可调参数名称”,值为“推荐取值”。 - 不要输出任何多余文字、说明、示例、代码围栏或注释。 slow: + mem: + value: | + 你是一个记忆摘要系统,负责记录并保存人类与专注于 Nginx 性能优化的 AI 调优专家之间的完整交互历史。你将获得专家在过去 N 个步骤中的调优思路和实际执行反馈。你的任务是生成一份全面的调优过程摘要,涵盖专家的核心理念及其对应的反馈细节,确保没有任何歧义。 **专家的调优思路不需要完整保存;只需总结最重要的核心概念和关键反馈。至关重要的是,每一轮交互都必须保留一条对应记录;绝不能丢弃、遗漏或遗忘任何迭代,因为完整的序列对于准确的上下文保留和未来推理至关重要。** + ### 总体结构: + - **概览(全局元数据):** + - **任务目标**: Nginx 的具体性能目标(例如:提升 QPS、降低延迟、改善并发处理能力)。 + - **进展状态**: 当前完成百分比、已实现的改进以及已达到的里程碑。 + 1. **第 i 次迭代**: + - 调优理念: + 本次调优的主要思路,包括识别出的瓶颈(例如:连接数限制、CPU 饱和、keep-alive 效率低)、资源未充分利用,或特定场景的策略(例如:静态文件服务、反向代理缓冲)。 + - 核心参数: + 本轮修改的关键 Nginx 配置参数。需包含精确的参数名称和数值(例如:worker_processes、worker_connections、keepalive_timeout、sendfile、tcp_nopush)。 + - 操作结果: + 定量化的性能结果,例如:QPS、P99 延迟、错误率、CPU/内存使用率的前后对比。 + - 关键发现: + 从结果中得出的洞察。对于离散型参数(例如:开/关),如果其最优值已确认,应固定输出;对于连续型参数(例如:缓冲区大小),应记录调整方向(增加/减少)。同时需注明任何副作用(例如:更高的内存占用)。 + ### 示例模板: + ``` + ## 专家调优历史摘要 + **任务目标**: 在高并发负载下将 Nginx QPS 提高 20%。 + **进展状态**: 已完成 70% —— 已实现 7% 的性能提升。 + 1.
**第 1 次迭代**: + - 调优理念: 内存使用率远未达到上限,可调整与内存利用相关的参数以提升使用率。 + - 核心参数: {memory=50GB, xx=true} + - 操作结果: QPS 从 70507.0 → 129443.0 (相较基线提升 +83.59%,相较上次提升 +83.59%)。 + - 关键发现: 增加内存设置是正确的,进一步增大内存相关参数更有利。 + 2. **第 2 次迭代**: + - 调优理念: 提升 shared_buffers 以减少内存缓存未命中。 + - 核心参数: {shared_buffers=20GB, maintenance_work_mem=2GB} + - 操作结果: QPS 从 129443.0 → 129226.0 (相较上次下降 −0.17%,相较基线提升 +83.28%)。 + - 关键发现: 提升 `shared_buffers` 未带来进一步提升(相较上次略有回退),说明内存可能已不再是瓶颈。 + ... (后续迭代按编号继续记录) + ``` recommender: - value: "" + value: | + 你是一位系统应用优化专家。你的任务是利用你在优化方面的丰富经验,提供相关的建议。 + ### 总体目标 + - 本场景的性能指标是 {self.performance_metric.name},其含义为:{self.performance_metric.value}。调优优化的目标是实现 {self.slo_goal:.2%}。 + - 系统性能分析报告如下: {self.performance_analysis_report} + - 可供分析的相关参数及其描述为:{params_set_str} + """ + {long_mem} + {short_mem} + """ + ### 优化指南 + - **清晰**:确保答案清楚、简洁,并直接回应问题。 + - **谨慎、懒惰型修改**:仅输出预期能带来良好效果的参数调整;对于连续型参数,调整范围应在 0~50% 之间;对于离散/枚举型参数,避免极端跨度调整;不要调整在上下文分析中被认定为“对性能无影响”的参数。 + - **逻辑性**:参数调整应符合调优策略,每个调整都应有相应的逻辑,并严格同时调整互斥参数。 + - **严格格式化**:必须符合类型和单位要求;数值的默认单位为 “bytes”。如果数值后跟单位,应以字符串表示(例如 "512MB")。参数输出必须为**JSON 格式**。 + ### 示例模板 + ``` + {{ + "tuning_concept": "当前场景的主要瓶颈在 IO,但系统的内存使用率仅约为当前容量的 30%。当前内存参数设置为 20G,建议适当增加以提升内存资源利用率,从而加速性能 ... (更多调优概念)", + "PARAM": {{ + "memory": "30GB", + ... (更多参数) + }} + }} + ``` meta: {lang: zh, version: 1} # ==================================================== mysql ===================================================== mysql: diff --git a/src/memory/base.py b/src/memory/base.py new file mode 100644 index 0000000000000000000000000000000000000000..4a34d4a4fccaa251c36056e7980b213dd9b6f2d8 --- /dev/null +++ b/src/memory/base.py @@ -0,0 +1,43 @@ +from abc import ABC, abstractmethod + +class MemoryBase(ABC): + @abstractmethod + def get(self, memory_id:int): + """ + Get the latest long-term and short-term memory. + + Args: + memory_id (int): ID of the memory to retrieve. If -1, return the latest memory. + + Returns: + dict: Retrieved memory.
+ """ + pass + + @abstractmethod + def update(self, data:str): + """ + Update a memory. + + Args: + data (str): New content to update the memory with. + + Returns: + dict: Success message indicating the memory was updated. + """ + pass + + @abstractmethod + def history(self): + """ + Get the history of changes for a memory by ID. + + Args: + None + + Returns: + list: List of changes for the memory. + """ + pass + + diff --git a/src/memory/h_mem.py b/src/memory/h_mem.py new file mode 100644 index 0000000000000000000000000000000000000000..6f7177b80188e39a37bee949522c14f042266163 --- /dev/null +++ b/src/memory/h_mem.py @@ -0,0 +1,75 @@ +import logging +import hashlib +from datetime import datetime +import pytz +from copy import deepcopy +from src.memory.base import MemoryBase +from src.utils.llm import get_llm_response +from src.utils.prompt_instance import prompt_manager + +class HierarchicalMemory(MemoryBase): + def __init__(self, service_name): + self.long_term_memory = "" + self.short_term_meomry = "" + self.history_record = [] + self.service_name = service_name + + def get(self, memory_id:int=-1)->dict: + if memory_id==-1: + return { + "long": self.long_term_memory, + "short": self.short_term_meomry + } + if memory_id <= len(memory_id) and memory_id >0 : + return self.history_record[memory_id-1] + else: + logging.error("memory_id is exceed the hitory len") + return None + + def update(self, data:str): + if prompt_manager.get_mode(self.service_name) == "slow": + self.short_term_meomry = self._pack_short_mem(data) + self.long_term_memory = self._pack_long_mem(data) + self.history_record.append({ + "long": deepcopy(self.long_term_memory), + "short":deepcopy(self.short_term_meomry) + }) + return self.history_record[-1] + + def history(self): + return self.history_record + + def _pack_short_mem(self, data, metadata=None)->dict|None: + metadata = metadata or {} + short_term = data + metadata["data"] = short_term + metadata["hash"] = 
hashlib.md5(short_term.encode()).hexdigest() + metadata["created_at"] = datetime.now(pytz.timezone("US/Pacific")).isoformat() + metadata["action"] = "UPDATE" + return metadata + + def _pack_long_mem(self, data, metadata=None)->dict|None: + metadata = metadata or {} + long_mem = self.long_term_memory['data'] if self.long_term_memory else "" + + recommend_prompt_format = prompt_manager.get( + self.service_name, + "slow", + "mem" + )['value'] + + parsed_message = recommend_prompt_format + "\n" + long_mem + "\n" + data + "\n" + "Create memory summary of the above conversation." + + logging.info(parsed_message) + try: + memory_content = get_llm_response(prompt=parsed_message) + except Exception as e: + logging.error(f"Error generating procedural memory summary: {e}") + raise + # logging.info(f"updated long mem:{memory_content}") + long_term = memory_content + metadata["data"] = long_term + metadata["hash"] = hashlib.md5(long_term.encode()).hexdigest() + metadata["created_at"] = datetime.now(pytz.timezone("US/Pacific")).isoformat() + metadata["action"] = "UPDATE" + return metadata diff --git a/src/performance_optimizer/param_optimizer.py b/src/performance_optimizer/param_optimizer.py index e1ac2b156351d390a31b5e1c6ea01d5901a98ab1..64a843e27799f18b9c04cd6402c3e932f7f5d054 100644 --- a/src/performance_optimizer/param_optimizer.py +++ b/src/performance_optimizer/param_optimizer.py @@ -8,6 +8,7 @@ from src.performance_test.pressure_test import wait_for_pressure_test from src.utils.config.app_config import AppInterface from src.utils.shell_execute import SshClient from src.utils.snapshot import load_snapshot, save_snapshot +from src.memory.h_mem import HierarchicalMemory class ParamOptimizer: @@ -77,6 +78,19 @@ class ParamOptimizer: return True return False + def pressure_test(self): + logging.info("[ParamOptimizer] waiting for pressure test to finish ...") + pressure_test_result = wait_for_pressure_test(timeout=self.benchmark_timeout) + if pressure_test_result.status_code
!= 0: + raise RuntimeError( + f"[ParamOptimizer] failed to run pressure test, err msg is {pressure_test_result.err_msg}" + ) + baseline = float(pressure_test_result.output) + logging.info( + f"[ParamOptimizer] pressure test finished, baseline is {baseline}" + ) + return baseline + def benchmark(self): logging.info(f"🔄 start to verify benchmark performance of {self.service_name}...") result = self.app_interface.benchmark() @@ -164,26 +178,10 @@ def run(self): # 运行benchmark,摸底参数性能指标 if self.pressure_test_mode: - logging.info(f"[ParamOptimizer] waiting for pressure test finished ...") - pressure_test_result = wait_for_pressure_test(timeout=self.benchmark_timeout) - - if pressure_test_result.status_code != 0: - raise RuntimeError( - f"[ParamOptimizer] failed to run pressure test, err msg is {pressure_test_result.err_msg}" - ) - - baseline = float(pressure_test_result.output) - logging.info( - f"[ParamOptimizer] pressure test finished, baseline is {baseline}" - ) + baseline = self.pressure_test() else: baseline = self.benchmark() - # 保存每轮调优的结果,反思调优目标是否达到 - historys = { - "best_result": {}, - "worst_result": {}, - "previous_result": {} - } + best_result = baseline worst_result = baseline curr_recommend_params = {} @@ -194,9 +192,29 @@ f"[{0}/{self.max_iterations}] performance baseline of {self.service_name} is: {baseline}" ) + # normal模式,通过history保存每轮调优的结果,反思调优目标是否达到 + historys = { + "best_result": {}, + "worst_result": {}, + "previous_result": {} + } + + # slow模式,通过mem机制来记忆调优过程,以期达到更优的调优结果 + mem = HierarchicalMemory(self.service_name) + mem.update( + f"**Task Objective**: Optimize {self.service_name} performance by tuning parameters to increase throughput and reduce latency.\n" + f"**Progress Status**: Baseline performance: {baseline}" + ) + for i in range(self.max_iterations): + current_mem = mem.get() # 未达成目标的情况下,根据调优结果与历史最优的参数,执行参数调优推荐,给出参数名和参数值 - recommend_params = self.param_recommender.run(history_result=historys,
is_positive=is_positive) + recommend_params = self.param_recommender.run( + long_mem=current_mem["long"], + short_mem=current_mem["short"], + history_result=historys, + is_positive=is_positive + ) # 设置参数生效 self.apply_params(recommend_params) @@ -219,6 +237,7 @@ script_path = '/tmp/euler-copilot-params.sh' self.save_restart_params_to_script(recommend_params, script_path, i + 1) self.recover_cluster() + if performance_result is None: historys["previous_result"] = {"previous_performance": "benchmark failed, because param is invalid.", "recommend_param": recommend_params} self.apply_params(self.current_params) @@ -252,6 +271,13 @@ ratio = self.calc_improve_rate(baseline, performance_result, symbol) + # mem更新 + performance_test_result = ( + f"Iteration: {i + 1}. This iteration performance result: {performance_result}, " + f"improvement: {ratio:.2%}, param change: {recommend_params}" + ) + mem.update(performance_test_result) + logging.info( f"[{i + 1}/{self.max_iterations}] performance baseline of {self.service_name} is {baseline}, best result: {best_result}, this round result: {performance_result if performance_result is not None else '-'}, performance improvement: {ratio:.2%}" ) diff --git a/src/performance_optimizer/param_recommender.py b/src/performance_optimizer/param_recommender.py index 17070a5575a309ada8f6ff6c0efffd94680df24b..edaa3638fcbcabbab7487c43d805aa4b17c1d34f 100644 --- a/src/performance_optimizer/param_recommender.py +++ b/src/performance_optimizer/param_recommender.py @@ -84,42 +84,90 @@ for improve_text, recommend_params in history_entries ] return filtered_history - - def _process_chunk(self, history_result, cur_params_set, is_positive): + + def fast_prompt_mode(self, params_set_str, history_result, allowed_set): + recommend_prompt_format = prompt_manager.get( + self.service_name, + "fast", + "recommender" + )['value'] + + allowed_set.add("params_set_str") +
allowed_set.add("history_result") + + recommend_prompt, extras = prompt_manager.render_by_parse(recommend_prompt_format, allowed_set) + if len(extras) != 0: + logging.warn(f"param not in custom offered param {extras}") + recommended_params = get_llm_response(recommend_prompt) + + return json_repair(recommended_params) + + def normal_prompt_mode(self, params_set_str, history_result, allowed_set, is_positive): + idea_prompt_format = prompt_manager.get( + self.service_name, + "normal", + "idea" + )['value'] + + allowed_set.add("params_set_str") + + idea_prompt, extras = prompt_manager.render_by_parse(idea_prompt_format, allowed_set) + if len(extras) != 0: + logging.warn(f"param not in custom offered param {extras}") + optimization_idea = get_llm_response(idea_prompt) + allowed_set.add("optimization_idea") + + recommend_prompt_format = prompt_manager.get( + self.service_name, + "normal", + 'recommender_positive' if is_positive else 'recommender_negative' + )['value'] + + allowed_set.add("history_result") + + recommend_prompt, extras = prompt_manager.render_by_parse(recommend_prompt_format, allowed_set) + if len(extras) != 0: + logging.warn(f"param not in custom offered param {extras}") + recommended_params = get_llm_response(recommend_prompt) + + return json_repair(recommended_params) + + def slow_prompt_mode(self, params_set_str, long_mem, short_mem, allowed_set): + recommend_prompt_format = prompt_manager.get( + self.service_name, + "slow", + "recommender" + )['value'] + + allowed_set.add("params_set_str") + allowed_set.add("long_mem") + allowed_set.add("short_mem") + + recommend_prompt, extras = prompt_manager.render_by_parse(recommend_prompt_format, allowed_set) + if len(extras) != 0: + logging.warn(f"param not in custom offered param {extras}") + recommended_params = get_llm_response(recommend_prompt) + + return json_repair(recommended_params)["PARAM"] + + def _process_chunk(self, long_mem, short_mem, history_result, cur_params_set, is_positive): history_result = 
self._get_histort(history_result, cur_params_set) - - params_set_str = ",".join(cur_params_set) allowed_set = set([ "self.service_name", "self.performance_metric.name", "self.performance_metric.value", "self.slo_goal", "self.static_profile", - "history_result", "self.performance_analysis_report", - "params_set_str", ]) + prompt_mode = prompt_manager.get_mode(self.service_name) if prompt_mode == "fast": - recommend_prompt_format = prompt_manager.get(self.service_name, prompt_mode, 'recommender')['value'] - recommend_prompt, extras = prompt_manager.render_by_parse(recommend_prompt_format, allowed_set) - if len(extras) != 0: - logging.warn(f"param not in custom offered param {extras}") - recommended_params = get_llm_response(recommend_prompt) - elif prompt_mode == "normal": - idea_prompt_format = prompt_manager.get(self.service_name, prompt_mode, 'idea')['value'] - idea_prompt, extras = prompt_manager.render_by_parse(idea_prompt_format, allowed_set) - optimization_idea = get_llm_response(idea_prompt) - allowed_set.add("optimization_idea") - recommend_prompt_format = prompt_manager.get(self.service_name, prompt_mode, - 'recommender_positive' if is_positive else 'recommender_negative')['value'] - recommend_prompt, extras = prompt_manager.render_by_parse(recommend_prompt_format, allowed_set) - recommended_params = get_llm_response(recommend_prompt) + recommended_params_set = self.fast_prompt_mode(",".join(cur_params_set), history_result, allowed_set) + elif prompt_mode == "slow": + recommended_params_set = self.slow_prompt_mode(",".join(cur_params_set), long_mem, short_mem, allowed_set) else: - # todo for slow prompt - recommended_params = get_llm_response(recommend_prompt) - - recommended_params_set = json_repair(recommended_params) + recommended_params_set = self.normal_prompt_mode(",".join(cur_params_set), history_result, allowed_set, is_positive) result = {} for param_name, param_value in recommended_params_set.items(): @@ -127,14 +175,19 @@
result[param_name] = param_value return result - def run(self, history_result, is_positive=True): + def run(self, long_mem, short_mem, history_result, is_positive=True): resultset = {} for i in range(0, len(self.params_set), self.chunk_size): cur_params_set = self.params_set[i: i + self.chunk_size] # 提交任务给线程池,返回 future-like 对象(你线程池需要支持这个) thread_pool_manager.add_task( - self._process_chunk, history_result, cur_params_set, is_positive + self._process_chunk, + long_mem, + short_mem, + history_result, + cur_params_set, + is_positive ) thread_pool_manager.run_all_tasks()
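Reviewer note: the new `fast_prompt_mode` / `normal_prompt_mode` / `slow_prompt_mode` helpers all depend on `prompt_manager.render_by_parse` to substitute `{name}` placeholders (such as `{long_mem}` and `{short_mem}`) and to report names outside the allowed set as `extras`. That helper is not part of this diff, so the sketch below is a hypothetical stand-in for the contract the new code assumes; `render_stub` and its dict-based lookup are illustrative, not the project's actual API:

```python
import re

def render_stub(template: str, values: dict) -> tuple:
    """Fill {name} placeholders from `values`; collect unresolved names as extras."""
    extras = set()

    def substitute(match):
        name = match.group(1)
        if name in values:
            return str(values[name])
        extras.add(name)          # placeholder with no offered value
        return match.group(0)     # leave the unresolved placeholder untouched

    rendered = re.sub(r"\{([A-Za-z_][\w.]*)\}", substitute, template)
    return rendered, extras

prompt, extras = render_stub(
    "Params: {params_set_str} Memory: {long_mem}",
    {"params_set_str": "worker_processes,keepalive_timeout"},
)
```

Under this reading, a non-empty `extras` is exactly the condition the new recommender code reports via `logging.warning`.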
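Reviewer note: `HierarchicalMemory.get` as first written checked `memory_id <= len(memory_id)`, which calls `len` on an int and raises `TypeError`; the intended check is against `len(self.history_record)`, with 1-based ids and `-1` meaning "latest". The real class pulls in the LLM client and prompt manager, so this stripped-down `MiniMemory` stand-in (hypothetical, for illustration only) just demonstrates the intended bounds behavior:

```python
class MiniMemory:
    """Stand-in mirroring HierarchicalMemory's 1-based history retrieval."""

    def __init__(self):
        self.history_record = []

    def get(self, memory_id: int = -1):
        if memory_id == -1:                            # -1 means "latest snapshot"
            return self.history_record[-1] if self.history_record else None
        if 0 < memory_id <= len(self.history_record):  # ids are 1-based
            return self.history_record[memory_id - 1]
        return None                                    # out of range

mem = MiniMemory()
mem.history_record.append({"long": "summary-1", "short": "feedback-1"})
mem.history_record.append({"long": "summary-2", "short": "feedback-2"})
```

Both `0` and any id past the history length fall through to `None`, which is the behavior callers such as `ParamOptimizer` would need to handle.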
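Reviewer note: `ParamRecommender.run` fans the parameter list out in fixed-size chunks before handing each chunk to the thread pool. The slicing pattern is worth verifying standalone, independent of the project's `thread_pool_manager` (the `chunks` helper below is illustrative, not part of the diff):

```python
def chunks(seq, size):
    """Yield successive fixed-size slices of seq; the last slice may be shorter."""
    for i in range(0, len(seq), size):
        yield seq[i:i + size]

params_set = ["p1", "p2", "p3", "p4", "p5"]
chunked = list(chunks(params_set, 2))
```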