diff --git a/b4295.tar.gz b/b6602.tar.gz
similarity index 59%
rename from b4295.tar.gz
rename to b6602.tar.gz
index 0c324cf412af298ddfaf2bb013185adee3744822..f4ac243156c0f8c792ec58325ef5b09c67323e09 100644
Binary files a/b4295.tar.gz and b/b6602.tar.gz differ
diff --git a/backport-CVE-2025-49847.patch b/backport-CVE-2025-49847.patch
deleted file mode 100644
index f5835a9710c6c4d04704e60d4938081b63f37e27..0000000000000000000000000000000000000000
--- a/backport-CVE-2025-49847.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From e6d21d901a0e5aabd08a41d8000c5f4cd80c8b0f Mon Sep 17 00:00:00 2001
-From: Guy Goldenberg
-Date: Fri, 13 Jun 2025 19:20:25 +0300
-Subject: [PATCH] Merge commit from fork
-
-* vocab : prevent integer overflow during load
-
-* Add static cast and GGML_ABORT
-
----------
-
-Co-authored-by: Georgi Gerganov
----
- src/llama-vocab.cpp | 7 +++++++
- 1 file changed, 7 insertions(+)
-
-diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index 8c9aaf5..6974a33 100644
---- a/src/llama-vocab.cpp
-+++ b/src/llama-vocab.cpp
-@@ -11,6 +11,9 @@
- #include
- #include
- #include
-+#include
-+#include
-+#include
-
- //
- // helpers
-@@ -1785,6 +1788,10 @@ int32_t llama_token_to_piece_impl(const struct llama_vocab & vocab, llama_token
-     // copy piece chars to output text buffer
-     // skip up to 'lstrip' leading spaces before copying
-     auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
-+        if (size >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-+            GGML_ABORT("invalid token size: %zu exceeds int32_t limit", size);
-+        }
-+
-         for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
-             token++;
-             size--;
--- 
-2.43.0
-
-
diff --git a/backport-CVE-2025-52566.patch b/backport-CVE-2025-52566.patch
deleted file mode 100644
index 6a11a3aa50a6cef81b0ba54bd2a386f2b3eda3c5..0000000000000000000000000000000000000000
--- a/backport-CVE-2025-52566.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 5084d9fc8b876172678ce3d3ba81223e7934be4b Mon Sep 17 00:00:00 2001
-From: Ruikai Peng
-Date: Fri, 20 Jun 2025 22:13:06 +0800
-Subject: [PATCH] vocab : prevent tokenizer overflow (#14301)
-
-* vocab : prevent stack overflow in tokenize
-
-* vocab : return error instead of aborting on oversized token count
-
-* vocab : INT32_MIN from llama_tokenize on overflow
----
- common/common.cpp   | 3 +++
- include/llama.h     | 1 +
- src/llama-vocab.cpp | 4 ++++
- 3 files changed, 8 insertions(+)
-
-diff --git a/common/common.cpp b/common/common.cpp
-index 6143516..c139773 100644
---- a/common/common.cpp
-+++ b/common/common.cpp
-@@ -1584,6 +1584,9 @@ std::vector<llama_token> common_tokenize(
-     int n_tokens = text.length() + 2 * add_special;
-     std::vector<llama_token> result(n_tokens);
-     n_tokens = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
-+    if (n_tokens == std::numeric_limits<int32_t>::min()) {
-+        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
-+    }
-     if (n_tokens < 0) {
-         result.resize(-n_tokens);
-         int check = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
-diff --git a/include/llama.h b/include/llama.h
-index 36945cd..50a1ca3 100644
---- a/include/llama.h
-+++ b/include/llama.h
-@@ -929,6 +929,7 @@ extern "C" {
-     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
-     /// @return Returns the number of tokens on success, no more than n_tokens_max
-     /// @return Returns a negative number on failure - the number of tokens that would have been returned
-+    /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
-     /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
-     /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
-     /// as plaintext. Does not insert a leading space.
-diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index 6974a33..497d780 100644
---- a/src/llama-vocab.cpp
-+++ b/src/llama-vocab.cpp
-@@ -1744,6 +1744,10 @@ int32_t llama_tokenize_impl(
-         bool add_special,
-         bool parse_special) {
-     auto res = llama_tokenize_internal(vocab, std::string(text, text_len), add_special, parse_special);
-+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
-+        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
-+        return std::numeric_limits<int32_t>::min();
-+    }
-     if (n_tokens_max < (int) res.size()) {
-         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
-         return -((int) res.size());
--- 
-2.43.0
-
-
diff --git a/backport-CVE-2025-53630.patch b/backport-CVE-2025-53630.patch
deleted file mode 100644
index b293355131b8549b4c2c9f7c381d8835cff391ad..0000000000000000000000000000000000000000
--- a/backport-CVE-2025-53630.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 7d00e32369b13b1820d4acbf453232cef6de3171 Mon Sep 17 00:00:00 2001
-From: Miaoqian Lin
-Date: Wed, 9 Jul 2025 20:33:53 +0800
-Subject: [PATCH] ggml : prevent integer overflow in gguf tensor size
- calculation (#14595)
-
----
- ggml/src/ggml.c | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
-index 058941c..8845215 100644
---- a/ggml/src/ggml.c
-+++ b/ggml/src/ggml.c
-@@ -6854,7 +6854,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
-
-             const size_t size_cur = ggml_row_size(info->type, ne);
-
--            ctx->size += GGML_PAD(size_cur, ctx->alignment);
-+            size_t padded_size = GGML_PAD(size_cur, ctx->alignment);
-+            if (SIZE_MAX - ctx->size < padded_size) {
-+                GGML_LOG_ERROR("%s: tensor size overflow, cannot accumulate size %zu + %zu\n",
-+                        __func__, ctx->size, padded_size);
-+                gguf_free(ctx);
-+                return NULL;
-+            }
-+            ctx->size += padded_size;
-         }
-     }
-
--- 
-2.43.0
-
-
diff --git a/llama.cpp.spec b/llama.cpp.spec
index cf9ca8bb6bba36d92a4f430009fc2c405294a8b6..7a078c75a18f09a929e72c12442e7defd18cc890 100644
--- a/llama.cpp.spec
+++ b/llama.cpp.spec
@@ -1,62 +1,63 @@
 %define debug_package %{nil}
-%global llama_commitid b4295
+%global llama_commitid b6602
 
-Name: llama.cpp
-Version: 20241210
-Release: 4
-License: MIT
-Summary: Port of English lagre model LLaMA implemented based on C/C++
+Name: llama.cpp
+Version: 20251009
+Release: 1
+License: MIT
+Summary: Port of the large language model LLaMA implemented in C/C++
 
-URL: https://github.com/ggerganov/llama.cpp
+URL: https://github.com/ggerganov/llama.cpp
 Source0: https://github.com/ggerganov/llama.cpp/archive/refs/tags/%{llama_commitid}.tar.gz
-Patch001: backport-CVE-2025-49847.patch
-Patch002: backport-CVE-2025-52566.patch
-Patch003: backport-CVE-2025-53630.patch
-
-BuildRequires: gcc,gcc-c++,cmake
+BuildRequires: gcc
+BuildRequires: gcc-c++
+BuildRequires: cmake libcurl libcurl-devel
 
 %description
-Port of English lagre model LLaMA implemented based on C/C++,
-it can be used for model dialogue based on local laptops.
+Port of the large language model LLaMA implemented in C/C++;
+it can be used for local inference on laptops.
 
 %package devel
-Summary: Port of Facebook's LLaMA model in C/C++
+Summary: Development headers and libraries for %{name}
 Requires: %{name}%{?_isa} = %{version}-%{release}
 
 %description devel
-Port of English lagre model LLaMA implemented based on C/C++,
-it can be used for model dialogue based on local laptops.
+This package contains the header files, CMake config files and pkg-config
+files needed to develop against %{name}.
 
 %prep
-%autosetup -b 0 -n %{name}-%{llama_commitid} -p1
+%autosetup -n %{name}-%{llama_commitid}
 
 %build
-%cmake -DCMAKE_INSTALL_PREFIX=%{_prefix} \
-    -DCMAKE_INSTALL_LIBDIR=%{_libdir} \
-    -DCMAKE_INSTALL_BINDIR=%{_bindir} \
-    -DCMAKE_INSTALL_INCLUDEDIR=%{_includedir}
-%cmake_build
+mkdir -p build
+cd build
+cmake .. \
+    -DCMAKE_INSTALL_PREFIX=%{_prefix} \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_LIBDIR=%{_libdir}
+%make_build
 
 %install
-%cmake_install
-
+cd build
+%make_install
+# Clean up empty directories
+find %{buildroot} -type d -empty -delete
 
 %files
 %{_bindir}/*
 %{_libdir}/*.so
 
 %files devel
-%dir %{_libdir}/cmake/llama
-%doc README.md
-%{_includedir}/ggml.h
-%{_includedir}/ggml-*.h
-%{_includedir}/llama.h
-%{_includedir}/llama-*.h
-%{_libdir}/cmake/llama/*.cmake
-%{_exec_prefix}/lib/pkgconfig/llama.pc
+%{_includedir}/*.h
+%{_libdir}/cmake/
+%{_libdir}/pkgconfig/
+%{_libdir}/*.so
 
 %changelog
+* Wed Oct 22 2025 GS_Stephen_Curry - 20251009-1
+- Upgrade to b6602
+
 * Mon Jul 21 2025 PshySimon - 20241210-4
 - fix CVE-2025-53630
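
Reviewer note: the three dropped backports all guard against size_t values either wrapping during size accumulation (CVE-2025-53630) or being narrowed into the int32_t token API (CVE-2025-49847, CVE-2025-52566); the rebase to b6602 picks these fixes up from upstream, so the local copies are no longer needed. The common idea is that the check runs before the addition or the narrowing cast, so the overflow never happens. Below is a minimal, standalone C++ sketch of that pattern; the helper names (pad_to_alignment, accumulate_padded_size, fits_in_int32) and the demo values are illustrative only and are not llama.cpp APIs.

    // Sketch only: mirrors the overflow checks from the dropped backports, not llama.cpp code.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Round size up to a multiple of alignment (alignment must be a power of two,
    // and size must be small enough that the rounding itself cannot wrap).
    static size_t pad_to_alignment(size_t size, size_t alignment) {
        return (size + alignment - 1) & ~(alignment - 1);
    }

    // CVE-2025-53630 style guard: refuse to accumulate a padded size that would
    // wrap the running total around SIZE_MAX; report failure instead of wrapping.
    static bool accumulate_padded_size(size_t * total, size_t cur, size_t alignment) {
        const size_t padded = pad_to_alignment(cur, alignment);
        if (SIZE_MAX - *total < padded) {
            return false; // caller should abort loading the file
        }
        *total += padded;
        return true;
    }

    // CVE-2025-49847/52566 style guard: a size_t count must fit into int32_t
    // before it is returned through an int32_t-based API.
    static bool fits_in_int32(size_t n) {
        return n < static_cast<size_t>(std::numeric_limits<int32_t>::max());
    }

    int main(void) {
        size_t total = 0;
        const size_t sizes[] = { 1000, SIZE_MAX - 4096, 8192 };
        for (size_t cur : sizes) {
            if (accumulate_padded_size(&total, cur, 32)) {
                std::printf("accumulated, total is now %zu\n", total);
            } else {
                std::printf("rejected %zu: accumulation would overflow size_t\n", cur);
            }
        }
        std::printf("total fits in int32_t: %s\n", fits_in_int32(total) ? "yes" : "no");
        return 0;
    }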