diff --git a/src/main/java/com/hanserdev/interview/config/PgVectorConfig.java b/src/main/java/com/hanserdev/interview/config/PgVectorConfig.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/java/com/hanserdev/interview/enums/FileType.java b/src/main/java/com/hanserdev/interview/enums/FileType.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/java/com/hanserdev/interview/utils/DocumentParserUtils.java b/src/main/java/com/hanserdev/interview/utils/DocumentParserUtils.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/java/com/hanserdev/interview/utils/FileReaderUtils.java b/src/main/java/com/hanserdev/interview/utils/FileReaderUtils.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/java/com/hanserdev/interview/utils/JsonDocumentParser.java b/src/main/java/com/hanserdev/interview/utils/JsonDocumentParser.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/java/com/hanserdev/interview/utils/MarkdownDocumentParser.java b/src/main/java/com/hanserdev/interview/utils/MarkdownDocumentParser.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/java/com/hanserdev/interview/utils/PdfDocumentParser.java b/src/main/java/com/hanserdev/interview/utils/PdfDocumentParser.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/java/com/hanserdev/interview/utils/WordDocumentParser.java b/src/main/java/com/hanserdev/interview/utils/WordDocumentParser.java
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/main/resources/config/application.yml b/src/main/resources/config/application.yml
index 845f0177f1f6ac3066c9510c2f0c342b26c93385..ae42fb906b4134bc77eba78f6527439da1d1de29 100644
--- a/src/main/resources/config/application.yml
+++ b/src/main/resources/config/application.yml
@@ -4,47 +4,33 @@ spring:
   banner:
     location: classpath:config/banner.txt
   datasource:
-    driver-class-name: org.postgresql.Driver # 指定数据库驱动类
-    # 数据库连接信息
-    url: jdbc:postgresql://127.0.0.1:5432/interview
-    username: postgres # 数据库用户名
-    password: postgressql # 数据库密码
-    type: com.alibaba.druid.pool.DruidDataSource
-    druid: # Druid 连接池
-      initial-size: 5 # 初始化连接池大小
-      min-idle: 5 # 最小连接池数量
-      max-active: 20 # 最大连接池数量
-      max-wait: 60000 # 连接时最大等待时间(单位:毫秒)
-      test-while-idle: true
-      time-between-eviction-runs-millis: 60000 # 配置多久进行一次检测,检测需要关闭的连接(单位:毫秒)
-      min-evictable-idle-time-millis: 300000 # 配置一个连接在连接池中最小生存的时间(单位:毫秒)
-      max-evictable-idle-time-millis: 900000 # 配置一个连接在连接池中最大生存的时间(单位:毫秒)
-      validation-query: SELECT 1 # 配置测试连接是否可用的查询 sql
-      test-on-borrow: false
-      test-on-return: false
-      pool-prepared-statements: false
-      web-stat-filter:
-        enabled: true
-      stat-view-servlet:
-        enabled: true
-        url-pattern: /druid/* # 配置监控后台访问路径
-        login-username: admin # 配置监控后台登录的用户名、密码
-        login-password: admin
-      filter:
-        stat:
-          enabled: true
-          log-slow-sql: true # 开启慢 sql 记录
-          slow-sql-millis: 2000 # 若执行耗时大于 2s,则视为慢 sql
-          merge-sql: true
-        wall: # 防火墙
-          config:
-            multi-statement-allow: true
+    driver-class-name: org.postgresql.Driver
+    url: jdbc:postgresql://localhost:5432/interview?serverTimezone=Asia/Shanghai
+    username: postgres
+    password: postgressql
+    # HikariCP connection pool settings
+    hikari:
+      maximum-pool-size: 20 # at most 20 connections in the pool
+      minimum-idle: 20 # keep 20 idle connections (same as the maximum pool size)
+      connection-timeout: 5000 # wait at most 5 seconds to obtain a connection
+      max-lifetime: 28800000 # 8 hours (intended to retire connections before the database-side timeout)
   ai:
+    vectorstore: # vector store configuration
+      pgvector:
+        initialize-schema: true
+        schema-name: public
+        table-name: interview
+        index-type: hnsw
+        dimensions: 1024
     dashscope:
       api-key: sk-48f7b229942c4e179ea320f1a162f368
       chat:
         options:
           model: qwen3-max
+      embedding: # embedding model configuration
+        options:
+          model: text-embedding-v4
+          dimensions: 1024
 mybatis-plus:
   configuration:
     map-underscore-to-camel-case: true
diff --git a/src/test/java/com/hanserdev/interview/utils/FileReaderUtilsTest.java b/src/test/java/com/hanserdev/interview/utils/FileReaderUtilsTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..5b7f3fbc4e68309f524c6e59dcf6e8bf7820664a
--- /dev/null
+++ b/src/test/java/com/hanserdev/interview/utils/FileReaderUtilsTest.java
@@ -0,0 +1,63 @@
+package com.hanserdev.interview.utils;
+
+import jakarta.annotation.Resource;
+import org.junit.jupiter.api.Test;
+import org.springframework.ai.document.Document;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.mock.web.MockMultipartFile;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+/**
+ * Integration test for {@link FileReaderUtils}, run inside a {@code @SpringBootTest} context.
+ */
+@SpringBootTest
+class FileReaderUtilsTest {
+
+    @Resource
+    private FileReaderUtils fileReaderUtils;
+
+    /**
+     * Reads a Markdown fixture from {@code src/test/resources}, wraps it in a
+     * {@link MockMultipartFile} and passes it to {@code FileReaderUtils.loadDocument}.
+     *
+     * @throws IOException if the fixture file cannot be read
+     */
+    @Test
+    void loadDocument() throws IOException {
+        // 1. Locate the fixture; it has to be present under src/test/resources.
+        String fileName = "1、关于页面音频、视频捕获.md";
+        String filePath = "src/test/resources/" + fileName;
+        Path path = Paths.get(filePath);
+
+        // Fail with an explicit message when the fixture is missing.
+        if (!Files.exists(path)) {
+            throw new RuntimeException("没有找到指定文件: " + filePath);
+        }
+
+        // 2. Read the whole file into a byte array.
+        byte[] content = Files.readAllBytes(path);
+
+        // 3. Wrap the bytes as an uploaded multipart file.
+        MultipartFile multipartFile = new MockMultipartFile(
+                "file",
+                fileName,
+                "text/markdown",
+                content
+        );
+
+        // 4. Run the loader; a null or empty result means the upload was not turned into documents.
+        List<Document> documents = fileReaderUtils.loadDocument(multipartFile);
+        assertNotNull(documents, "loadDocument returned null");
+        assertFalse(documents.isEmpty(), "loadDocument returned no documents");
+        System.out.println(documents);
+    }
+}