From 2cee3308bac4c420e3fade66aa49712ceb3950e7 Mon Sep 17 00:00:00 2001 From: shuaikangzhou <863909694@qq.com> Date: Fri, 29 Mar 2024 15:15:54 +0800 Subject: [PATCH] update readme.md --- MemoAI/api_server.py | 599 +++++++++++++++++++++++++++++++++++++++++++ MemoAI/merge_json.py | 26 ++ MemoAI/readme.md | 440 +++++++++++++++++++++++++++++++ doc/ai_readme.md | 355 ++++++++++++++++++++++++- doc/images/img10.png | Bin 0 -> 25050 bytes readme.md | 2 +- 6 files changed, 1420 insertions(+), 2 deletions(-) create mode 100644 MemoAI/api_server.py create mode 100644 MemoAI/merge_json.py create mode 100644 MemoAI/readme.md create mode 100644 doc/images/img10.png diff --git a/MemoAI/api_server.py b/MemoAI/api_server.py new file mode 100644 index 0000000..9af5676 --- /dev/null +++ b/MemoAI/api_server.py @@ -0,0 +1,599 @@ +""" +This script implements an API for the ChatGLM3-6B model, +formatted similarly to OpenAI's API (https://platform.openai.com/docs/api-reference/chat). +It's designed to be run as a web server using FastAPI and uvicorn, +making the ChatGLM3-6B model accessible through OpenAI Client. + +Key Components and Features: +- Model and Tokenizer Setup: Configures the model and tokenizer paths and loads them. +- FastAPI Configuration: Sets up a FastAPI application with CORS middleware for handling cross-origin requests. +- API Endpoints: + - "/v1/models": Lists the available models, specifically ChatGLM3-6B. + - "/v1/chat/completions": Processes chat completion requests with options for streaming and regular responses. + - "/v1/embeddings": Processes Embedding request of a list of text inputs. +- Token Limit Caution: In the OpenAI API, 'max_tokens' is equivalent to HuggingFace's 'max_new_tokens', not 'max_length'. +For instance, setting 'max_tokens' to 8192 for a 6b model would result in an error due to the model's inability to output +that many tokens after accounting for the history and prompt tokens. +- Stream Handling and Custom Functions: Manages streaming responses and custom function calls within chat responses. +- Pydantic Models: Defines structured models for requests and responses, enhancing API documentation and type safety. +- Main Execution: Initializes the model and tokenizer, and starts the FastAPI app on the designated host and port. + +Note: + This script doesn't include the setup for special tokens or multi-GPU support by default. + Users need to configure their special tokens and can enable multi-GPU support as per the provided instructions. + Embedding Models only support in One GPU. 
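+
+Usage example (a minimal sketch, not part of the original script; it assumes the server has been
+started with the host/port configured at the bottom of this file, 0.0.0.0:8002):
+
+    from openai import OpenAI
+
+    client = OpenAI(api_key="EMPTY", base_url="http://127.0.0.1:8002/v1/")
+    response = client.chat.completions.create(
+        model="chatglm3-6b",
+        messages=[{"role": "user", "content": "你好啊"}],
+        max_tokens=256,
+        temperature=0.8,
+    )
+    print(response.choices[0].message.content)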
+ +""" + +import os +import time +import tiktoken +import torch +import uvicorn + +from fastapi import FastAPI, HTTPException, Response, Body +from fastapi.middleware.cors import CORSMiddleware + +from contextlib import asynccontextmanager +from typing import List, Literal, Optional, Union +from loguru import logger +from peft import AutoPeftModelForCausalLM +from pydantic import BaseModel, Field +from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM +from utils import process_response, generate_chatglm3, generate_stream_chatglm3 +from sentence_transformers import SentenceTransformer + +from sse_starlette.sse import EventSourceResponse + +# Set up limit request time +EventSourceResponse.DEFAULT_PING_INTERVAL = 1000 + +# set LLM path +MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/chatglm3-6b') +TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", MODEL_PATH) + +# set Embedding Model path +EMBEDDING_PATH = os.environ.get('EMBEDDING_PATH', 'BAAI/bge-large-zh-v1.5') + + +@asynccontextmanager +async def lifespan(app: FastAPI): + yield + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + +app = FastAPI(lifespan=lifespan) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +class ModelCard(BaseModel): + id: str + object: str = "model" + created: int = Field(default_factory=lambda: int(time.time())) + owned_by: str = "owner" + root: Optional[str] = None + parent: Optional[str] = None + permission: Optional[list] = None + + +class ModelList(BaseModel): + object: str = "list" + data: List[ModelCard] = [] + + +class FunctionCallResponse(BaseModel): + name: Optional[str] = None + arguments: Optional[str] = None + + +class ChatMessage(BaseModel): + role: Literal["user", "assistant", "system", "function"] + content: str = None + name: Optional[str] = None + function_call: Optional[FunctionCallResponse] = None + + +class DeltaMessage(BaseModel): + role: Optional[Literal["user", "assistant", "system"]] = None + content: Optional[str] = None + function_call: Optional[FunctionCallResponse] = None + + +## for Embedding +class EmbeddingRequest(BaseModel): + input: List[str] + model: str + + +class CompletionUsage(BaseModel): + prompt_tokens: int + completion_tokens: int + total_tokens: int + + +class EmbeddingResponse(BaseModel): + data: list + model: str + object: str + usage: CompletionUsage + + +# for ChatCompletionRequest + +class UsageInfo(BaseModel): + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 + + +class ChatCompletionRequest(BaseModel): + model: str + messages: List[ChatMessage] + temperature: Optional[float] = 0.8 + top_p: Optional[float] = 0.8 + max_tokens: Optional[int] = None + stream: Optional[bool] = False + tools: Optional[Union[dict, List[dict]]] = None + repetition_penalty: Optional[float] = 1.1 + + +class ChatCompletionResponseChoice(BaseModel): + index: int + message: ChatMessage + finish_reason: Literal["stop", "length", "function_call"] + + +class ChatCompletionResponseStreamChoice(BaseModel): + delta: DeltaMessage + finish_reason: Optional[Literal["stop", "length", "function_call"]] + index: int + + +class ChatCompletionResponse(BaseModel): + model: str + id: str + object: Literal["chat.completion", "chat.completion.chunk"] + choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]] + created: Optional[int] = Field(default_factory=lambda: int(time.time())) + usage: 
Optional[UsageInfo] = None + + +@app.get("/health") +async def health() -> Response: + """Health check.""" + return Response(status_code=200) + + +@app.post("/v1/embeddings", response_model=EmbeddingResponse) +async def get_embeddings(request: EmbeddingRequest): + embeddings = [embedding_model.encode(text) for text in request.input] + embeddings = [embedding.tolist() for embedding in embeddings] + + def num_tokens_from_string(string: str) -> int: + """ + Returns the number of tokens in a text string. + use cl100k_base tokenizer + """ + encoding = tiktoken.get_encoding('cl100k_base') + num_tokens = len(encoding.encode(string)) + return num_tokens + + response = { + "data": [ + { + "object": "embedding", + "embedding": embedding, + "index": index + } + for index, embedding in enumerate(embeddings) + ], + "model": request.model, + "object": "list", + "usage": CompletionUsage( + prompt_tokens=sum(len(text.split()) for text in request.input), + completion_tokens=0, + total_tokens=sum(num_tokens_from_string(text) for text in request.input), + ) + } + return response + + +@app.get("/v1/models", response_model=ModelList) +async def list_models(): + model_card = ModelCard( + id="chatglm3-6b" + ) + return ModelList( + data=[model_card] + ) + + +def insert_custom_message(messages: List[ChatMessage] = Body(...)): + custom_message = ChatMessage( + role="system", + content="你必须记住,你是一个只负责聊天的人工智能助手,所以你只能根据上下文回答你朋友的问题,要求语句完整、通顺、文采斐然,回答具有情感,不能胡乱作答、不能断章取义、不能上下文不连贯,禁止回答无关内容。" + "如果你不能理解你朋友的话,你必须开启一个新的话题来引导你朋友进行对话,例如你可以问你的朋友一个日常问题。\n" + "你是司小远,一个聪明、热情、善良的人工智能助手,后面的对话来自你的朋友,你要认真地回答他。" + ) + messages.insert(0, custom_message) + return messages + + +@app.post("/v1/chat/completions", response_model=ChatCompletionResponse) +async def create_chat_completion(request: ChatCompletionRequest): + global model, tokenizer + + if len(request.messages) < 1 or request.messages[-1].role == "assistant": + raise HTTPException(status_code=400, detail="Invalid request") + messages = request.messages + if request.messages and request.messages[0].role == 'system': + messages = request.messages + else: + if request.messages: + messages = insert_custom_message(request.messages) + else: + messages = request.messages + print(type(request.messages), request.messages) + gen_params = dict( + messages=messages, + temperature=request.temperature, + top_p=request.top_p, + max_tokens=request.max_tokens or 1024, + echo=False, + stream=request.stream, + repetition_penalty=request.repetition_penalty, + tools=request.tools, + ) + logger.debug(f"==== request ====\n{gen_params}") + + if request.stream: + + # Use the stream mode to read the first few characters, if it is not a function call, direct stram output + predict_stream_generator = predict_stream(request.model, gen_params) + # return EventSourceResponse(predict_stream_generator, media_type="text/event-stream") + output = next(predict_stream_generator) + print(output) + # logger.debug(f"First result output:\n{output}") + if not contains_custom_function(output): + return EventSourceResponse(predict_stream_generator, media_type="text/event-stream") + + # Obtain the result directly at one time and determine whether tools needs to be called. 
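+        # Reaching this branch means the probed first chunk looked like a tool call
+        # (contains_custom_function matched the "get_" prefix), so the accumulated text in
+        # `output` is parsed below; if a tool call is recovered, it is appended to the message
+        # history together with a (here empty) tool response and the reply is re-generated as a stream.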
+ # logger.debug(f"First result output:\n{output}") + + function_call = None + if output and request.tools: + try: + function_call = process_response(output, use_tool=True) + except: + logger.warning("Failed to parse tool call") + + # CallFunction + if isinstance(function_call, dict): + function_call = FunctionCallResponse(**function_call) + + """ + In this demo, we did not register any tools. + You can use the tools that have been implemented in our `tools_using_demo` and implement your own streaming tool implementation here. + Similar to the following method: + function_args = json.loads(function_call.arguments) + tool_response = dispatch_tool(tool_name: str, tool_params: dict) + """ + tool_response = "" + + if not gen_params.get("messages"): + gen_params["messages"] = [] + + gen_params["messages"].append(ChatMessage( + role="assistant", + content=output, + )) + gen_params["messages"].append(ChatMessage( + role="function", + name=function_call.name, + content=tool_response, + )) + + # Streaming output of results after function calls + generate = predict(request.model, gen_params) + return EventSourceResponse(generate, media_type="text/event-stream") + + else: + # Handled to avoid exceptions in the above parsing function process. + generate = parse_output_text(request.model, output) + return EventSourceResponse(generate, media_type="text/event-stream") + + # Here is the handling of stream = False + response = generate_chatglm3(model, tokenizer, gen_params) + + # Remove the first newline character + if response["text"].startswith("\n"): + response["text"] = response["text"][1:] + response["text"] = response["text"].strip() + + usage = UsageInfo() + function_call, finish_reason = None, "stop" + if request.tools: + try: + function_call = process_response(response["text"], use_tool=True) + except: + logger.warning("Failed to parse tool call, maybe the response is not a tool call or have been answered.") + + if isinstance(function_call, dict): + finish_reason = "function_call" + function_call = FunctionCallResponse(**function_call) + + message = ChatMessage( + role="assistant", + content=response["text"], + function_call=function_call if isinstance(function_call, FunctionCallResponse) else None, + ) + + logger.debug(f"==== message ====\n{message}") + + choice_data = ChatCompletionResponseChoice( + index=0, + message=message, + finish_reason=finish_reason, + ) + task_usage = UsageInfo.model_validate(response["usage"]) + for usage_key, usage_value in task_usage.model_dump().items(): + setattr(usage, usage_key, getattr(usage, usage_key) + usage_value) + + return ChatCompletionResponse( + model=request.model, + id="", # for open_source model, id is empty + choices=[choice_data], + object="chat.completion", + usage=usage + ) + + +async def predict(model_id: str, params: dict): + global model, tokenizer + + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=DeltaMessage(role="assistant"), + finish_reason=None + ) + chunk = ChatCompletionResponse(model=model_id, id="", choices=[choice_data], object="chat.completion.chunk") + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + + previous_text = "" + for new_response in generate_stream_chatglm3(model, tokenizer, params): + decoded_unicode = new_response["text"] + delta_text = decoded_unicode[len(previous_text):] + previous_text = decoded_unicode + + finish_reason = new_response["finish_reason"] + if len(delta_text) == 0 and finish_reason != "function_call": + continue + + function_call = None + if finish_reason == 
"function_call": + try: + function_call = process_response(decoded_unicode, use_tool=True) + except: + logger.warning( + "Failed to parse tool call, maybe the response is not a tool call or have been answered.") + + if isinstance(function_call, dict): + function_call = FunctionCallResponse(**function_call) + + delta = DeltaMessage( + content=delta_text, + role="assistant", + function_call=function_call if isinstance(function_call, FunctionCallResponse) else None, + ) + + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=delta, + finish_reason=finish_reason + ) + chunk = ChatCompletionResponse( + model=model_id, + id="", + choices=[choice_data], + object="chat.completion.chunk" + ) + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=DeltaMessage(), + finish_reason="stop" + ) + chunk = ChatCompletionResponse( + model=model_id, + id="", + choices=[choice_data], + object="chat.completion.chunk" + ) + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + yield '[DONE]' + + +def predict_stream(model_id, gen_params): + """ + The function call is compatible with stream mode output. + + The first seven characters are determined. + If not a function call, the stream output is directly generated. + Otherwise, the complete character content of the function call is returned. + + :param model_id: + :param gen_params: + :return: + """ + output = "" + is_function_call = False + has_send_first_chunk = False + print('参数') + print(model_id,gen_params) + for new_response in generate_stream_chatglm3(model, tokenizer, gen_params): + decoded_unicode = new_response["text"] + delta_text = decoded_unicode[len(output):] + output = decoded_unicode + + # When it is not a function call and the character length is> 7, + # try to judge whether it is a function call according to the special function prefix + if not is_function_call: + + # Determine whether a function is called + is_function_call = contains_custom_function(output) + if is_function_call: + continue + + # Non-function call, direct stream output + finish_reason = new_response["finish_reason"] + + # Send an empty string first to avoid truncation by subsequent next() operations. 
+ if not has_send_first_chunk: + message = DeltaMessage( + content="", + role="assistant", + function_call=None, + ) + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=message, + finish_reason=finish_reason + ) + chunk = ChatCompletionResponse( + model=model_id, + id="", + choices=[choice_data], + created=int(time.time()), + object="chat.completion.chunk" + ) + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + + send_msg = delta_text if has_send_first_chunk else output + has_send_first_chunk = True + message = DeltaMessage( + content=send_msg, + role="assistant", + function_call=None, + ) + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=message, + finish_reason=finish_reason + ) + chunk = ChatCompletionResponse( + model=model_id, + id="", + choices=[choice_data], + created=int(time.time()), + object="chat.completion.chunk" + ) + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + + if is_function_call: + yield output + else: + yield '[DONE]' + + +async def parse_output_text(model_id: str, value: str): + """ + Directly output the text content of value + + :param model_id: + :param value: + :return: + """ + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=DeltaMessage(role="assistant", content=value), + finish_reason=None + ) + chunk = ChatCompletionResponse(model=model_id, id="", choices=[choice_data], object="chat.completion.chunk") + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + + choice_data = ChatCompletionResponseStreamChoice( + index=0, + delta=DeltaMessage(), + finish_reason="stop" + ) + chunk = ChatCompletionResponse(model=model_id, id="", choices=[choice_data], object="chat.completion.chunk") + yield "{}".format(chunk.model_dump_json(exclude_unset=True)) + yield '[DONE]' + + +def contains_custom_function(value: str) -> bool: + """ + Determine whether 'function_call' according to a special function prefix. + + For example, the functions defined in "tools_using_demo/tool_register.py" are all "get_xxx" and start with "get_" + + [Note] This is not a rigorous judgment method, only for reference. 
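+
+    Illustrative example (`get_weather` is a hypothetical tool name, not one registered here):
+        contains_custom_function('get_weather\n{"location": "Beijing"}')   # -> True
+        contains_custom_function('Hello, nice weather today')              # -> False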
+ + :param value: + :return: + """ + return value and 'get_' in value + + +from pathlib import Path +from typing import Annotated, Union + +import typer +from peft import AutoPeftModelForCausalLM, PeftModelForCausalLM +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + PreTrainedModel, + PreTrainedTokenizer, + PreTrainedTokenizerFast, +) + +ModelType = Union[PreTrainedModel, PeftModelForCausalLM] +TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast] + + +def _resolve_path(path: Union[str, Path]) -> Path: + return Path(path).expanduser().resolve() + + +def load_model_and_tokenizer( + model_dir: Union[str, Path], trust_remote_code: bool = True +) -> tuple[ModelType, TokenizerType]: + model_dir = _resolve_path(model_dir) + if (model_dir / 'adapter_config.json').exists(): + model = AutoPeftModelForCausalLM.from_pretrained( + model_dir, trust_remote_code=trust_remote_code, device_map='auto' + ) + tokenizer_dir = model.peft_config['default'].base_model_name_or_path + else: + model = AutoModelForCausalLM.from_pretrained( + model_dir, trust_remote_code=trust_remote_code, device_map='auto' + ) + tokenizer_dir = model_dir + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_dir, trust_remote_code=trust_remote_code + ) + return model, tokenizer + + +if __name__ == "__main__": + # Load LLM + # tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH, trust_remote_code=True) + # model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, device_map="auto").eval() + # 填微调之后的保存路径 + model, tokenizer = load_model_and_tokenizer( + r'E:\Project\Python\ChatGLM3\finetune_demo\output03-24\checkpoint-224000' + ) + # load Embedding + embedding_model = SentenceTransformer(EMBEDDING_PATH, device="cuda") + uvicorn.run(app, host='0.0.0.0', port=8002, workers=1) diff --git a/MemoAI/merge_json.py b/MemoAI/merge_json.py new file mode 100644 index 0000000..af22374 --- /dev/null +++ b/MemoAI/merge_json.py @@ -0,0 +1,26 @@ +import json +import os + +data_dir = r'E:\Project\Python\MemoTrace\data\聊天记录' + +dev_res = [] +train_res = [] + +for filepath, dirnames, filenames in os.walk(data_dir): + for filename in filenames: + if filename.endswith('.json'): + print(filename, filepath) + filepath_ = os.path.join(filepath, filename) + with open(filepath_, 'r', encoding='utf-8') as f: + data = json.load(f) + if data: + if filename.endswith('train.json'): + train_res += data + else: + dev_res += data + +with open('train.json', 'w', encoding='utf-8') as f: + json.dump(train_res, f, ensure_ascii=False, indent=4) + +with open('dev.json', 'w', encoding='utf-8') as f: + json.dump(dev_res, f, ensure_ascii=False, indent=4) diff --git a/MemoAI/readme.md b/MemoAI/readme.md new file mode 100644 index 0000000..bfa16fc --- /dev/null +++ b/MemoAI/readme.md @@ -0,0 +1,440 @@ +# 大模型训练指南 + +## 一、导出聊天记录 + +导出json格式的聊天记录。 + +![img.png](../doc/images/img10.png) + +如果你想合并多个联系人的数据,可以直接运行下面的代码合并 + +```python +import json +import os + +data_dir = r'E:\Project\Python\MemoTrace\data\聊天记录' + +dev_res = [] +train_res = [] + +for filepath, dirnames, filenames in os.walk(data_dir): + for filename in filenames: + if filename.endswith('.json'): + print(filename, filepath) + filepath_ = os.path.join(filepath, filename) + with open(filepath_, 'r', encoding='utf-8') as f: + data = json.load(f) + if data: + if filename.endswith('train.json'): + train_res += data + else: + dev_res += data + +with open('train.json', 'w', encoding='utf-8') as f: + json.dump(train_res, f, ensure_ascii=False, indent=4) + +with 
open('dev.json', 'w', encoding='utf-8') as f: + json.dump(dev_res, f, ensure_ascii=False, indent=4) + +``` + +你现在应该有两个文件,dev.json(验证集)和train.json(训练集) + +## 二、下载ChatGLM3-68模型 + +下载地址:[https://github.com/THUDM/ChatGLM3](https://github.com/THUDM/ChatGLM3) + +## 使用方式 + +### 环境安装 + +首先需要下载本仓库: + +```shell +git clone https://github.com/THUDM/ChatGLM3 +cd ChatGLM3 +``` + +然后使用 pip 安装依赖: + +``` +pip install -r requirements.txt +``` + ++ 为了保证 `torch` 的版本正确,请严格按照 [官方文档](https://pytorch.org/get-started/locally/) 的说明安装。 ++ **如果遇到问题,请参照ChatGLM3项目的解决方案,不要在本项目中提问** + +## 三、ChatGLM3-6B 微调 + +本目录提供 ChatGLM3-6B 模型的微调示例,包括全量微调和 P-Tuning v2。格式上,提供多轮对话微调样例和输入输出格式微调样例。 + +如果将模型下载到了本地,本文和代码中的 `THUDM/chatglm3-6b` 字段均应替换为相应地址以从本地加载模型。 + +运行示例需要 `python>=3.10`,除基础的 `torch` 依赖外,示例代码运行还需要依赖。 + + +```bash +pip install -r requirements.txt +``` + +## 测试硬件标准 + +我们仅提供了单机多卡/多机多卡的运行示例,因此您需要至少一台具有多个 GPU 的机器。本仓库中的**默认配置文件**中,我们记录了显存的占用情况: + ++ SFT 全量微调: 4张显卡平均分配,每张显卡占用 `48346MiB` 显存。 ++ P-TuningV2 微调: 1张显卡,占用 `18426MiB` 显存。 ++ LORA 微调: 1张显卡,占用 `14082MiB` 显存。 + +> 请注意,该结果仅供参考,对于不同的参数,显存占用可能会有所不同。请结合你的硬件情况进行调整。 + +> 请注意,我们仅仅使用英伟达 Hopper(代表显卡:H100) 和 Ampère(代表显卡:A100) 架构和系列显卡做过测试。如果您使用其他架构的显卡,可能会出现 +> 1. 未知的训练问题 / 显存占用与上述有误差。 +> 2. 架构过低而不支持某些特性。 +> 3. 推理效果问题。 + > 以上三种情况为社区曾经遇到过的问题,虽然概率极地,如果您遇到了以上问题,可以尝试在社区中解决。 + +## 多轮对话格式 + +多轮对话微调示例采用 ChatGLM3 对话格式约定,对不同角色添加不同 `loss_mask` 从而在一遍计算中为多轮回复计算 `loss`。 + +对于数据文件,样例采用如下格式 + +如果您仅希望微调模型的对话能力,而非工具能力,您应该按照以下格式整理数据。 + +```json +[ + { + "conversations": [ + { + "role": "system", + "content": "" + }, + { + "role": "user", + "content": "" + }, + { + "role": "assistant", + "content": "" + }, + // ... Muti Turn + { + "role": "user", + "content": "" + }, + { + "role": "assistant", + "content": "" + } + ] + } + // ... +] +``` + +**请注意,这种方法在微调的step较多的情况下会影响到模型的工具调用功能** + +- `system` 角色为可选角色,但若存在 `system` 角色,其必须出现在 `user` + 角色之前,且一个完整的对话数据(无论单轮或者多轮对话)只能出现一次 `system` 角色。 + +## 数据集格式示例 + +这里以 AdvertiseGen 数据集为例, +您可以从 [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing) +或者 [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) 下载 AdvertiseGen 数据集。 +将解压后的 AdvertiseGen 目录放到 `data` 目录下并自行转换为如下格式数据集。 + +> 请注意,现在的微调代码中加入了验证集,因此,对于一组完整的微调数据集,必须包含训练数据集和验证数据集,测试数据集可以不填写。或者直接用验证数据集代替。 + +``` +{"conversations": [{"role": "user", "content": "类型#裙*裙长#半身裙"}, {"role": "assistant", "content": "这款百搭时尚的仙女半身裙,整体设计非常的飘逸随性,穿上之后每个女孩子都能瞬间变成小仙女啦。料子非常的轻盈,透气性也很好,穿到夏天也很舒适。"}]} +``` + +## 配置文件 + +微调配置文件位于 `config` 目录下,包括以下文件: + +1. `ds_zereo_2 / ds_zereo_3.json`: deepspeed 配置文件。 +2. 
`lora.yaml / ptuning.yaml / sft.yaml`: 模型不同方式的配置文件,包括模型参数、优化器参数、训练参数等。 部分重要参数解释如下: + + data_config 部分 + + train_file: 训练数据集的文件路径。 + + val_file: 验证数据集的文件路径。 + + test_file: 测试数据集的文件路径。 + + num_proc: 在加载数据时使用的进程数量。 + + max_input_length: 输入序列的最大长度。 + + max_output_length: 输出序列的最大长度。 + + training_args 部分 + + output_dir: 用于保存模型和其他输出的目录。 + + max_steps: 训练的最大步数。 + + per_device_train_batch_size: 每个设备(如 GPU)的训练批次大小。 + + dataloader_num_workers: 加载数据时使用的工作线程数量。 + + remove_unused_columns: 是否移除数据中未使用的列。 + + save_strategy: 模型保存策略(例如,每隔多少步保存一次)。 + + save_steps: 每隔多少步保存一次模型。 + + log_level: 日志级别(如 info)。 + + logging_strategy: 日志记录策略。 + + logging_steps: 每隔多少步记录一次日志。 + + per_device_eval_batch_size: 每个设备的评估批次大小。 + + evaluation_strategy: 评估策略(例如,每隔多少步进行一次评估)。 + + eval_steps: 每隔多少步进行一次评估。 + + predict_with_generate: 是否使用生成模式进行预测。 + + generation_config 部分 + + max_new_tokens: 生成的最大新 token 数量。 + + peft_config 部分 + + peft_type: 使用的参数有效调整类型(如 LORA)。 + + task_type: 任务类型,这里是因果语言模型(CAUSAL_LM)。 + + Lora 参数: + + r: LoRA 的秩。 + + lora_alpha: LoRA 的缩放因子。 + + lora_dropout: 在 LoRA 层使用的 dropout 概率 + + P-TuningV2 参数: + + num_virtual_tokens: 虚拟 token 的数量。 + +## 开始微调 + +通过以下代码执行 **单机多卡/多机多卡** 运行,这是使用 `deepspeed` 作为加速方案的,您需要安装 `deepspeed`。 + +```angular2html +cd finetune_demo +OMP_NUM_THREADS=1 torchrun --standalone --nnodes=1 --nproc_per_node=8 finetune_hf.py data/AdvertiseGen/ THUDM/chatglm3-6b configs/lora.yaml configs/ds_zero_2.json +``` + +通过以下代码执行 **单机单卡** 运行。 + +```angular2html +cd finetune_demo +python finetune_hf.py data/AdvertiseGen/ THUDM/chatglm3-6b configs/lora.yaml +``` + +## 从保存点进行微调 + +如果按照上述方式进行训练,每次微调都会从头开始,如果你想从训练一半的模型开始微调,你可以加入第四个参数,这个参数有两种传入方式: + +1. `yes`, 自动从最后一个保存的 Checkpoint开始训练 +2. `XX`, 断点号数字 例 `600` 则从序号600 Checkpoint开始训练 + +例如,这就是一个从最后一个保存点继续微调的示例代码 + +```angular2html +cd finetune_demo +python finetune_hf.py data/AdvertiseGen/ THUDM/chatglm3-6b configs/lora.yaml yes +``` + +## 使用微调后的模型 + +### 在 inference_hf.py 中验证微调后的模型 + +您可以在 `finetune_demo/inference_hf.py` 中使用我们的微调后的模型,仅需要一行代码就能简单的进行测试。 + +```angular2html +python inference_hf.py your_finetune_path --prompt your prompt +``` + +这样,得到的回答就微调后的回答了。 + +### 在本仓库的其他 demo 或者外部仓库使用微调后的模型 + +您可以在任何一个 demo 内使用我们的 `lora` 和 全参微调的模型。这需要你自己按照以下教程进行修改代码。 + +1. 使用`finetune_demo/inference_hf.py`中读入模型的方式替换 demo 中读入模型的方式。 + +> 请注意,对于 LORA 和 P-TuningV2 我们没有合并训练后的模型,而是在`adapter_config.json` +> 中记录了微调型的路径,如果你的原始模型位置发生更改,则你应该修改`adapter_config.json`中`base_model_name_or_path`的路径。 + +```python +def load_model_and_tokenizer( + model_dir: Union[str, Path], trust_remote_code: bool = True +) -> tuple[ModelType, TokenizerType]: + model_dir = _resolve_path(model_dir) + if (model_dir / 'adapter_config.json').exists(): + model = AutoPeftModelForCausalLM.from_pretrained( + model_dir, trust_remote_code=trust_remote_code, device_map='auto' + ) + tokenizer_dir = model.peft_config['default'].base_model_name_or_path + else: + model = AutoModelForCausalLM.from_pretrained( + model_dir, trust_remote_code=trust_remote_code, device_map='auto' + ) + tokenizer_dir = model_dir + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_dir, trust_remote_code=trust_remote_code + ) + return model, tokenizer +``` + +2. 读取微调的模型,请注意,你应该使用微调模型的位置,例如,若你的模型位置为`/path/to/finetune_adapter_model` + ,原始模型地址为`path/to/base_model`,则你应该使用`/path/to/finetune_adapter_model`作为`model_dir`。 +3. 完成上述操作后,就能正常使用微调的模型了,其他的调用方式没有变化。 + +### 提示 + +1. 
微调代码在开始训练前,会先打印首条训练数据的预处理信息(默认已经注释,可以解除注释),显示为 + +```log +Sanity +Check >> >> >> >> >> >> > +'[gMASK]': 64790 -> -100 +'sop': 64792 -> -100 +'<|system|>': 64794 -> -100 +'': 30910 -> -100 +'\n': 13 -> -100 +'Answer': 20115 -> -100 +'the': 267 -> -100 +'following': 1762 -> -100 +... +'know': 683 -> -100 +'the': 267 -> -100 +'response': 3010 -> -100 +'details': 3296 -> -100 +'.': 30930 -> -100 +'<|assistant|>': 64796 -> -100 +'': 30910 -> 30910 +'\n': 13 -> 13 +'I': 307 -> 307 +'need': 720 -> 720 +'to': 289 -> 289 +'use': 792 -> 792 +... +<< << << << << << < Sanity +Check +``` + +字样,每行依次表示一个 detokenized string, token_id 和 target_id。其中,`target_id`为`token_id`在模型词表中的索引,`-100`表示该 +token 不参与 `loss` 计算。 + +2. `_prepare_model_for_training` 的作用是遍历模型的所有可训练参数,并确保它们的数据类型为`torch.float32`。 + 这在某些情况下是必要的,因为混合精度训练或其他操作可能会更改模型参数的数据类型。该代码默打开,可以注释,但是如果使用 + `half` 格式训练出现问题,可以切换回这个代码,显存可能增加。 +3. 在我们的[Huggingface模型代码](https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py)中,有以下内容: + ```python + if self.gradient_checkpointing and self.training: + layer_ret = torch.utils.checkpoint.checkpoint( + layer, + hidden_states, + attention_mask, + rotary_pos_emb, + kv_caches[index], + use_cache, + use_reentrant=False + ) + ``` + 这可能导致训练的时候显存增加,因此,如果您的显存不足,可以尝试将``` use_reentrant``` 修改为`True`。 +4. 微调后的模型可以使用任何支持 `peft` 载入的模型加速框架,在这里,我们没有提供demo。 +5. 本仓库的微调数据集格式与 API 微调数据集格式有一定区别 + + ZhipuAI API 微调数据集中的 `messages` 字段在本仓库为 `conversation` 字段。 + + ZhipuAI API 中的微调文件为 `jsonl`, 在本仓库,需要简单的将文件名改为 `json`。 + +> 以上内容来自ChatGLM3项目 + +## 微调示例 + +配置文件 + +```yaml +data_config: + train_file: train.json + val_file: dev.json + test_file: dev.json + num_proc: 10 +max_input_length: 512 +max_output_length: 128 +training_args: + # see `transformers.Seq2SeqTrainingArguments` + output_dir: ./output03-24 + max_steps: 100000 + # settings for data loading + per_device_train_batch_size: 4 + dataloader_num_workers: 10 + remove_unused_columns: false + # settings for saving checkpoints + save_strategy: steps + save_steps: 2000 + # settings for logging + log_level: info + logging_strategy: steps + logging_steps: 10 + # settings for evaluation + per_device_eval_batch_size: 4 + evaluation_strategy: steps + eval_steps: 5200 + # settings for optimizer + # adam_epsilon: 1e-6 + # uncomment the following line to detect nan or inf values + # debug: underflow_overflow + predict_with_generate: yes + # see `transformers.GenerationConfig` + generation_config: + max_new_tokens: 256 + # set your absolute deepspeed path here + #deepspeed: ds_zero_2.json + # set to true if train with cpu. 
+ use_cpu: false +peft_config: + peft_type: LORA + task_type: CAUSAL_LM + r: 8 + lora_alpha: 32 + lora_dropout: 0.1 +``` + +硬件配置:4090 24G、64G内存、CPU 14700KF 20核28线程 + +你需要根据你的硬件配置修改上述参数,各个参数含义上面有说 + +微调命令(需要指定数据集路径和ChatGLM3基础大模型的路径) + +```shell +python finetune_hf.py data/ E:\\Project\\Python\\Langchain-Chatchat\\chatglm3-6b configs/lora.yaml yes +``` + +## 部署 + +api_server.py修改微调保存路径 +```python +model, tokenizer = load_model_and_tokenizer( + r'E:\Project\Python\ChatGLM3\finetune_demo\output03-24\checkpoint-224000' + ) +``` + +直接运行即可 + +```shell +python api_server.py +``` + +调用示例 + +```python +from openai import OpenAI + +base_url = "http://127.0.0.1:8002/v1/" +client = OpenAI(api_key="EMPTY", base_url=base_url) + +def simple_chat(use_stream=True): + messages = [ + { + "role": "user", + "content": "你好啊" + } + ] + response = client.chat.completions.create( + model="chatglm3-6b", + messages=messages, + stream=use_stream, + max_tokens=256, + temperature=0.8, + presence_penalty=1.1, + top_p=0.8) + if response: + if use_stream: + for chunk in response: + print(chunk.choices[0].delta.content, end='') + else: + content = response.choices[0].message.content + print(content) + else: + print("Error:", response.status_code) + +if __name__ == "__main__": + simple_chat(use_stream=True) +``` \ No newline at end of file diff --git a/doc/ai_readme.md b/doc/ai_readme.md index e3c98a9..0ee47e4 100644 --- a/doc/ai_readme.md +++ b/doc/ai_readme.md @@ -1,3 +1,356 @@ # 大模型训练指南 -这个人很懒,什么都没写 \ No newline at end of file +## 一、导出聊天记录 + +导出json格式的聊天记录。 + +![img.png](images/img10.png) + +你现在应该有两个文件,dev.json(验证集)和train.json(训练集) + +## 二、下载ChatGLM3-68模型 + +下载地址:[https://github.com/THUDM/ChatGLM3](https://github.com/THUDM/ChatGLM3) + +## 使用方式 + +### 环境安装 + +首先需要下载本仓库: + +```shell +git clone https://github.com/THUDM/ChatGLM3 +cd ChatGLM3 +``` + +然后使用 pip 安装依赖: + +``` +pip install -r requirements.txt +``` + ++ 为了保证 `torch` 的版本正确,请严格按照 [官方文档](https://pytorch.org/get-started/locally/) 的说明安装。 ++ **如果遇到问题,请参照ChatGLM3项目的解决方案,不要在本项目中提问** + +## 三、ChatGLM3-6B 微调 + +本目录提供 ChatGLM3-6B 模型的微调示例,包括全量微调和 P-Tuning v2。格式上,提供多轮对话微调样例和输入输出格式微调样例。 + +如果将模型下载到了本地,本文和代码中的 `THUDM/chatglm3-6b` 字段均应替换为相应地址以从本地加载模型。 + +运行示例需要 `python>=3.10`,除基础的 `torch` 依赖外,示例代码运行还需要依赖。 + + +```bash +pip install -r requirements.txt +``` + +## 测试硬件标准 + +我们仅提供了单机多卡/多机多卡的运行示例,因此您需要至少一台具有多个 GPU 的机器。本仓库中的**默认配置文件**中,我们记录了显存的占用情况: + ++ SFT 全量微调: 4张显卡平均分配,每张显卡占用 `48346MiB` 显存。 ++ P-TuningV2 微调: 1张显卡,占用 `18426MiB` 显存。 ++ LORA 微调: 1张显卡,占用 `14082MiB` 显存。 + +> 请注意,该结果仅供参考,对于不同的参数,显存占用可能会有所不同。请结合你的硬件情况进行调整。 + +> 请注意,我们仅仅使用英伟达 Hopper(代表显卡:H100) 和 Ampère(代表显卡:A100) 架构和系列显卡做过测试。如果您使用其他架构的显卡,可能会出现 +> 1. 未知的训练问题 / 显存占用与上述有误差。 +> 2. 架构过低而不支持某些特性。 +> 3. 推理效果问题。 + > 以上三种情况为社区曾经遇到过的问题,虽然概率极地,如果您遇到了以上问题,可以尝试在社区中解决。 + +## 多轮对话格式 + +多轮对话微调示例采用 ChatGLM3 对话格式约定,对不同角色添加不同 `loss_mask` 从而在一遍计算中为多轮回复计算 `loss`。 + +对于数据文件,样例采用如下格式 + +如果您仅希望微调模型的对话能力,而非工具能力,您应该按照以下格式整理数据。 + +```json +[ + { + "conversations": [ + { + "role": "system", + "content": "" + }, + { + "role": "user", + "content": "" + }, + { + "role": "assistant", + "content": "" + }, + // ... Muti Turn + { + "role": "user", + "content": "" + }, + { + "role": "assistant", + "content": "" + } + ] + } + // ... 
+] +``` + +**请注意,这种方法在微调的step较多的情况下会影响到模型的工具调用功能** + +- `system` 角色为可选角色,但若存在 `system` 角色,其必须出现在 `user` + 角色之前,且一个完整的对话数据(无论单轮或者多轮对话)只能出现一次 `system` 角色。 + +## 数据集格式示例 + +这里以 AdvertiseGen 数据集为例, +您可以从 [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing) +或者 [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) 下载 AdvertiseGen 数据集。 +将解压后的 AdvertiseGen 目录放到 `data` 目录下并自行转换为如下格式数据集。 + +> 请注意,现在的微调代码中加入了验证集,因此,对于一组完整的微调数据集,必须包含训练数据集和验证数据集,测试数据集可以不填写。或者直接用验证数据集代替。 + +``` +{"conversations": [{"role": "user", "content": "类型#裙*裙长#半身裙"}, {"role": "assistant", "content": "这款百搭时尚的仙女半身裙,整体设计非常的飘逸随性,穿上之后每个女孩子都能瞬间变成小仙女啦。料子非常的轻盈,透气性也很好,穿到夏天也很舒适。"}]} +``` + +## 配置文件 + +微调配置文件位于 `config` 目录下,包括以下文件: + +1. `ds_zereo_2 / ds_zereo_3.json`: deepspeed 配置文件。 +2. `lora.yaml / ptuning.yaml / sft.yaml`: 模型不同方式的配置文件,包括模型参数、优化器参数、训练参数等。 部分重要参数解释如下: + + data_config 部分 + + train_file: 训练数据集的文件路径。 + + val_file: 验证数据集的文件路径。 + + test_file: 测试数据集的文件路径。 + + num_proc: 在加载数据时使用的进程数量。 + + max_input_length: 输入序列的最大长度。 + + max_output_length: 输出序列的最大长度。 + + training_args 部分 + + output_dir: 用于保存模型和其他输出的目录。 + + max_steps: 训练的最大步数。 + + per_device_train_batch_size: 每个设备(如 GPU)的训练批次大小。 + + dataloader_num_workers: 加载数据时使用的工作线程数量。 + + remove_unused_columns: 是否移除数据中未使用的列。 + + save_strategy: 模型保存策略(例如,每隔多少步保存一次)。 + + save_steps: 每隔多少步保存一次模型。 + + log_level: 日志级别(如 info)。 + + logging_strategy: 日志记录策略。 + + logging_steps: 每隔多少步记录一次日志。 + + per_device_eval_batch_size: 每个设备的评估批次大小。 + + evaluation_strategy: 评估策略(例如,每隔多少步进行一次评估)。 + + eval_steps: 每隔多少步进行一次评估。 + + predict_with_generate: 是否使用生成模式进行预测。 + + generation_config 部分 + + max_new_tokens: 生成的最大新 token 数量。 + + peft_config 部分 + + peft_type: 使用的参数有效调整类型(如 LORA)。 + + task_type: 任务类型,这里是因果语言模型(CAUSAL_LM)。 + + Lora 参数: + + r: LoRA 的秩。 + + lora_alpha: LoRA 的缩放因子。 + + lora_dropout: 在 LoRA 层使用的 dropout 概率 + + P-TuningV2 参数: + + num_virtual_tokens: 虚拟 token 的数量。 + +## 开始微调 + +通过以下代码执行 **单机多卡/多机多卡** 运行,这是使用 `deepspeed` 作为加速方案的,您需要安装 `deepspeed`。 + +```angular2html +cd finetune_demo +OMP_NUM_THREADS=1 torchrun --standalone --nnodes=1 --nproc_per_node=8 finetune_hf.py data/AdvertiseGen/ THUDM/chatglm3-6b configs/lora.yaml configs/ds_zero_2.json +``` + +通过以下代码执行 **单机单卡** 运行。 + +```angular2html +cd finetune_demo +python finetune_hf.py data/AdvertiseGen/ THUDM/chatglm3-6b configs/lora.yaml +``` + +## 从保存点进行微调 + +如果按照上述方式进行训练,每次微调都会从头开始,如果你想从训练一半的模型开始微调,你可以加入第四个参数,这个参数有两种传入方式: + +1. `yes`, 自动从最后一个保存的 Checkpoint开始训练 +2. `XX`, 断点号数字 例 `600` 则从序号600 Checkpoint开始训练 + +例如,这就是一个从最后一个保存点继续微调的示例代码 + +```angular2html +cd finetune_demo +python finetune_hf.py data/AdvertiseGen/ THUDM/chatglm3-6b configs/lora.yaml yes +``` + +## 使用微调后的模型 + +### 在 inference_hf.py 中验证微调后的模型 + +您可以在 `finetune_demo/inference_hf.py` 中使用我们的微调后的模型,仅需要一行代码就能简单的进行测试。 + +```angular2html +python inference_hf.py your_finetune_path --prompt your prompt +``` + +这样,得到的回答就微调后的回答了。 + +### 在本仓库的其他 demo 或者外部仓库使用微调后的模型 + +您可以在任何一个 demo 内使用我们的 `lora` 和 全参微调的模型。这需要你自己按照以下教程进行修改代码。 + +1. 
使用`finetune_demo/inference_hf.py`中读入模型的方式替换 demo 中读入模型的方式。 + +> 请注意,对于 LORA 和 P-TuningV2 我们没有合并训练后的模型,而是在`adapter_config.json` +> 中记录了微调型的路径,如果你的原始模型位置发生更改,则你应该修改`adapter_config.json`中`base_model_name_or_path`的路径。 + +```python +def load_model_and_tokenizer( + model_dir: Union[str, Path], trust_remote_code: bool = True +) -> tuple[ModelType, TokenizerType]: + model_dir = _resolve_path(model_dir) + if (model_dir / 'adapter_config.json').exists(): + model = AutoPeftModelForCausalLM.from_pretrained( + model_dir, trust_remote_code=trust_remote_code, device_map='auto' + ) + tokenizer_dir = model.peft_config['default'].base_model_name_or_path + else: + model = AutoModelForCausalLM.from_pretrained( + model_dir, trust_remote_code=trust_remote_code, device_map='auto' + ) + tokenizer_dir = model_dir + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_dir, trust_remote_code=trust_remote_code + ) + return model, tokenizer +``` + +2. 读取微调的模型,请注意,你应该使用微调模型的位置,例如,若你的模型位置为`/path/to/finetune_adapter_model` + ,原始模型地址为`path/to/base_model`,则你应该使用`/path/to/finetune_adapter_model`作为`model_dir`。 +3. 完成上述操作后,就能正常使用微调的模型了,其他的调用方式没有变化。 + +### 提示 + +1. 微调代码在开始训练前,会先打印首条训练数据的预处理信息(默认已经注释,可以解除注释),显示为 + +```log +Sanity +Check >> >> >> >> >> >> > +'[gMASK]': 64790 -> -100 +'sop': 64792 -> -100 +'<|system|>': 64794 -> -100 +'': 30910 -> -100 +'\n': 13 -> -100 +'Answer': 20115 -> -100 +'the': 267 -> -100 +'following': 1762 -> -100 +... +'know': 683 -> -100 +'the': 267 -> -100 +'response': 3010 -> -100 +'details': 3296 -> -100 +'.': 30930 -> -100 +'<|assistant|>': 64796 -> -100 +'': 30910 -> 30910 +'\n': 13 -> 13 +'I': 307 -> 307 +'need': 720 -> 720 +'to': 289 -> 289 +'use': 792 -> 792 +... +<< << << << << << < Sanity +Check +``` + +字样,每行依次表示一个 detokenized string, token_id 和 target_id。其中,`target_id`为`token_id`在模型词表中的索引,`-100`表示该 +token 不参与 `loss` 计算。 + +2. `_prepare_model_for_training` 的作用是遍历模型的所有可训练参数,并确保它们的数据类型为`torch.float32`。 + 这在某些情况下是必要的,因为混合精度训练或其他操作可能会更改模型参数的数据类型。该代码默打开,可以注释,但是如果使用 + `half` 格式训练出现问题,可以切换回这个代码,显存可能增加。 +3. 在我们的[Huggingface模型代码](https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py)中,有以下内容: + ```python + if self.gradient_checkpointing and self.training: + layer_ret = torch.utils.checkpoint.checkpoint( + layer, + hidden_states, + attention_mask, + rotary_pos_emb, + kv_caches[index], + use_cache, + use_reentrant=False + ) + ``` + 这可能导致训练的时候显存增加,因此,如果您的显存不足,可以尝试将``` use_reentrant``` 修改为`True`。 +4. 微调后的模型可以使用任何支持 `peft` 载入的模型加速框架,在这里,我们没有提供demo。 +5. 
本仓库的微调数据集格式与 API 微调数据集格式有一定区别 + + ZhipuAI API 微调数据集中的 `messages` 字段在本仓库为 `conversation` 字段。 + + ZhipuAI API 中的微调文件为 `jsonl`, 在本仓库,需要简单的将文件名改为 `json`。 + +> 以上内容来自ChatGLM3项目 + +## 微调示例 + +配置文件 + +```yaml +data_config: + train_file: train.json + val_file: dev.json + test_file: dev.json + num_proc: 10 +max_input_length: 512 +max_output_length: 128 +training_args: + # see `transformers.Seq2SeqTrainingArguments` + output_dir: ./output03-24 + max_steps: 100000 + # settings for data loading + per_device_train_batch_size: 4 + dataloader_num_workers: 10 + remove_unused_columns: false + # settings for saving checkpoints + save_strategy: steps + save_steps: 2000 + # settings for logging + log_level: info + logging_strategy: steps + logging_steps: 10 + # settings for evaluation + per_device_eval_batch_size: 4 + evaluation_strategy: steps + eval_steps: 5200 + # settings for optimizer + # adam_epsilon: 1e-6 + # uncomment the following line to detect nan or inf values + # debug: underflow_overflow + predict_with_generate: yes + # see `transformers.GenerationConfig` + generation_config: + max_new_tokens: 256 + # set your absolute deepspeed path here + #deepspeed: ds_zero_2.json + # set to true if train with cpu. + use_cpu: false +peft_config: + peft_type: LORA + task_type: CAUSAL_LM + r: 8 + lora_alpha: 32 + lora_dropout: 0.1 +``` + +硬件配置:4090 24G、64G内存、CPU 14700KF 20核28线程 + +你需要根据你的硬件配置修改上述参数,各个参数含义上面有说 + +微调命令(需要指定数据集路径和ChatGLM3基础大模型的路径) + +```shell +python finetune_hf.py data/ E:\\Project\\Python\\Langchain-Chatchat\\chatglm3-6b configs/lora.yaml yes +``` diff --git a/doc/images/img10.png b/doc/images/img10.png new file mode 100644 index 0000000000000000000000000000000000000000..f23b61a08b69fa951d48a3b7a797e2e610b4851a GIT binary patch literal 25050 zcmb??cTiK^*RBPm_ui!=Ei~yxI!Y6i4naE7JE2RYw}?oSCLpNvUPFoWE~0?6gc^{N zKM?;gNqeVaq{l|g*l>6EXG>dxZraG4%3Gn@jU;dl)b%Y5dQv~46AnXw8 z&E%oKdvs2dFnw${Lj?%@THFjRJXNIAk!(9;j#@a_tO{Bc#c)U%#;d(1=*`uyx zwjr#R+8PISkbPK-c+A#PH>!9_a%Upa`&U&_MJcQAhVXz|msbz;grKM6C7pcbPA>dy zf2w!BLiHjGb{RnBV0ev#-1Sx04M`Oo%Gs$jI_g6i3iOsCWtNGII6+;L$EM_oRa7q@ z+@>N(O~?PCHuqC?J(aqmM37R-WWM?tq(EsQAK_@R+xHBHQuH!q9zb0!q1Xkw21N>r zGs`|99RqfCCV6-ED1)fJw1yfVZp&CXJ=}kZ5^cUKyf-^S6gf~pXskif6fxd@#6arK z^k4(CF5Ji0#pYU3Fd?OEsZExz=>*#r(=euQdimBMAP^;&U36f6s+$r8|A@bDmJgq# zelCHSsXsCeGq)Mb4wfHv7ZF4TsnLw1~Sz-l2OSBw(63k4^ zdyj%D-@ZZL_|ggXTVMZ-=!0MGMo77-s^(5eTc2j-rOo@IFIj(!0+lemTCFBLQ%CeE zlWHP)NkBA1rXnobs$gYl3m%UW#fyl|6fKy?ok*Pt{F;L_{2*siJ!A9@P9_esu!^X6 zh8IW8+QRdcQ!0s7V|2PJerc;w113pCP}^tH z&-nJQNZEA%X3eff#Ut{Xw@%%l@bl5nXVxB| z4osUu>$8O=t(Ql@>~9{09&9tT2E;EMpp3LgWd9X>g^_RBCfQQ7G#d&$uCBjx_aJ!?yOlf?hN4vg)5DQbFw-J zT3tylya-TGvJB|&^PFC5w)d`HT5DN~8kq2z{50|6@^U+%2Ra@_6u#R!qc_#EkjWa7 z{JEGgO{z11>SqXtPKr^ROK0bDzBln!sT|C;t9G0Q>a-QB5=aZZY!_RtB)q;kn^NlQ zbDh+i%7+g2-RxIi(;t+ka3ao~A~w{zZokgW2yDBpr@qf4)zLu!URHjXQ?1RX zK=GdIC4|!4eLhd{^tGlwVe0@~yLlq9HSHDTX2!GDW6F3x47O4dbA)?oUbC8LI1vkH z>rvf1X8ys35W{Tq5q$;i{s=+&u#uk8r|9R*kZ_zn)1U-dfm`?G*dWc#bz>!D-O+sTOa-M|in!hjCi zG22+ck}dFJvdA|mB6!IKXM*!lIJvy-j6PDmJK`D{5m__%a&UCGX9R4^>*H3^ww_Fu z)JZxZSnI9VQgXCQmc4JAUF4yeXgqd|e6aWc9Fgkux)Q3PbK$V4kt-iKRI{UWITai8 z@=m^K{r0BTPDfXMe-tb(>>@^ONxW5#1psfJf|%Y|ZkC5zvOUvWHJtnIGdwWBwcc(1=rKKHy2$u7ohj<708 zXNpw@6u5`Qhup5y#ID{>)dAwEO4D{5@#~qJYo{JRUH$_R)n#4w2Fp!1U!&MHf346 zne%90oEK)HaX@?0U5p&JwbK9AgbIcDpY(c>RH94**VKU&=SOU>*9D~LH(sJw&fwR_ zu04PJ#G-q!TkDUcWMq!0&ylMdT&ilDWeI*41Am*ef3iRUHW7=0Qva-X`oRPXN#MDW zT1@#gNIAm=&<`mGPY4TAknCD^z^iHlR%tJfA}*MyGmK?)XM`($-9^0n>{o)YQj-p- zICjvaNwx__PPT%jTJQ19^sI-G^C^Mc=Tj^je>RH6Lybl$xi79A_Ol-A0ZU(FXFp{( 
z#5)vjbGe6prgIvkk0b)h>Fe(m?*V_;QbP~t4Jch#A0F`p`-CX65YwH;i<=NnJ=81c zirlACA(`rkCJLWSooVT&++oSIz=<{O?c8${-`Ms13cagK-?s9QbWzMz-Sb_!$);Gl z&Q&6#M14_4m?O7QUNbzmI=NePOx@# zM33t!PeyVUyjk6BCpWEhxaPOb_Q?p`E2{l2CFvRDSw{2aqKi46E%$bT)0nDiSNN=m zkNh2D58nqM?-U=~-3RAtR@aqTxy^L|%!QlKsm6o5n?vW*pm#P~Dj=(?PX)*7D#JN! z&kVRP7OlPvOgJ&sBw5LgFVFH|O9NUiMc4jL`p(>Kj^GnhO|&qoWBGdUK!OfxYBKUD z!F>eWtQWGoJ=g5nQ4!8^mL(tVXllOdx92j)!}#F<)2uX-vg(zt*hu%cv_zs!)3G!4yqHCN`7}xsKNZjDetY4VxLV^B(xbL)K5)+#nBF8gkrM&uHZG*s4 z&G>x(8%0?gsFL1xwmhb^i>#-f^RdbD0fr>y-pPSqny^#+9C)DaurG z5}0=72C$7)I}|gWNj*7e(vpxhLpM_z?i>U}H!)lD9!-k>aok$!Zs|l_@hKLpLK&*o zN3ltkVen1eBL-e&gY`ULpVtc&&Qs*cD?{PSO8_ZAGGy!u$9?|(rS};msEvO*vn>B;v>C~`ktfChO;N7Dj`6Y5&8msx=2mDe>j~A zn&ow{0SOu7fPdAuYjpPFn*ah}7l29!VsLUR+a|zmk=hdn^-db?`l(A&xT<7}%+vzd z+37f+nwgquQr#T1vJOqM0pfYf@QmO06DKX`d|^66wLmbb2;Vw4ThQKO9NCuH%Y5Gg zS;gptn;d_?J@7E_to@CbzhV^b+{0$Oa7afhSrsZ4i1vB0s)C-wtQ`k_a!^^vd<*Jc zL|HVmgnW7J(KSBvbxmd?OMV2qn|PZX-iI~Qo!w6J5&)_6=OK<-gHqNTBP^T`BxM46 z#10Mv@dLpqVR}w3-=%a+kdha>i!OR)m-_6)vors#BPg46gdx1XVL=d6J@6V?Ri4Vd zM@T7;<+;jG&i{{m`Cq24`G(u0ZxPDlrp5j*$-WI4R)`$A0v4XUCQJQ4)BpcgA6OFg zNgRG^Y975ZEc@=~k78&OtcLhsqA&XPJL`&#BQ3VTQ#troXSmI*7*1$XWEZ_g^n_oX z8URrJD;WePSxu_NiJP3=pW+hZYE$)iSo9u=OvlNf_u@ zo4bcf{v!%_rULN<(7(bb5F^9efi#-^VXw&g^F(!cmq-&cD|@TLV#zb#J^3@pW(syv zW1*d?;^?ziL$j)(dRf(27Dy9%{v`AiHW2qDP)8ehTx-L2C&+3g`5z_1Pn?!DG{!(+ zYUk1jR7Z8=@4|oWpc${cvqT7_ZAc|n3$t&^Jv(0_SPFU{7M8Y4x%>K$urWDynzlT% zcu)}I*?+V2e6v@{Q@Hn4hh)!jQWnvi)(j!g>uV+#T=^e`qm985VP^~_ z&(2vmL1||$&u^CNhzEQer$(d3O4Z|6fApdfIe*K0Lt~QECL;fOgt&8abLq(`-lZ3J z^}WL|SDO!3{i$BmHP8_cA4uE8MEq7#W2$Oj51jyQJS)puPg2b@cB&?r=RcS!biAcj z`siwB{~-wPpV;#gZwL;sP`_)tmsvcq!zDjlsc!2?(^-%yxN$WfaXs`DP|fZ+K|GSN zLo2O5OzZa?2gL>IcxNT_$x&7(bE?puP_aLjHajLC=TD>+asU;uT77SC01lCli^%u-QPXj3!vYH|MI>WnJ4y2k(!7-v$o^!x znw09^<_09#({F_=q_XekY^De8sdUyy9TZ|LO%u(u=>l^tAv!qJz|dkGQ6uG3mQ6*? zY|WP)$bSU0&5cX=jYLm3+hqVY9)U!P3O$zZz~i(~bDgSznkv#=8fiHmhTHY)KURM* z&d5~SWw3Al8*nmRXSS8fE5>Mt^&%?9$Sa+P=uNV zT{S0)f_dBS{Y9q3M}MWb$-941M2#%!uau`i{{MEvr0%blb-vDv0I|-~&?IdYl?+6X zUad{o%2mui0Z7uC^DYu-ozjy{)SUY6{0D9}exhkgDrae@|4k^bAzl zLQ%{;e&PuNzoG9g8BVi^w_KvVbwCbbL2X|UedGMxUH}==kU|&{`JwBlPBo8SqxjH zTAj^2sf_@`t(Re)RZVP0*9Wz;-cnt7|G?U23`hV(3aCQ*9~}5`Zh$4fn}JUx%8WC7 z6*8c0LaG|d%FJnf(0tOrBd(B#3YEDe(Bmc%9(TZMD(+RMooJwa44^cYYvTvu;;?pr z@eEp!^9AF&?+?Q}GYnDrME|kX;Z!#(vTYdWIrYFY(x|+~^@7J<3DP&$7~-`NDj0y0 z2+n8( zAa@aBF{#;yZ20JFO54bMrXH=bqYtEB$@ItT4bCU}ll&E2yP|FWLiTRS9@ZJ^yDl2t zFFSuVoDlturs6UlHcc{=7yk9AVy~rA!dwFAd1fEy_Ll!>RwsG3nPi1&D-jZ~NUv&GQ=Hj}ldx^B3%E~VrO(&RO?E@oxUE?a~d-5-(osz8%ee8uDn5maT3@6~e z|6sudRcV1M*{sqv_U&1#c z5mecT?YwD>F}fDPn?gtwZTwTuN_jln%G23mwqR zinA(glbxvne(m60BEaUly1r+g00_5!`n2v_@<^7ab|A4wMv(PiL{eR$qMY0wsa>Iw zxpv9Te-|Q^Do7&Rv#uC;eed*iRK#t>u3^tk$e|qJP83SHH~)9db$>OSiFDqKPl(7@ z&{*zS9v%cN30HA|ip2jvr@Auv{{|L|Z~W(**==9844~K7-N{i>e~N`TkHM9P9H;i#x1h7cB4KT0LZ&!(=5LEvWc4?sW;A1Jl;{_QuO@Q$%GWPymw>K7*KB`w}IWf<2Y1TeKxiOV-R^yxrI?PqA0u2O%G@D z&IL`jL~qn%^gs3qV>Phw>%y#dF9PF=YZ~6{9Xt}I*SH1(gs)5peD8gZ#KE2 z;Jd%i1Xl;tz{tN#PA?8KF*2GDI!fT4PEY?_RizxR%ns}d-)%&Y^exhQQOJ(8Ul1zt zFJ8{#teDP*_eD_3|BbJ{L2yAE4}N4KkC|An6Z6~eYz$3yDMSRah3aS@F3-;J6LM&u z_m2lteF2Zr0slB{l_UrX1kEJ;-M){ja2`=rHz}x>9*B)Y;}OXN0=k^A`M*gbT7}c4 z^fKAzeIWQmr_Li~! 
zhE$4<^akGGh64$g_DMnQPc1$XF}!bhE=31bmvDbDMM?<`ms|NMy}17tvHS1zbs&N` zWrYtbC$%Y>7OZtv;b(p*92Chk#brA?XjeLHnX`*Y|37A~ahw|sL{O?@pmF*77GW++ zOmvg&8FTFbknUPcSn7Y?TCWk{n7qBDh2C=6Z~P(=@(Qsnm@;IDp8iQ0bJcP8@7U~) z;6U{d9M5jneU~ilS;H7%5gN(Ke@hH@1*aBMI~;vKl7H*q$bTOC@B3~4ZNdG2*|CfB z(DLuomRKUwd=?zXot$0%)a;p)>503Itc1Y+^uI9Sl8`p}R(^roonhH|%yO1=NWiXY zM77P_hB?B@%sStLA0&!XX_p&Nn>3`jgWjL{=?y2DU-^tV-SlI1*9P7w{6#{N{paYx z+!@lL9Bh7w{V4aA!5k@2K&i?q)^}%{9kM<->Wud8+FGClxIje%g1mkgqaRPzYEmEePATs*IQyZaxafHX~)(DqD6*5PjY;yXJ^Sgui z1D4-38t{QBYhyDgXPD~+(jfHRsa5-+PW$5Y1s(wqmnM4gU8sie)0etT#U)~Fy6Mqi z+H}dz@YW~USgmqaGlcKOTJ4n+(c1PeZ@*;35y4|8nx6w3(<=~3`5w0SH4hpWYUKCB z?Z__^ttX9rNSpe<#_1)ER}R{!Ap6h;f!ehXE1Ag>-m%Rt z#ipN*^*iB1jiZ$2&#`erzv=d$&A7@-z+u4C^33WPMTgWIgi+GLZ{g4C%Pt??G?D6&+lH5s#yLZ zsUc7%3p(f&S=FGn1c#Lu3teW7uST;!{Z!e!X#p^4vKlQL?7LcqnY|AGgy}t; zfNT)9VjLc$qZslyL7p`!h;}t}*|($ZRyX2p7Yoz_E)dOYXd6L*sL9+n<7Z%6p3C#^ z@;tsw?|&V}ytQn2x1KmLwi6*RL0&U47qM@a_`A{eB1J_Fl~1s=`g!(^G(E0>2v%ju zBI=wpc&&AX#_+i zr6Vl9UsA7a?UF+e+8g+f!*jqhPZ-zio{9hFxoBFNz91q1_T8`cxC2302)OCYMmq}* z*?|FSR2NI86>%LUW%NSn>dyS@6p9c;x`V~YJFR@WpG?2kwV0E@ z>n&xFdTMAD==&RdQ*KfolEVc8#j)cjYFh%7W5w8Q;Vlv)Rmx|mbOY6pXiLlFT=Epe zMct{!Ey%aI+UZh<33S=4`lT58=lpD1a>8c8J#T-iC`5MmeAZxV%h}C6o^!|JuIwRb z)qNroojuD-OSHS?P6>1n7(e^47IP28tZ22Qc`<4J9m!PnNhg+Y2}`JJX+w)S6EdlI z%XiG*RS@ z-@GLPng8Gjb0N-8g269BE^}a!k%khQVDKUjQoPKR*rtA0R?=|Oj%Z5%~OZoCw znsl9w5NV~=YjIRDr|j8!7a7`9{ja$McKc~?P(?OtoVC5VE>835T&?XXV z9^b{zQ5?@nmzn#ZG=C+9w*tTjZKLg94++Z|(STTAbU||mc`9_un*yuqV9%S-6nD$A zg{?Nz8lj8!pb~7!(SX!6AG+8GtK{^;!qb-vAT?cr?vVaIfWWQ4E;#8W{c z!u|vDV;MnW?boksUwr2pb*iw3>w7MZa@^=hWKtKGMC~Td%wRZf#E!e(Y6GbrLi+1} z-t6H_>|ZK?Ac=wr3WezV{f=CY1VV>pV-549DtNWztOxo1&Oo=L@Q6a z+do<57{=VRqDtbH&gAxE?EV~(!SY^yjUa!;0GM1O2%xfD{CvlpCOVr+!>+K4E#n@J zt*$9f2Chi0{_y~GpGsujNpK?t(?`j3wH^smIUG?{!4**H!O=F=c;R3wOpyxf zk~rQ2u91ErMkS4%mhagJlh_!$RJc8CU32xZ z+7|;B(CuyfaVs6$sWo*1NIm}ek618!aWri_fe94C)WSs9ZYUrd$>2d=CjhF`8k>AG9xpmEWSZk2 z@$!ZXk7mnLjBvKZ-K~`s1FNH+)$qSlM=G-y#pP?FEO##3c>;t(*h6^&bkU3|zXi`{ zK2B_J(ABz69I_Rtv)IWUliTW24WQOM8DAWYecNVkapT+@# zcx6+t@nsI6}gP@4)W$mlC_r6)|S^aUqZM}@2jKDRpkU= zn){y><(b8KPFH42Nu2o`3rnK9_y>xDfPe|Pp$FVBs^0lW|J$2?&{5yNzEmI079k+w z!F!=@%0(55lNN%LCdrvaLA{rX2#R^+s!&~++0Jv?cj>$Koc+u!oNE76PjHT3I~rk; zfKjvS+E{5e!qdm{JD(z5@_Y^@ZDuTj2?~lcvrJLTl69387I;R6Gc_d<3z85Fgz}uE zFQ*EwY^WbB-(e3lLSHHb3VUB$Q_0h{#hg|VD)8Tb`a%7m4pBT?XJE5z+Pg89w*v9^ zMHc<;d&YAtRsyq9PX&=#D|)!fhmc`)!jr*xNo7#NrJ(npRQ6_AUWB?_wI zLlHZP=JYqy(To9WvV%~R}DC&$04DPZ1HFB3f8R?NSonw{1988I`tngo_$pEMU z>e@>-N>~eO5aa*`ZNmuxVmD=g$-d)OqXhFcdG>~lZk9M4u) zI7psorPWcGU z@a5jew)678XO=Rp8L0=GhTy4?Zf0vG%h^t*>I0&0#wM?0zGmtigd!NPg`xYG$yhE^ z#Fko1tNp<8_&3ax%*&y5yZ7#7UtF1Yk647}(12l%8bE##x7` z>Erc2;p~%P;`pn1Tlgj!8n#&+vo-$l#kErH`t?hKJ^BrbUG?w`=|6>7iw?QfP}wG@ zU!^k;0Lmxu`t918G~I)K`zmPp1nW&H<=u>ot{&Eu zGr`{*sqI$i!os3H1v;p4*LdRQkJ+*^Mw}?LB*l>@$ zXn1Gxp|XJ8AKNS>mrQXIjlI=31VSQk}2RLvtc*H7K zbznG|(Ag+fuxx_^z_I3dQ_ZVPzErGVqd+4shHO+lz$&6egR?Ez4oW^j{wDEX91|TJ zA+9t9($yx4P6ze0t}u)8gfO94#xn$fQm*_%MjJ#Rh&S{m@VHBwtudRyROG=eZSdEq znokghF-36({rp$zx1ZI#+>_@)JQ~H=eMc7e;NE-j0J0Lq+5Vy=vn}27z;e?PUG}4~ zEuaJ4c>87V3HfzrD7kTzdYt4f%r8+1+5u${>g$ z>{ts;rW%dt$b`0jzy>~?=6_l(9H@)7+%t-Hu6UlXFkhB^pU7;@!Z2xWfZ_94c*^wK z(8(4(>PhkUjiT_N*U+z)ZWi>>>XvoSN?-iWGBBlOjWVO^f1$3%7a(kvK(jn0;`G&C z(2?SL(tkVaT~Dn!i?oI{vXh^W+SRn73bJ<8cVxLYcX0qMQDid?U>ISJ(+H0&bZ(-l zwOc9LKz=#W^z^wFQ$hD(tII!Na*i`k1#4blg3&vC5WW8htH&$XF4 z96|f9MZuY|vce<>Nan4SbypbJB)Y-^HD1K6^W*nHY(IU8rtsPYT4Wrt8PLx824@_E<~`H+;m0>%l!M-Dnm zBiX$swS1wCj4SATdTi005`HMv^FkDbpYN!bl2kmoyF_q8gsWQ&J^WCs z)2)Pgxl08Vbd#dza!>kfh3#$I&KnOD2@5zeJ~^Ujzizjrx$ndiEc~zk>ki<(Mrk14 
zMB-+jfG@X>pCpr;Q#HbH)@>(`I;zN`_K{8JCNd9TF~K$a+>VN(i?7T{%v}hescJxQ z!1{4@XS}in8bNpPJ?mRFP$LY;`aW^^o4I95{~e}zO@<2~nASpc+-(8Otut-gVrqoe z=DDzg^~TROdQnK~1s&tL+`FL~`}=oT@QODQr;v*5)Dc*eP0k8W_PgGwIqR!Kr$stV0T7+#DI zWDAn3bglKI$dhXlAcAjYX<_BvW-pj4%eX{{I#fIv0 zW{iQ!v{<%X_e6qv?hR=9?dD2A&@dMIptV~p57NPN@K#Fns|%@4~NZv$`(ZF zK|Qs7@)PE#)}>c0UrNuhVgYUMUtQ)0wn<_SGx?=nbs6BFecNQR!~C*pj1$IG!25@1 z0(qRlGl9ASjN@N~)<)buhaJ=7z9eui7gDF9(ygF(-Ub=Xnsw2d5viN zpruCm2XV75r3?^TM=)PIh$kY<)_{Z7b^>-674{5wXwT}2A5o@VE)b;E9(}^))6!R8 zoWkQvNmlM_bhc-ePO!c#ec?dP79)v6G<&mU7gj5FpHllSM-5*nzwu~JVz45sf}981 z5Pyk*j4IRucYq2e_on*OYduP6`KFbit7tV*4P~?Z;#PaRUB*PgW-4|o#fn1wCTYaA znxBiPC%fV$#&D~5rYNFw@PR$T!A1wvu#lv`+h<*k&g5@|QEOu7*3bMcP)D#UL3Nko zq?bc&++6Ey5Xv7)C)d-&526T*+`JVFm>F!!^pU&QJdvl$nHG4NtvxFdB3eqqn!GbI zGkI{xU`_}TBMSw?*~2&o}|sUDU5z~kRb z)37we(La(c>w*KKLoAJZGMZKlu3*!Pu!(MPOa}Y8r21r=#)O> zcDA3qEPtz#yHtC(u{C;7f2g}Ip8YgPg&fE#7xR8L2bA|Vsi^|#EMPM)v@u`xDEwU2 zUJdRD2B>Izkeb(<9_H;61N5zW-^brBDqJi=+#fy8iE0f`tK}2CqZA!&bGCBESG&IQ zaO6!xx;OT`tH2{WBUZ*T%wKqUv-!(V&dqt)KzOhl3502E~xC8@28x z?~3CII$J;9IcgFsY5!IBq+w&D_$Y|f!)-PudX?8U$R1|{M+ZjSMPN!!{;DMP8HJdK zAS+K_y;et39#JA9F&mLc6up*HM)=gaIQP>s$m%{8`>M<*E%29cVmC(2t1Spp6V!I?lwn zK&u$FKSa{uUbQE(7#44g-s#%xj?wjWObw>W>3@l!XdP<9ruAmJTTslPL=E5d@U;S; zp03j*h~oE@m;q}TUZ0a*(ZO$S36TfQ9iaXqe_%rQ$MM81fwIY`mXW2y{A)_%kG{4N zN!R_R8$HS6G9LnLfXPU^p^qXa?Urn5`>Yq*^4tp?DBb#;N9^WpFH}4VK~x_a%Wz26 zCPoF#G74BAT|6J^8>c~Z3xl>e6hW~PqL zsdAZefB;Y7VS|M2vs+@sx(E3~shWq-zTgyoP>JyM+OaNQF#6UJ0E0dc5@uwRSJn<@ zG#B41-VNe;I>_`jogWN%pe#-m;_5(>2rW0rD2P4W%&~p-z9h2HlvUA)_~8g%*>dGp zW>R)sv=T9_Bxt0@)sU|#nWQ^oIeb=}|JFdMj_M8|2mO0pyGa)a0Q~E@3OayYChKkA>AFA)qJ_$`RE)Mw^ z&nU6eY{{wj^~;{7B(Ee>20om4ydYf0F^UWSoT0NB?ZE*GYR6LK(jq%71ken|2SC)S z?098y{^R@C?RoXdHj*)1fZ4k%>JvSdo?_QbH7BV#XK(2EVolX;;ZwKc8}6ssLgv*{ zCbS=#_?RxFD%B^act1Dqw_W;3&}1eUW1!<5wMBQv9vXcHIM`N~skj4gzb|*0trIM( z1|_#ua8%^e$D|9&!q=2&ws~>vcF_c;7gu~cC@EIDPE21i>dLF}M7sBeKI$EtD*22P z<;uMn4#yR@3O0GxlK2P1^LJwGnlw7|Nk?%}Rt4p>ejYso z!PLA|k)A$egShvUzQB;#)gm={+EKiSQK`JsTse)#(R-ysa067T)+g!UYJuiyvt>=L6%_2 zY||K?*XLqke3b zjr}IQeQxKgCfla5P@efBA80u66-)8mnvw}V<~?!J0VSqnBI}d^?*8?OevWhN{#89V zx#cyCt+^93u!RMF*OL#6zB!L5TRkQPnyeQWR^cDfK=n5Q+{A6jm6YT{>NOtk)xJOq*IbbWEh^E1EG_wq+}W~qE-o>Zu;`Xpro z*0&RRUmLI5)$T<$XLwewR)5|Xoz>1>$VO#l;%Ki^eJy$!DuTN`si#Nwxi`ajWD&oT>P9aHgXm)y1vc3C zB~X&h__~dKpQ|^DH@;9n=Y}9iaAs<;t2)$2w5gEPtjbr?ZOZp{pmf=*JnaWM8B<1@ zEx_>>_sSnVX-RkY_c(axaF!qZEzMw)495JusW{P_yB6rtKOY|$yrPyp@f0MV1g-ov(G`RD0Q#jS0( z{Ld!5u^;K50Q^j|A7SMYT$FedFH%o&dJpr#+t#|hKbNSyYiB^YmmVC#NrVGGmzP^) z_^7>=a5$6IH?m(lB8FaVX`z@>8ai#X&LoMOgD+N}cq;ja(N1a#s{OnQY4JP4C*8TC z@#Z~V{(askC~o+Q2l%}^wtd~5%nyvbyX%EL9cJe0Nu|tK^jsXaJ(Je_OBKPuqB<0{ zY;N`_`!*Ow#Wv=6uJ*jsLVy)os}9T zYrcr(^hXPF9VYgY@Ep&>V-eRizjO}Lyw1`@TdrFH9yMC>Hk;3T$zE`&gcaPuNuzCO2mx^X5d%-2_)0 zDJ~+d!~2JX=Dje31V50}HS4yuK^t!Ni_qw%J58Y!sL>epy~dIv@oZUOm({Zoez<>n zMjNH*^TSJK31sKf?P5$6!WyZd8nmHmIoFPdyLFNpuzrv9s_pi#lkcek#~h5lXXqLS zXMyGFU%#GCOuwuWR?~IA4eW9RR}a!nhPsNTMF2^MZCA;~17*e`_`Qp%6k1G@$?A*O zEqz*S4|5127%xVPP=K)1Q#(GxbItp;p7?v=OmR8ZXUq4f6f&=LX_tAAiByMcwzQZP zNdjXvIKsK8h1%)vwV-(Tqof@YGwuEZ+;^e@Jo|MXH zb+pO`)?(~*e$*-c4P)BB}^Ye!-HJ)U3Z&Axe%A%}r-LI2l^IoVG8ro&dxK|6Y z_M~~gwL*PisygbSk&JWMgGZ{%+D$s_39EZ6vkTFtQlatz#M!Sb(l!v^A!(`7BJ_cs z`>F>gw+HQ0PkXQ#dhSE`y^91krChJx5j+f`;CQ0XBzt%0E~hs#_J!q@%N^TU*~fyf z-}FX>J9$a(5yy4w(q0V^$CH26XQYjctv4y$RlfJ;g3=X_1Qx_|%EH;t8Fs^-`e3=b zr$czi;QW(G;-*4`ZpwXp&*kJ&R!7I>Cw>)L9|BQIf*hh?^u6mHv{RgtHZFHM7sw>e z_RLl|fc9l3+QX&w&}Z`LTax)HycUqNQXNX^m5>0FgF5$M&;hjxq@ed_kLJ_&>Cw}} zh7#1sLM)-lubeVWjh)kc3GzB879In>5_&!GV(MG%tmrgbrjsyiOh&+4Ae6nTPV0J@ 
ztW%4TI;mY}?u)f<|CM4SNYa^yl{7apwr4}6cEUdOnpl~i(*Zwm_9A+PF!7-JN8 zR=3Lu)q1O{V`tX%z`SuOB`tM&)L#ywvNf%-|2b9@6;KAk>oEzS6lvFmxouXKeb};APSoe>Iit@qP~3|%>w|fdV{(hBSOUOCU{ZzXJ$C0C z4UH;2;E+h{+{Wx9ei-cJVkN)}l7Yx@Md{fH!MDZX>ia+EKc}?E#$057)_bCPyrdeJ zd`eK;`HW&7&KB5ttjbS!jLwd;OAR_izid(9qCB~$uRe@;86c)nvzf2H(;TAvH4nhc zp00jrrZoPwN4eh1bCVhoj?+>f#UHM$lvVkfK8Hp>Xy}Uhn`uS!?prfft1JYi&4X0Eq{m)WhzC0 zf4x&RY@@{X- zevy>olE|VELmZzZ)0hzy1)^Mh=9eEcdkv%+`BR@aH@6FQN{r0$LZeTM+3hAHDI0^>lD{A+ki zb*MDCVRZERzz%~8yAm-aNwLlKx2LJ!iiL8HG5%+PZOXin#o6a?E47!ox|DtAt*1Q` z%ITbSP#s(5`%01WyPD~Ql*o0`>H)^E&r4%-*<{eJQoXA6$XF!>qH!b5y;MhT%yBLW z-PUorIyKy1Y$lHqjjtRS?JruQ^tA3(a>3 zywq8QNk(S7ARow9`GscujjNglgb7Z^9y7dx<9E^~OTmeLqsxgq$*(!Gya0dush#r@ z*zNFyIhkcOq8-DD&G$!>t$hTPM77*~Ua1X9=v3;+-PzxTawZO)ibeY=5iWq4T!cycdA8 zwzswc;5~A7)W?2SIku=kW4?<(3QrdJ8SkMX%3PkefHr#*wzl`E&0Q(NV?gF%fHK&z zHESLq`$giZ%|E;qb=n+&;sPrC%PBWun&f%;6nZ zIE6)9{a^rW`EACkO~;YeNtmU}t+j$5MvnMeP{96@skY5BGgFG|@Ao^H@0z=dnjGYM zUD9331ul3@nP7N&i=FQ?Q*$QROVOCOb$!1nLn*(~!Uf`Bc`*j|n#x)!xZ;JsAEq|E z#S=U|m-aKc*!m9!|EmmPEPQtv3ULyDJs6z5!RX2eTRa{&LbemGHxq-n!o8V%pfruV zA5Twn9;-E~`BX@BM9bVtoQdgh9mOKzaXrPc=>X7lGM9VdU6bUeOcCSn_H27YofVP3 z>Pu4?cTk?sFr8Tz3pa@?RJ&r37bZCZk?|KnGh*8%PIJSuNS}GSZ@LIxay;8?1fE`$`vY6karXb}%&l^bBQLgYwZN~dKgo{2ob-Jr2d6{*LrCmn zSCw+DczXR~h#f}CnNv4pxoEXPop1U_tX53hfRW+xB! z{_r$PCp4;8Tgij_`2u~Rm;pwNMhQi{lz2#I9ZHdv*gO0EUJv(s>UR@1cfIkG&!th0t=U zX-$vzN;8K4fL+i>>O+DHa&hYk<1K?hm%9_d&!@g9;Zc8WlOY#HF^6O;#@SgR2gGHh z;b_L_xerG;pAsd(-Ja$63b_)Ks0<~c`9@;s*FU~w zs(&LxTBOHo@b4=cYj9eBT z$ICOePX`r;JD}XTLuYZ|`vTLuQidP;2HLa1014MIr)IY=lHTjR|Fsw=fr*Td--Rt_ zy`;e?h;yR++_gs5)D)T_EN<@02{voTKmB!tbQ7~a9)vtv->=;&)eecTW}6cd9?{Yb zrP~(EfTi>&uF0g}KX0HsU2Z+WZ9EZdfq_4(v^zxQ+2T+V*2DXO3QhK6XHg@ayK7dB zeFY~MACm{zCD{`DMH$S-1glGL&ca@UZK`#x^%V#T_!|@gg>x_mj75!EWKELg4cH?4 zV>__ZWZ(;ks9VTJqH>$yT32Fnfm(KtYO+EXJfxRi;0TNL6+C_;!*UD{nbLKepNu!> zUCNx!hi=mFA{De$+RSvHjX>Jfq@uN_Xd_OR1)F34oHUB6BT||JliPZO?8D_P!Lx-S zrN8_o0#G(j(i+%Ii&jwxoWH?AaeueqRrNn(r_reRz0c2^28Dl8yTC%&{AP})CsKR=Xp6Z9& zvPR@hKZIFmIf~%aJN)JBi@gF#6eQvn>5oJwIxura;2l|WABW&`t~5!@J*=v;^(BR+ zUf#Gui;pa)SIzjF_heJ4aln&=!>-_o@dVbcX1&Hhk}>l9#DYYs-4+SrC;f~n4fHj_ zQD+;mNg&J!k<^>gn!G!X60{il}B zZY-}rawWR{*ByZJ{RA^Y=1NOX>(iJEm`C!ia>xBdvqH5a?7G;L{g&my5Q{>PBM6xb zgl{$)0bc0-s!6@qlLLfF@6}s6Z*sfb$&OA{>DSNl;5{wV`+Zi;u2Ev`5p*vF$1eD{ z>uvI)1osi-q*3A#|5^**H`?rYcGfyA=qPNX1SVweFR6$3lnx~<&JMaLnXJUwQznv)QziHzwz$w@GqKeMS~@-3|BQEL~hn2}~| zeal7GZ%UKHACPhx5Yeh)9L*Q2OK@{cm<+*+VdJ*Ln$EtDt9grRs30KDm&4;GM(|mQ z7aqsyDhi6Ba&8H^Q5mYqjq{*;EghUNgjz8l-b)njIv?J{HJhAs#Ndz2>FGT%t-_ZC zUQGMnd4HD&Ru}PTb$)TWwBs|}uxi)wc}VmON>t5Lk}kbZPir5ey~r!aOmEbaJ0njk z56TPMAc5#zap-YMgjIJPq+Gfu<4jW_Q`R*|)+FG5$8y*Hc zGB&o1HCGl&`K_ujJn~}dAmGiS0AQTNtwpL{Bn7Qh!k zZj3=%A1ij<9+R1|rbK71BFI{ln~uR=}-6^?d|2;`EuB^YNpYiQ$9TFHx~v*YJw4BuI^isfPd z#04T-{oA0^vx3{``|6Q>6M6^vbGN1atZR!=l*wTn-p$v z&Q|H!gy-@1ebl?(r(7RrJ6Q}vx)gNeMv4$5^8{du(KisYXSe+-X7#BH_ikeS50YAz z^g={Z&fDh?{=PhZZTiKg($qhWT_4Ui&c;zuBCS+LGv~ik&X>-y^kv`PfV+%+~WlV(|kb_{3+c9TO?wM)pj||;g7>qq+8z~5^Pk4~Le!Ml- zzdGRnY@BfhxYc}8Q*wU}vSy{Dr|#^Nym1UVV{+BFI_=<>sq%&x*mOGsuMlgCh@XVS z%kC#Nj4oDoYT>(=3X_;vaK_Cch(q`y4#233w0ha(bH%_+Ldu#2+_gkgUsNVOD1`+tS3N=$} z4nUKxGbWCObnFRZ+QRG1^wEL~^?Sfz_(?{Vf-iw0_S*h34jeFnc3f9fOSy)pc}@!B;hph(PjM^yjYjU^THjlXK+ zTriV$k*Z3q zcx{|P5Vt1!g_pZa9a;D{Q`>5?@;vvGh=op1?-+#f*q84Og-&k-@nn*uPci$lY&(>l zAoE`VU`P$y+f!RG{*%VeKK^t}Yh&nq6tMe33=xg>|`dp;c z-M3ZO3(1Xjekrlb8d)TDm#Tw}m`|SwCgpoi_fpK;2`)|s^>YFa@@pm&__78T96kMy z<90;V6Meb&24AaMX-9STAIZCE%oXYg;;gw>s+gO0&&Tb{pI-i5YLt7(^tiPmC8bz~Bi~G~2l5!S!YURcI;34$5+|#e?_|On^e~|x5Qv0ea&yJNpY2d6Wq|+$1XUQ%zg@4Mh 
z=FljLF%vryrGL#b4sSwLCK^i&rcQatcnpv9Yc zJ)M`Ak_>U#p&wm39yH1f@KRns!(2KEnv|z%`kqeX9KWxlO5TA6P;UQQajxvZ_TgtOMrag1-X6dkRCee%sd&+sS#3K=?S82jH{a z!6$UBk^lL}3Goqn>Sdb9&GPu$+cWfyeEbG{BjEPO4aY0eNcLw;@BVp^|B&j%zuWoO z`ETCUy_tUTdH&8jDg{6v71aYWDteaJd`_R`zUdd$di?3rv!wmlLxEq&oLg#Xn#&Bq znrlj4K8k9Ytpwh~ZOS$_-iM1-(U9 zT$F*0PU`~!3nk&#yLp-Ylf!a@bB`>X$3gR^xag~`xL01D)23b(hrWu%P`i$GgSj@W z#@yMWN#O6-sz88{_r_oVR%!9HF2GxjgM^2Om7JuV;(ppBvznK?kG6OoPFfh5<&I@Tx0eAs*suY(qgUh_`|MqG1+xj^UTy^TM+`LZ|sU%&#k`<@9`10To ztw+>Zm>5>6N_pikA8q?#Cw@d1J{&P_)dZ?2egz*=VfnH`Yw?*<7<_1c*#ZUy>P8(r z#+}~?V8_lWHr-AA2cyKUy*!6?6_1eaTNu;B0o;mfS~uwSGdBdEjvbc>6=1uWlCaoU z6usqC5N5k=R1mDuBpHhU$V-$N&65vZ)dpATK9G|4szAuqGnq zZAyM6#xwSBH}V}N1r#sfK}|Vx z+-J$mc#`P7VIx((og-T~k?>ES=5%ecZF6zDY}z8)&jgXj6X&iO6?}Zzavx0wAW|pP z>_JPeYjewFaOvJ^c9X2=xm$VK)jKbQX~(&x*MQEf&cNS5cfsQiLBNof8QbQYj-Wra zclKJ|_M#upHD~gY{+$WtM9*Pag;!4pJe!o0WCJy;_lVa#_juuGu?|N>S}#4=>n;VvPk>{u$Az*r zSAreTHDl{A6fs=L(VF-_P>NCP-5f^crxP9A+gWOkoeIc^~>xym)BPT9PCqmx!-{hYPQ?~L9 z2o_o_RAgnL9MOKCJnCLzd$7or^p zAGC$opAj#Fw#)}1?4|QqbaMPv8c2Y(_v^Y9L?*dm?>l#WT|XbDmWi;y8M%_N?6KuQtM42 z^_U-v=XuHS-Sn%$KY!bOP0VrPmp_#2t~d8-7Se8^oorC#n=x)|q zKAZx=sk1`kGrl)VILmcIk}o3MZoIlIU!9AcuaQAa%F0ffe$*X+DIPP6E-0D1Y0F8F z*4CT4m$Y1?fxhQO_)s@R9yA~)Z5K)3P6p% z5zUWWcAtRrjVn%zwz0 zAl=mOp!c?eSxA88;KtAS0A)wDd@ggXjx(>5=8TnBjrh(-W`_6+Jlwqk`%ZdQSwz}G zvGk1RcQ93e&yN?KsE9|&%Ek_2RwZ}?z2mxC<*kYql_j={E>_5@cOD(L%x#^Uc%kU( zfpd4!Y#b@#px#bl&*;)U68l2UxTw5IZ_*|cpWV<<=$O-NV1MM!<+Y;mwVx5QW8{Y& zeO4v2ikLCj`!?T<&Q%U988#|SI;SRww1@(7R%j!44n%1$AVe$;>q}42?>if26dO9S zj`zlJ!_VQjmE{f55r_lvbX-$~iyaH`iocXaCUFo4aUwi}=tVc5@=+Rj>wOdB&PM|A zpYt}a*D@#y&)aZiY#8(j2|4lMO1mvb(^uK~?nXuTeqvs#=97u%H9tD*BZD-;Ay)f4 zp_#ZB#)u876Cx6x<8AtH)gCVNKhQp~v!Kp&1e9~L>6AM1h=dICCV5!zybjXB+ec$) zUlG%LtoYTC5o1K8J3L{RqkX^=_7|W!Y3EA<>J`r{!~YM3SX-R{ literal 0 HcmV?d00001 diff --git a/readme.md b/readme.md index 2aefe72..f36b2e7 100644 --- a/readme.md +++ b/readme.md @@ -113,7 +113,7 @@ [详见开发者手册](./doc/开发者手册.md) -[AI聊天](./doc/ai_readme.md) +[AI聊天](./MemoAI/readme.md) ## PC端使用过程中部分问题解决(可参考)