From 4318347d3f1982c773dad1074636ec7b550770fd Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Tue, 22 Aug 2023 19:46:09 +0800
Subject: [PATCH] update template

---
 README.md                               |  2 +-
 README_zh.md                            |  2 +-
 data/README.md                          | 15 ++++++++-------
 data/README_zh.md                       | 16 ++++++++--------
 src/llmtuner/extras/template.py         | 16 ++++++++++++++++
 src/llmtuner/hparams/finetuning_args.py |  7 ++++---
 6 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index dcb0e1b7..0db73b16 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@
 | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | W_pack | baichuan |
 | [InternLM](https://github.com/InternLM/InternLM) | 7B | q_proj,v_proj | intern |
 | [Qwen](https://github.com/QwenLM/Qwen-7B) | 7B | c_attn | chatml |
-| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | - |
+| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | xverse |
 | [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 6B | query_key_value | chatglm2 |
 
 - **Default module** is used for the `--lora_target` argument. Please use `python src/train_bash.py -h` to see all available options.
diff --git a/README_zh.md b/README_zh.md
index a021af21..ec4a524c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -58,7 +58,7 @@
 | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | W_pack | baichuan |
 | [InternLM](https://github.com/InternLM/InternLM) | 7B | q_proj,v_proj | intern |
 | [Qwen](https://github.com/QwenLM/Qwen-7B) | 7B | c_attn | chatml |
-| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | - |
+| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | xverse |
 | [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 6B | query_key_value | chatglm2 |
 
 - **默认模块**是 `--lora_target` 参数的部分可选项。请使用 `python src/train_bash.py -h` 查看全部可选项。
diff --git a/data/README.md b/data/README.md
index 45ea7dad..dc1c8bce 100644
--- a/data/README.md
+++ b/data/README.md
@@ -17,14 +17,15 @@ If you are using a custom dataset, please provide your dataset definition in the
 
 where the `prompt` and `response` columns should contain non-empty values. The `query` column will be concatenated with the `prompt` column and used as input for the model. The `history` column should contain a list where each element is a string tuple representing a query-response pair.
 
-For Reward-Modeling(rm) dataset, the first n examples represent chosen examples and the last n examples represent rejected examples.
+For datasets used in reward modeling or DPO training, the `response` column should be a string list, with the preferred answers appearing first, for example: + ```json { - "instruction": "Question?", - "input": "", - "output": [ - "chosen answer", - "rejected answer" - ] + "instruction": "Question", + "input": "", + "output": [ + "Chosen answer", + "Rejected answer" + ] } ``` diff --git a/data/README_zh.md b/data/README_zh.md index a36b3750..054ee8ea 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -17,15 +17,15 @@ 其中 `prompt` 和 `response` 列应当是非空的字符串。`query` 列的内容将会和 `prompt` 列拼接作为模型输入。`history` 列应当是一个列表,其中每个元素是一个字符串二元组,分别代表用户请求和模型答复。 -对于奖励模型(rm)的数据集,头N个输出表示`chosen`的数据,后N个输出表示`rejected`的数据,例如: +对于奖励模型或 DPO 训练的数据集,`response` 列应当是一个字符串列表,排在前面的代表更优的答案,例如: + ```json { - "instruction": "Question?", - "input": "", - "output": [ - "chosen answer", - "rejected answer" - ] + "instruction": "Question", + "input": "", + "output": [ + "Chosen answer", + "Rejected answer" + ] } - ``` diff --git a/src/llmtuner/extras/template.py b/src/llmtuner/extras/template.py index b53c5893..2eedfa9d 100644 --- a/src/llmtuner/extras/template.py +++ b/src/llmtuner/extras/template.py @@ -589,3 +589,19 @@ register_template( "\n\n" ] ) + + +r""" +Supports: https://huggingface.co/xverse/XVERSE-13B-Chat +""" +register_template( + name="xverse", + prefix=[ + "{{system}}" + ], + prompt=[ + "Human: {{query}}\n\nAssistant: " + ], + system="", + sep=[] +) diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py index 5af4549e..bda0adaf 100644 --- a/src/llmtuner/hparams/finetuning_args.py +++ b/src/llmtuner/hparams/finetuning_args.py @@ -21,7 +21,8 @@ class FinetuningArguments: Falcon choices: [\"32\", \"60\"], \ Baichuan choices: [\"32\", \"40\"] \ Qwen choices: [\"32\"], \ - XVERSE choices: [\"40\"]"} + XVERSE choices: [\"40\"], \ + ChatGLM2 choices: [\"28\"]"} ) num_layer_trainable: Optional[int] = field( default=3, @@ -31,7 +32,7 @@ class FinetuningArguments: default="mlp", metadata={"help": "Name of trainable modules for partial-parameter (freeze) fine-tuning. \ LLaMA choices: [\"mlp\", \"self_attn\"], \ - BLOOM & Falcon choices: [\"mlp\", \"self_attention\"], \ + BLOOM & Falcon & ChatGLM2 choices: [\"mlp\", \"self_attention\"], \ Baichuan choices: [\"mlp\", \"self_attn\"], \ Qwen choices: [\"mlp\", \"attn\"], \ LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."} @@ -52,7 +53,7 @@ class FinetuningArguments: default=None, metadata={"help": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. \ LLaMA choices: [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \ - BLOOM & Falcon choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense\"], \ + BLOOM & Falcon & ChatGLM2 choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense\"], \ Baichuan choices: [\"W_pack\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \ Qwen choices: [\"c_attn\", \"attn.c_proj\", \"w1\", \"w2\", \"mlp.c_proj\"], \ LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."}
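Note: the following is an illustrative sketch, not part of the patch. Assuming only the column layout documented in data/README.md above (the `output` list holds the preferred answer first), a reward-modeling or DPO loader could split one example into a (chosen, rejected) pair roughly as below; the actual loading code in llmtuner may differ.

```python
# Illustrative only: one example in the updated preference format,
# with the preferred answer listed first in "output".
example = {
    "instruction": "Question",
    "input": "",
    "output": [
        "Chosen answer",
        "Rejected answer"
    ]
}

# Build the model input from the instruction plus the optional input field,
# then split the outputs into the preferred and rejected answers.
prompt = example["instruction"]
if example["input"]:
    prompt = prompt + "\n" + example["input"]
chosen, rejected = example["output"][0], example["output"][1]

print(prompt)
print("chosen:", chosen)
print("rejected:", rejected)
```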
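Similarly, a rough sketch of the single-turn prompt layout encoded by the new `xverse` template registered in src/llmtuner/extras/template.py: the real rendering is done by the project's `Template` machinery (which also handles history, separators, and tokenization), so the helper below is hypothetical.

```python
# Hypothetical helper mirroring the "xverse" template fields above:
# prefix=["{{system}}"] (system defaults to "") followed by
# prompt=["Human: {{query}}\n\nAssistant: "].
def render_xverse(query: str, system: str = "") -> str:
    return system + f"Human: {query}\n\nAssistant: "

print(repr(render_xverse("Hello")))
# expected: 'Human: Hello\n\nAssistant: '
```

Once registered, the template can presumably be selected by name (`xverse`) when fine-tuning XVERSE-13B, matching the updated template column in both README tables.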