From 4318347d3f1982c773dad1074636ec7b550770fd Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Tue, 22 Aug 2023 19:46:09 +0800
Subject: [PATCH] update template

---
 README.md                               |  2 +-
 README_zh.md                            |  2 +-
 data/README.md                          | 15 ++++++++-------
 data/README_zh.md                       | 16 ++++++++--------
 src/llmtuner/extras/template.py         | 16 ++++++++++++++++
 src/llmtuner/hparams/finetuning_args.py |  7 ++++---
 6 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index dcb0e1b7..0db73b16 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@
 | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | W_pack | baichuan |
 | [InternLM](https://github.com/InternLM/InternLM) | 7B | q_proj,v_proj | intern |
 | [Qwen](https://github.com/QwenLM/Qwen-7B) | 7B | c_attn | chatml |
-| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | - |
+| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | xverse |
 | [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 6B | query_key_value | chatglm2 |
 
 - **Default module** is used for the `--lora_target` argument. Please use `python src/train_bash.py -h` to see all available options.
diff --git a/README_zh.md b/README_zh.md
index a021af21..ec4a524c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -58,7 +58,7 @@
 | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | W_pack | baichuan |
 | [InternLM](https://github.com/InternLM/InternLM) | 7B | q_proj,v_proj | intern |
 | [Qwen](https://github.com/QwenLM/Qwen-7B) | 7B | c_attn | chatml |
-| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | - |
+| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | q_proj,v_proj | xverse |
 | [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 6B | query_key_value | chatglm2 |
 
 - **默认模块**是 `--lora_target` 参数的部分可选项。请使用 `python src/train_bash.py -h` 查看全部可选项。
diff --git a/data/README.md b/data/README.md
index 45ea7dad..dc1c8bce 100644
--- a/data/README.md
+++ b/data/README.md
@@ -17,14 +17,15 @@ If you are using a custom dataset, please provide your dataset definition in the
 
 where the `prompt` and `response` columns should contain non-empty values. The `query` column will be concatenated with the `prompt` column and used as input for the model. The `history` column should contain a list where each element is a string tuple representing a query-response pair.
 
-For Reward-Modeling(rm) dataset, the first n examples represent chosen examples and the last n examples represent rejected examples.
+For datasets used in reward modeling or DPO training, the `response` column should be a string list, with the preferred answers appearing first, for example: + ```json { - "instruction": "Question?", - "input": "", - "output": [ - "chosen answer", - "rejected answer" - ] + "instruction": "Question", + "input": "", + "output": [ + "Chosen answer", + "Rejected answer" + ] } ``` diff --git a/data/README_zh.md b/data/README_zh.md index a36b3750..054ee8ea 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -17,15 +17,15 @@ 其中 `prompt` 和 `response` 列应当是非空的字符串。`query` 列的内容将会和 `prompt` 列拼接作为模型输入。`history` 列应当是一个列表,其中每个元素是一个字符串二元组,分别代表用户请求和模型答复。 -对于奖励模型(rm)的数据集,头N个输出表示`chosen`的数据,后N个输出表示`rejected`的数据,例如: +对于奖励模型或 DPO 训练的数据集,`response` 列应当是一个字符串列表,排在前面的代表更优的答案,例如: + ```json { - "instruction": "Question?", - "input": "", - "output": [ - "chosen answer", - "rejected answer" - ] + "instruction": "Question", + "input": "", + "output": [ + "Chosen answer", + "Rejected answer" + ] } - ``` diff --git a/src/llmtuner/extras/template.py b/src/llmtuner/extras/template.py index b53c5893..2eedfa9d 100644 --- a/src/llmtuner/extras/template.py +++ b/src/llmtuner/extras/template.py @@ -589,3 +589,19 @@ register_template( "\n\n" ] ) + + +r""" +Supports: https://huggingface.co/xverse/XVERSE-13B-Chat +""" +register_template( + name="xverse", + prefix=[ + "{{system}}" + ], + prompt=[ + "Human: {{query}}\n\nAssistant: " + ], + system="", + sep=[] +) diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py index 5af4549e..bda0adaf 100644 --- a/src/llmtuner/hparams/finetuning_args.py +++ b/src/llmtuner/hparams/finetuning_args.py @@ -21,7 +21,8 @@ class FinetuningArguments: Falcon choices: [\"32\", \"60\"], \ Baichuan choices: [\"32\", \"40\"] \ Qwen choices: [\"32\"], \ - XVERSE choices: [\"40\"]"} + XVERSE choices: [\"40\"], \ + ChatGLM2 choices: [\"28\"]"} ) num_layer_trainable: Optional[int] = field( default=3, @@ -31,7 +32,7 @@ class FinetuningArguments: default="mlp", metadata={"help": "Name of trainable modules for partial-parameter (freeze) fine-tuning. \ LLaMA choices: [\"mlp\", \"self_attn\"], \ - BLOOM & Falcon choices: [\"mlp\", \"self_attention\"], \ + BLOOM & Falcon & ChatGLM2 choices: [\"mlp\", \"self_attention\"], \ Baichuan choices: [\"mlp\", \"self_attn\"], \ Qwen choices: [\"mlp\", \"attn\"], \ LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."} @@ -52,7 +53,7 @@ class FinetuningArguments: default=None, metadata={"help": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. \ LLaMA choices: [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \ - BLOOM & Falcon choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense\"], \ + BLOOM & Falcon & ChatGLM2 choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense\"], \ Baichuan choices: [\"W_pack\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \ Qwen choices: [\"c_attn\", \"attn.c_proj\", \"w1\", \"w2\", \"mlp.c_proj\"], \ LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."}
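Note: the following is an illustrative sketch, not part of the patch. Assuming only the column layout documented in data/README.md above (the `output` list holds the preferred answer first), a reward-modeling or DPO loader could split one example into a (chosen, rejected) pair roughly as below; the actual loading code in llmtuner may differ.

```python
# Illustrative only: one example in the updated preference format,
# with the preferred answer listed first in "output".
example = {
    "instruction": "Question",
    "input": "",
    "output": [
        "Chosen answer",
        "Rejected answer"
    ]
}

# Build the model input from the instruction plus the optional input field,
# then split the outputs into the preferred and rejected answers.
prompt = example["instruction"]
if example["input"]:
    prompt = prompt + "\n" + example["input"]
chosen, rejected = example["output"][0], example["output"][1]

print(prompt)
print("chosen:", chosen)
print("rejected:", rejected)
```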
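Similarly, a rough sketch of the single-turn prompt layout encoded by the new `xverse` template registered in src/llmtuner/extras/template.py: the real rendering is done by the project's `Template` machinery (which also handles history, separators, and tokenization), so the helper below is hypothetical.

```python
# Hypothetical helper mirroring the "xverse" template fields above:
# prefix=["{{system}}"] (system defaults to "") followed by
# prompt=["Human: {{query}}\n\nAssistant: "].
def render_xverse(query: str, system: str = "") -> str:
    return system + f"Human: {query}\n\nAssistant: "

print(repr(render_xverse("Hello")))
# expected: 'Human: Hello\n\nAssistant: '
```

Once registered, the template can presumably be selected by name (`xverse`) when fine-tuning XVERSE-13B, matching the updated template column in both README tables.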