diff --git a/README.md b/README.md index 2b74bdb5..78312e07 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ [24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details. -[24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `dataset: glaive_toolcall`. +[24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `dataset: glaive_toolcall_en`. [23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `use_unsloth: true` argument to activate unsloth patch. It achieves **170%** speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details. diff --git a/README_zh.md b/README_zh.md index fa59a554..5acf3dd1 100644 --- a/README_zh.md +++ b/README_zh.md @@ -107,7 +107,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd [24/02/05] Qwen1.5(Qwen2 测试版)系列模型已在 LLaMA-Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。 -[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `dataset: glaive_toolcall` 即可使模型获得工具调用能力。 +[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `dataset: glaive_toolcall_zh` 即可使模型获得工具调用能力。 [23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `use_unsloth: true` 参数启用 unsloth 优化。该方法可提供 **170%** 的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。 diff --git a/data/dataset_info.json b/data/dataset_info.json index 5a90e077..0506e23d 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -262,6 +262,36 @@ "ruozhiba_gpt4": { "hf_hub_url": "hfl/ruozhiba_gpt4_turbo" }, + "llava_1k_en": { + "hf_hub_url": "BUAADreamer/llava-en-zh-2k", + "subset": "en", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, + "llava_1k_zh": { + "hf_hub_url": "BUAADreamer/llava-en-zh-2k", + "subset": "zh", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, "llava_150k_en": { "hf_hub_url": "BUAADreamer/llava-en-zh-300k", "subset": "en", diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index 958be1b5..f68244b7 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -6,6 +6,7 @@ stage: dpo do_train: true finetuning_type: lora lora_target: q_proj,v_proj +pref_beta: 0.1 pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo] ### dataset