From 194064fdae0226dd22522586c9d47c5866a71a8e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 2 Sep 2024 01:02:25 +0800 Subject: [PATCH] add pokemon dataset --- README.md | 1 + README_zh.md | 1 + data/dataset_info.json | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/README.md b/README.md index eca3d1a1..e0ccb8e0 100644 --- a/README.md +++ b/README.md @@ -275,6 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) +- [Pokemon-gpt4o-captions](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) - [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de) diff --git a/README_zh.md b/README_zh.md index 256ebd1d..94a004ec 100644 --- a/README_zh.md +++ b/README_zh.md @@ -276,6 +276,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 - [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) +- [Pokemon-gpt4o-captions](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) - [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de) diff --git a/data/dataset_info.json b/data/dataset_info.json index 02597150..e95599c6 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -340,6 +340,14 @@ "assistant_tag": "assistant" } }, + "pokemon_cap": { + "hf_hub_url": "llamafactory/pokemon-gpt4o-captions", + "formatting": "sharegpt", + "columns": { + "messages": "conversations", + "images": "images" + } + }, "mllm_pt_demo": { "hf_hub_url": "BUAADreamer/mllm_pt_demo", "formatting": "sharegpt",