diff --git a/.env.local b/.env.local index d3013934..a58ed445 100644 --- a/.env.local +++ b/.env.local @@ -24,10 +24,12 @@ WANDB_DISABLED= WANDB_PROJECT=huggingface WANDB_API_KEY= # gradio ui -GRADIO_SHARE=0 +GRADIO_SHARE=False GRADIO_SERVER_NAME=0.0.0.0 GRADIO_SERVER_PORT= GRADIO_ROOT_PATH= +# setup +ENABLE_SHORT_CONSOLE=1 # reserved (do not use) LLAMABOARD_ENABLED= LLAMABOARD_WORKDIR= diff --git a/README.md b/README.md index a84980e5..8bc99730 100644 --- a/README.md +++ b/README.md @@ -275,7 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) -- [Pokemon-gpt4o-captions](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions) +- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) - [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de) diff --git a/README_zh.md b/README_zh.md index 692b2c4a..e80a2104 100644 --- a/README_zh.md +++ b/README_zh.md @@ -276,7 +276,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 - [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) -- [Pokemon-gpt4o-captions](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions) +- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) - [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de) diff --git a/scripts/cal_mfu.py b/scripts/cal_mfu.py index a04c388a..36562e8a 100644 --- a/scripts/cal_mfu.py +++ b/scripts/cal_mfu.py @@ -131,6 +131,9 @@ def calculate_mfu( "dataset": "c4_demo", "cutoff_len": seq_length, "output_dir": os.path.join("saves", "test_mfu"), + "logging_strategy": "no", + "save_strategy": "no", + "save_only_model": True, "overwrite_output_dir": True, "per_device_train_batch_size": batch_size, "max_steps": num_steps, diff --git a/setup.py b/setup.py index e3184823..a80cb81b 100644 --- a/setup.py +++ b/setup.py @@ -14,11 +14,12 @@ import os import re +from typing import List from setuptools import find_packages, setup -def get_version(): +def get_version() -> str: with open(os.path.join("src", "llamafactory", "extras", "env.py"), "r", encoding="utf-8") as f: file_content = f.read() pattern = r"{}\W*=\W*\"([^\"]+)\"".format("VERSION") @@ -26,13 +27,21 @@ def get_version(): return version -def get_requires(): +def get_requires() -> List[str]: with open("requirements.txt", "r", encoding="utf-8") as f: file_content = f.read() lines = [line.strip() for line in file_content.strip().split("\n") if not line.startswith("#")] return lines +def get_console_scripts() -> List[str]: + console_scripts = ["llamafactory-cli = llamafactory.cli:main"] + if os.environ.get("ENABLE_SHORT_CONSOLE", "1").lower() in ["true", "1"]: + console_scripts.append("lmf = llamafactory.cli:main") + + return console_scripts + + extra_require = { "torch": ["torch>=1.13.1"], "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], @@ -72,7 +81,7 @@ def main(): python_requires=">=3.8.0", install_requires=get_requires(), extras_require=extra_require, - entry_points={"console_scripts": ["llamafactory-cli = llamafactory.cli:main"]}, + entry_points={"console_scripts": get_console_scripts()}, classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 00af7d75..89fcfb91 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -829,7 +829,7 @@ register_model_group( register_model_group( models={ - "MiniCPM3-4B": { + "MiniCPM3-4B-Chat": { DownloadSource.DEFAULT: "openbmb/MiniCPM3-4B", DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM3-4B", }, diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index 69c6bf43..105acadb 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -96,38 +96,45 @@ def fix_valuehead_checkpoint( class FixValueHeadModelCallback(TrainerCallback): + r""" + A callback for fixing the checkpoint for valuehead models. + """ + @override def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" Event called after a checkpoint save. """ if args.should_save: + output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)) fix_valuehead_checkpoint( - model=kwargs.pop("model"), - output_dir=os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)), - safe_serialization=args.save_safetensors, + model=kwargs.pop("model"), output_dir=output_dir, safe_serialization=args.save_safetensors ) class SaveProcessorCallback(TrainerCallback): + r""" + A callback for saving the processor. + """ + def __init__(self, processor: "ProcessorMixin") -> None: - r""" - Initializes a callback for saving the processor. - """ self.processor = processor + @override + def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): + if args.should_save: + output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)) + getattr(self.processor, "image_processor").save_pretrained(output_dir) + @override def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the end of training. - """ if args.should_save: getattr(self.processor, "image_processor").save_pretrained(args.output_dir) class PissaConvertCallback(TrainerCallback): r""" - Initializes a callback for converting the PiSSA adapter to a normal one. + A callback for converting the PiSSA adapter to a normal one. """ @override @@ -147,9 +154,6 @@ class PissaConvertCallback(TrainerCallback): @override def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the end of training. - """ if args.should_save: model = kwargs.pop("model") pissa_init_dir = os.path.join(args.output_dir, "pissa_init") @@ -177,21 +181,22 @@ class PissaConvertCallback(TrainerCallback): class LogCallback(TrainerCallback): + r""" + A callback for logging training and evaluation status. + """ + def __init__(self) -> None: - r""" - Initializes a callback for logging training and evaluation status. - """ - """ Progress """ + # Progress self.start_time = 0 self.cur_steps = 0 self.max_steps = 0 self.elapsed_time = "" self.remaining_time = "" self.thread_pool: Optional["ThreadPoolExecutor"] = None - """ Status """ + # Status self.aborted = False self.do_train = False - """ Web UI """ + # Web UI self.webui_mode = os.environ.get("LLAMABOARD_ENABLED", "0").lower() in ["true", "1"] if self.webui_mode: signal.signal(signal.SIGABRT, self._set_abort) @@ -233,9 +238,6 @@ class LogCallback(TrainerCallback): @override def on_init_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the end of the initialization of the `Trainer`. - """ if ( args.should_save and os.path.exists(os.path.join(args.output_dir, TRAINER_LOG)) @@ -246,9 +248,6 @@ class LogCallback(TrainerCallback): @override def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the beginning of training. - """ if args.should_save: self.do_train = True self._reset(max_steps=state.max_steps) @@ -256,50 +255,32 @@ class LogCallback(TrainerCallback): @override def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the end of training. - """ self._close_thread_pool() @override def on_substep_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the end of an substep during gradient accumulation. - """ if self.aborted: control.should_epoch_stop = True control.should_training_stop = True @override def on_step_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called at the end of a training step. - """ if self.aborted: control.should_epoch_stop = True control.should_training_stop = True @override def on_evaluate(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called after an evaluation phase. - """ if not self.do_train: self._close_thread_pool() @override def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called after a successful prediction. - """ if not self.do_train: self._close_thread_pool() @override def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): - r""" - Event called after logging the last logs. - """ if not args.should_save: return @@ -342,9 +323,6 @@ class LogCallback(TrainerCallback): def on_prediction_step( self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs ): - r""" - Event called after a prediction step. - """ if self.do_train: return