fix #3602
commit b0888262e3
parent 6159acbaa0
@@ -148,6 +148,8 @@ bash examples/full_multi_gpu/predict.sh
 
 #### Merge LoRA Adapters
 
+Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters.
+
 ```bash
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```
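Since the new note rules out `quantization_bit` during merging, a quick pre-flight check on the export config can catch a stale setting before running the command above; a minimal sketch using the same config path (the grep pattern is illustrative):

```bash
# Pre-flight check (illustrative): merging LoRA adapters requires an unquantized base model,
# so the export config must not set quantization_bit.
if grep -q "^quantization_bit" examples/merge_lora/llama3_lora_sft.yaml; then
    echo "Remove quantization_bit from the config before merging LoRA adapters." >&2
    exit 1
fi
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```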
@@ -148,6 +148,8 @@ bash examples/full_multi_gpu/predict.sh
 
 #### 合并 LoRA 适配器
 
+注：请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。
+
 ```bash
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```

@@ -1,6 +1,12 @@
 #!/bin/bash
 
-python -m torch.distributed.run \
+NPROC_PER_NODE=4
+NNODES=2
+RANK=0
+MASTER_ADDR=192.168.0.1
+MASTER_PORT=29500
+
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.run \
     --nproc_per_node $NPROC_PER_NODE \
     --nnodes $NNODES \
     --node_rank $RANK \
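The rewritten multi-node launcher above hard-codes RANK=0, i.e. it is the command for the master node. On the second of the two nodes, the same launch would be repeated with only the rank changed; a sketch, assuming the unshown tail of the command passes the master address/port and the training config (those lines fall outside the hunk):

```bash
# Worker node (rank 1 of 2): identical launcher, only RANK differs from the master node.
NPROC_PER_NODE=4
NNODES=2
RANK=1                     # the master node above uses RANK=0
MASTER_ADDR=192.168.0.1    # address of the master node, reachable from this machine
MASTER_PORT=29500

CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.run \
    --nproc_per_node $NPROC_PER_NODE \
    --nnodes $NNODES \
    --node_rank $RANK \
    --master_addr $MASTER_ADDR \
    --master_port $MASTER_PORT \
    src/train.py examples/full_multi_gpu/llama3_full_sft.yaml   # config path assumed, not shown in the hunk
```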
@@ -1,4 +1,9 @@
 #!/bin/bash
 
-deepspeed --include "localhost:0,1,2,3" \
+NPROC_PER_NODE=4
+
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.run \
+    --nproc_per_node $NPROC_PER_NODE \
+    --nnodes 1 \
+    --standalone \
     src/train.py examples/full_multi_gpu/llama3_full_sft.yaml
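On a single node, `--standalone` tells the launcher to set up a local rendezvous on its own, so no master address or port needs to be exported. `torchrun` is the console-script form of `python -m torch.distributed.run`, so the same script can be written equivalently as:

```bash
# Equivalent single-node launch via the torchrun entry point (same semantics as above).
NPROC_PER_NODE=4

CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \
    --nproc_per_node $NPROC_PER_NODE \
    --nnodes 1 \
    --standalone \
    src/train.py examples/full_multi_gpu/llama3_full_sft.yaml
```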
@@ -1,5 +1,9 @@
 #!/bin/bash
-# ZeRO-3 enables weight sharding on multiple GPUs
 
-deepspeed --include "localhost:0,1,2,3" \
+NPROC_PER_NODE=4
+
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.run \
+    --nproc_per_node $NPROC_PER_NODE \
+    --nnodes 1 \
+    --standalone \
     src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
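As in the script above, `NPROC_PER_NODE` and `CUDA_VISIBLE_DEVICES` have to stay consistent: the launcher starts one worker process per requested rank, and each worker expects its own GPU. To run the same ZeRO-3 recipe on two GPUs, for example:

```bash
# Same ZeRO-3 launch restricted to two GPUs: one process per visible device, config unchanged.
NPROC_PER_NODE=2

CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.run \
    --nproc_per_node $NPROC_PER_NODE \
    --nnodes 1 \
    --standalone \
    src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
```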
@@ -1,4 +1,4 @@
-# Note: DO NOT use quantized model or quantization_bit when merging lora weights
+# Note: DO NOT use quantized model or quantization_bit when merging lora adapters
 
 # model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

@@ -0,0 +1,19 @@
+import os
+
+import uvicorn
+
+from llmtuner.api.app import create_app
+from llmtuner.chat import ChatModel
+
+
+def main():
+    chat_model = ChatModel()
+    app = create_app(chat_model)
+    api_host = os.environ.get("API_HOST", "0.0.0.0")
+    api_port = int(os.environ.get("API_PORT", "8000"))
+    print("Visit http://localhost:{}/docs for API document.".format(api_port))
+    uvicorn.run(app, host=api_host, port=api_port)
+
+
+if __name__ == "__main__":
+    main()
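The new launcher reads its bind address and port from `API_HOST` and `API_PORT`, wraps a `ChatModel` in the ASGI app returned by `create_app`, and serves it with uvicorn. A usage sketch; the file name and the inference config path are assumptions (neither is shown in the hunk), and `ChatModel()` is expected to pick up model/adapter settings from the command line as the other llmtuner entry points do:

```bash
# File name assumed to be src/api.py -- the new-file path is not part of the hunk above.
# The yaml argument is purely illustrative; point ChatModel() at whatever inference
# settings you normally use.
API_HOST=0.0.0.0 API_PORT=8000 CUDA_VISIBLE_DEVICES=0 \
    python src/api.py examples/inference/llama3_lora_sft.yaml

# The script prints the docs URL; the interactive API documentation lives at:
curl http://localhost:8000/docs
```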