diff --git a/examples/accelerate/fsdp_config.yaml b/examples/accelerate/fsdp_config.yaml
index abfbf8f6..60025597 100644
--- a/examples/accelerate/fsdp_config.yaml
+++ b/examples/accelerate/fsdp_config.yaml
@@ -15,8 +15,8 @@ fsdp_config:
 machine_rank: 0
 main_training_function: main
 mixed_precision: fp16
-num_machines: 1
-num_processes: 2
+num_machines: 1  # the number of nodes
+num_processes: 2  # the total number of GPUs across all nodes
 rdzv_backend: static
 same_network: true
 tpu_env: []
diff --git a/examples/accelerate/master_config.yaml b/examples/accelerate/master_config.yaml
index aa41f7e1..9c8fc275 100644
--- a/examples/accelerate/master_config.yaml
+++ b/examples/accelerate/master_config.yaml
@@ -8,8 +8,8 @@ main_process_ip: 192.168.0.1
 main_process_port: 29555
 main_training_function: main
 mixed_precision: fp16
-num_machines: 2
-num_processes: 16
+num_machines: 2  # the number of nodes
+num_processes: 16  # the total number of GPUs across all nodes
 rdzv_backend: static
 same_network: true
 tpu_env: []
diff --git a/examples/accelerate/single_config.yaml b/examples/accelerate/single_config.yaml
index ddb5c910..97f8c633 100644
--- a/examples/accelerate/single_config.yaml
+++ b/examples/accelerate/single_config.yaml
@@ -6,8 +6,8 @@ gpu_ids: all
 machine_rank: 0
 main_training_function: main
 mixed_precision: fp16
-num_machines: 1
-num_processes: 4
+num_machines: 1  # the number of nodes
+num_processes: 4  # the total number of GPUs across all nodes
 rdzv_backend: static
 same_network: true
 tpu_env: []
diff --git a/examples/accelerate/slave_config.yaml b/examples/accelerate/slave_config.yaml
index fcb4bb93..e4a63e82 100644
--- a/examples/accelerate/slave_config.yaml
+++ b/examples/accelerate/slave_config.yaml
@@ -8,8 +8,8 @@ main_process_ip: 192.168.0.1
 main_process_port: 29555
 main_training_function: main
 mixed_precision: fp16
-num_machines: 2
-num_processes: 16
+num_machines: 2  # the number of nodes
+num_processes: 16  # the total number of GPUs across all nodes
 rdzv_backend: static
 same_network: true
 tpu_env: []
diff --git a/examples/merge_lora/merge.sh b/examples/merge_lora/merge.sh
index bd2babb8..c1f15fce 100644
--- a/examples/merge_lora/merge.sh
+++ b/examples/merge_lora/merge.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# DO NOT use a quantized model or quantization_bit when merging LoRA weights
 CUDA_VISIBLE_DEVICES= python ../../src/export_model.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \