From 577de2fa078ee62d61bca24b75fa59590e2ef66b Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Wed, 12 Jun 2024 16:50:11 +0800
Subject: [PATCH] fix #4242

---
 Dockerfile                           | 2 +-
 src/llamafactory/chat/vllm_engine.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 45849601..3932ff30 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,7 +32,7 @@ RUN EXTRA_PACKAGES="metrics"; \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
     pip install -e .[$EXTRA_PACKAGES] && \
-    pip uninstall -y transformer-engine
+    pip uninstall -y transformer-engine flash-attn
 
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py
index d096f6eb..d2850a6e 100644
--- a/src/llamafactory/chat/vllm_engine.py
+++ b/src/llamafactory/chat/vllm_engine.py
@@ -13,10 +13,11 @@ from .base_engine import BaseEngine, Response
 if is_vllm_available():
     from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams
     from vllm.lora.request import LoRARequest
+
     try:
-        from vllm.multimodal import MultiModalData  # vllm==0.5.0
+        from vllm.multimodal import MultiModalData  # type: ignore (for vllm>=0.5.0)
     except ImportError:
-        from vllm.sequence import MultiModalData  # for vllm<0.5.0
+        from vllm.sequence import MultiModalData  # for vllm<0.5.0
 
 
 if TYPE_CHECKING:
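
For reference, once this patch is applied the import block in src/llamafactory/chat/vllm_engine.py reads roughly as follows; this is only a sketch reconstructed from the hunk above, illustrating the try/except fallback around the vllm 0.5.0 relocation of MultiModalData:

    # MultiModalData moved from vllm.sequence to vllm.multimodal in vllm 0.5.0,
    # so try the new location first and fall back for older vllm installs.
    try:
        from vllm.multimodal import MultiModalData  # type: ignore (for vllm>=0.5.0)
    except ImportError:
        from vllm.sequence import MultiModalData  # for vllm<0.5.0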