# LLaMA-Factory/tests/model/model_utils/test_attention.py

# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available

from llamafactory.hparams import get_infer_args
from llamafactory.model import load_model, load_tokenizer


# Model name or path used by the test; the TINY_LLAMA environment variable overrides the default.
TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

# Base arguments handed to get_infer_args; test_attention adds the "flash_attn" entry per case.
INFER_ARGS = {"model_name_or_path": TINY_LLAMA, "template": "llama3"}


def test_attention():
    """Check that each requested ``flash_attn`` option yields the matching Llama attention class.

    "disabled" is always exercised; "sdpa" and "fa2" are exercised only when
    ``is_torch_sdpa_available()`` / ``is_flash_attn_2_available()`` report support.
    For every option the model is loaded through ``get_infer_args``, ``load_tokenizer``
    and ``load_model``, and the class names of all modules whose name contains
    "Attention" are compared against the expected class name.

    Raises:
        AssertionError: if the loaded model exposes no attention modules, or if any
            attention module has a class other than the expected one.
    """
    attention_available = ["disabled"]
    if is_torch_sdpa_available():
        attention_available.append("sdpa")

    if is_flash_attn_2_available():
        attention_available.append("fa2")

    llama_attention_classes = {
        "disabled": "LlamaAttention",
        "sdpa": "LlamaSdpaAttention",
        "fa2": "LlamaFlashAttention2",
    }
    for requested_attention in attention_available:
        model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS})
        tokenizer_module = load_tokenizer(model_args)
        model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args)

        expected_class = llama_attention_classes[requested_attention]
        attention_class_names = {
            module.__class__.__name__ for module in model.modules() if "Attention" in module.__class__.__name__
        }

        # Without this guard the check below would pass vacuously when no module
        # name contains "Attention", i.e. the test would verify nothing.
        assert attention_class_names, f"no attention modules found for flash_attn={requested_attention!r}"
        assert attention_class_names == {expected_class}, (
            f"flash_attn={requested_attention!r}: expected only {expected_class}, "
            f"found {sorted(attention_class_names)}"
        )
add license 2024-06-15 09:54:33 +00:00			`# Copyright 2024 the LlamaFactory team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

init unittest 2024-06-07 17:35:58 +00:00			`import os`

			`from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available`

			`from llamafactory.hparams import get_infer_args`
			`from llamafactory.model import load_model, load_tokenizer`


fix #2666 2024-06-10 13:24:15 +00:00			`TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")`

			`INFER_ARGS = {`
			`"model_name_or_path": TINY_LLAMA,`
			`"template": "llama3",`
			`}`
init unittest 2024-06-07 17:35:58 +00:00

			`def test_attention():`
Update test_attention.py 2024-06-24 13:35:34 +00:00			`attention_available = ["disabled"]`
init unittest 2024-06-07 17:35:58 +00:00			`if is_torch_sdpa_available():`
			`attention_available.append("sdpa")`

			`if is_flash_attn_2_available():`
			`attention_available.append("fa2")`

			`llama_attention_classes = {`
Update test_attention.py 2024-06-24 13:35:34 +00:00			`"disabled": "LlamaAttention",`
init unittest 2024-06-07 17:35:58 +00:00			`"sdpa": "LlamaSdpaAttention",`
			`"fa2": "LlamaFlashAttention2",`
			`}`
			`for requested_attention in attention_available:`
fix #2666 2024-06-10 13:24:15 +00:00			`model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS})`
release v0.8.0 2024-06-07 21:20:54 +00:00			`tokenizer_module = load_tokenizer(model_args)`
			`model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args)`
init unittest 2024-06-07 17:35:58 +00:00			`for module in model.modules():`
			`if "Attention" in module.__class__.__name__:`
fix ci 2024-06-07 17:57:36 +00:00			`assert module.__class__.__name__ == llama_attention_classes[requested_attention]`