adeebaldkheel committed
Commit 932c5f6 · verified · Parent(s): 2c968b4

Fix: Add Transformers 4.47+ compatibility

- Wraps the LlamaFlashAttention2 import in try-except for backward compatibility
- Falls back to LlamaAttention when FlashAttention2 is unavailable
- Tested on Transformers 4.46.3 with macOS MPS
- Minimal change: 1 file, 13 insertions, 5 deletions

Files changed (1):
  modeling_deepseekv2.py  +13 -5
modeling_deepseekv2.py CHANGED
@@ -34,10 +34,18 @@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from transformers.activations import ACT2FN
 from transformers.cache_utils import Cache, DynamicCache
 from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
-from transformers.models.llama.modeling_llama import (
-    LlamaAttention,
-    LlamaFlashAttention2
-)
+
+# Handle different transformers versions
+try:
+    from transformers.models.llama.modeling_llama import (
+        LlamaAttention,
+        LlamaFlashAttention2
+    )
+except ImportError:
+    # Newer transformers versions (4.47+) don't have LlamaFlashAttention2
+    from transformers.models.llama.modeling_llama import LlamaAttention
+    LlamaFlashAttention2 = None  # Will use fallback
+
 from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
     CausalLMOutputWithPast,
@@ -1235,7 +1243,7 @@ ATTENTION_CLASSES = {
     "mla_flash_attention_2": DeepseekV2FlashAttention2,

     "mha_eager": LlamaAttention,
-    "mha_flash_attention_2": LlamaFlashAttention2
+    "mha_flash_attention_2": LlamaFlashAttention2 if LlamaFlashAttention2 is not None else LlamaAttention
 }

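
For context, the same fallback can be exercised outside the model file to confirm which class the "mha_flash_attention_2" key resolves to on a given install. The snippet below is an illustrative sketch, not part of the commit; it only assumes that LlamaAttention remains importable on newer Transformers versions, as the commit message states.

    # Illustrative check (not part of this commit): reproduce the import fallback
    # and report which attention class "mha_flash_attention_2" would map to.
    import transformers

    try:
        # Transformers <= 4.46 still exports LlamaFlashAttention2
        from transformers.models.llama.modeling_llama import (
            LlamaAttention,
            LlamaFlashAttention2,
        )
    except ImportError:
        # Transformers 4.47+ no longer exports LlamaFlashAttention2
        from transformers.models.llama.modeling_llama import LlamaAttention
        LlamaFlashAttention2 = None

    resolved = LlamaFlashAttention2 if LlamaFlashAttention2 is not None else LlamaAttention
    print(f"transformers {transformers.__version__}: mha_flash_attention_2 -> {resolved.__name__}")

On an older install this prints LlamaFlashAttention2; on 4.47+ it prints LlamaAttention, matching the fallback wired into ATTENTION_CLASSES above.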