Spaces:
Sleeping
Sleeping
Update app_fa3.py
Browse files - app_fa3.py +1 -1
app_fa3.py
CHANGED
|
@@ -101,7 +101,7 @@ def load_model():
|
|
| 101 |
torch_dtype=torch.bfloat16 if HAS_CUDA else torch.float32,
|
| 102 |
device_map="auto" if HAS_CUDA else {"": "cpu"},
|
| 103 |
trust_remote_code=True,
|
| 104 |
-
attn_implementation="
|
| 105 |
)
|
| 106 |
model_pt = PeftModel.from_pretrained(base_model, LORA_NAME, device_map="auto" if HAS_CUDA else {"": "cpu"})
|
| 107 |
|
|
|
|
| 101 |
torch_dtype=torch.bfloat16 if HAS_CUDA else torch.float32,
|
| 102 |
device_map="auto" if HAS_CUDA else {"": "cpu"},
|
| 103 |
trust_remote_code=True,
|
| 104 |
+
attn_implementation="kernels-community/vllm-flash-attn3",
|
| 105 |
)
|
| 106 |
model_pt = PeftModel.from_pretrained(base_model, LORA_NAME, device_map="auto" if HAS_CUDA else {"": "cpu"})
|
| 107 |
|