[Attention] Flash Attention 3 - fp8 (#14570)

Signed-off-by: Mickael Seznec <mickael@mistral.ai>
This commit is contained in:
Mickaël Seznec
2025-03-20 06:14:20 +01:00
committed by GitHub
parent ae65f3e237
commit a597a57595
15 changed files with 272 additions and 76 deletions

View File

@@ -5,6 +5,7 @@ import pickle
import signal
import sys
import time
import traceback
import weakref
from dataclasses import dataclass
from enum import Enum, auto
@@ -370,6 +371,9 @@ class WorkerProc:
func = partial(cloudpickle.loads(method), self.worker)
output = func(*args, **kwargs)
except Exception as e:
# Notes have been introduced in python 3.11
if hasattr(e, "add_note"):
e.add_note(traceback.format_exc())
self.worker_response_mq.enqueue(
(WorkerProc.ResponseStatus.FAILURE, e))
logger.exception("WorkerProc hit an exception: %s", exc_info=e)