[Feature] Support sequence parallelism for static fp8 quantization (#19181)

Signed-off-by: cascade812 <cascade812@outlook.com>
This commit is contained in:
cascade
2025-06-23 13:09:02 -07:00
committed by GitHub
parent d0132f025d
commit e6327c9b3e
7 changed files with 531 additions and 195 deletions

View File

@@ -345,8 +345,8 @@ class FusedAddRMSNormStaticQuantPattern(RMSNormQuantPattern):
# 0 is always None
fused_return_mapping = {1: (quant_node, 1), 2: (rms_node, 2)}
self.insert_fused_node(fused_return_mapping,
-epsilon=rms_node.kwargs["epsilon"],
-**kwargs)
+**kwargs,
+epsilon=rms_node.kwargs["epsilon"])
class RMSNormDynamicQuantPattern(RMSNormQuantPattern):