diff --git a/dsv4/layers/linear.py b/dsv4/layers/linear.py index c67cadad..fb007010 100644 --- a/dsv4/layers/linear.py +++ b/dsv4/layers/linear.py @@ -176,6 +176,7 @@ class Nvfp4Linear: x_fp4, x_sf, gsa_gpu = quantize_nvfp4_gpu_fused(hidden_states) self._gsa_buf.copy_(gsa_gpu[:1].reshape(1)) # GPU → GPU, no sync else: + from dsv4.ops.quantize import quantize_nvfp4_gpu self._gsa_buf.fill_(self._activation_global_scale) x_fp4, x_sf = quantize_nvfp4_gpu(hidden_states, self._activation_global_scale)