[Core] Move test utility to test file (#35672)
Signed-off-by: Turner Jabbour <doubleujabbour@gmail.com>
This commit is contained in:
@@ -31,27 +31,6 @@ def is_layer_moe_router_gate(prefix: str) -> bool:
|
||||
return prefix.rsplit(".", 1)[-1] in MOE_LAYER_ROUTER_GATE_SUFFIXES
|
||||
|
||||
|
||||
def shuffle_weight(w: torch.Tensor) -> torch.Tensor:
    """Interleave the two halves of ``w`` along its last dimension.

    The last dimension is cut at its midpoint and the halves are folded
    together so that element ``i`` of the first half sits directly next to
    element ``i`` of the second half. This layout is used together with the
    triton swiglu kernel.

    Example:
        input:
            [[1, 2, 3, 4, 5, 6],
             [7, 8, 9, 10, 11, 12]]
        output:
            [[1, 4, 2, 5, 3, 6],
             [7, 10, 8, 11, 9, 12]]

    Args:
        w: Tensor whose last dimension is to be interleaved.

    Returns:
        A tensor with the same shape as ``w`` and the halves interleaved.
    """
    half = w.shape[-1] // 2
    lower, upper = w[..., :half], w[..., half:]
    # Stacking on a new trailing axis pairs lower[i] with upper[i]; reshaping
    # back to the input shape flattens those pairs into adjacent positions.
    return torch.stack((lower, upper), dim=-1).reshape(w.shape)
|
||||
|
||||
|
||||
def get_token_bin_counts_and_mask(
|
||||
tokens: torch.Tensor,
|
||||
vocab_size: int,
|
||||
|
||||
Reference in New Issue
Block a user