[Model] Enable Step3p5ForCausalLM testing (#33755)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -471,7 +471,7 @@ th {
|
||||
| `StableLMEpochForCausalLM` | StableLM Epoch | `stabilityai/stablelm-zephyr-3b`, etc. | | ✅︎ |
|
||||
| `Starcoder2ForCausalLM` | Starcoder2 | `bigcode/starcoder2-3b`, `bigcode/starcoder2-7b`, `bigcode/starcoder2-15b`, etc. | | ✅︎ |
|
||||
| `Step1ForCausalLM` | Step-Audio | `stepfun-ai/Step-Audio-EditX`, etc. | ✅︎ | ✅︎ |
|
||||
-| `Step3p5ForCausalLM` | Step-3.5-flash | `stepfun-ai/step-3.5-flash`, etc. | | ✅︎ |
|
||||
+| `Step3p5ForCausalLM` | Step-3.5-flash | `stepfun-ai/Step-3.5-Flash`, etc. | | ✅︎ |
|
||||
| `TeleChatForCausalLM` | TeleChat | `chuhac/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
|
||||
| `TeleChat2ForCausalLM` | TeleChat2 | `Tele-AI/TeleChat2-3B`, `Tele-AI/TeleChat2-7B`, `Tele-AI/TeleChat2-35B`, etc. | ✅︎ | ✅︎ |
|
||||
| `TeleFLMForCausalLM` | TeleFLM | `CofeAI/FLM-2-52B-Instruct-2407`, `CofeAI/Tele-FLM`, etc. | ✅︎ | ✅︎ |
|
||||
|
||||
@@ -481,16 +481,21 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
|
||||
"ByteDance-Seed/Seed-OSS-36B-Instruct",
|
||||
trust_remote_code=True,
|
||||
),
|
||||
-"Step1ForCausalLM": _HfExamplesInfo(
|
||||
-"stepfun-ai/Step-Audio-EditX", trust_remote_code=True
|
||||
-),
|
||||
-"Step3p5ForCausalLM": _HfExamplesInfo(
|
||||
-"stepfun-ai/step-3.5-flash", is_available_online=False
|
||||
-),
|
||||
"SmolLM3ForCausalLM": _HfExamplesInfo("HuggingFaceTB/SmolLM3-3B"),
|
||||
"StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b"),
|
||||
"StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
|
||||
"Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
|
||||
+"Step1ForCausalLM": _HfExamplesInfo(
|
||||
+"stepfun-ai/Step-Audio-EditX", trust_remote_code=True
|
||||
+),
|
||||
+"Step3p5ForCausalLM": _HfExamplesInfo(
|
||||
+"stepfun-ai/Step-3.5-Flash",
|
||||
+use_original_num_layers=True,
|
||||
+# Initialize at least one MoE layer
|
||||
+hf_overrides={
|
||||
+"num_hidden_layers": 4,
|
||||
+},
|
||||
+),
|
||||
"Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3", trust_remote_code=True),
|
||||
"SolarForCausalLM": _HfExamplesInfo(
|
||||
"upstage/solar-pro-preview-instruct", trust_remote_code=True
|
||||
@@ -1129,8 +1134,12 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
|
||||
),
|
||||
"Step3p5MTP": _HfExamplesInfo(
|
||||
"stepfun-ai/Step-3.5-Flash",
|
||||
-trust_remote_code=True,
|
||||
speculative_model="stepfun-ai/Step-3.5-Flash",
|
||||
+use_original_num_layers=True,
|
||||
+# Initialize at least one MoE layer
|
||||
+hf_overrides={
|
||||
+"num_hidden_layers": 4,
|
||||
+},
|
||||
is_available_online=False,
|
||||
),
|
||||
}
|
||||
|
||||
@@ -36,7 +36,6 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
|
||||
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
||||
from vllm.model_executor.layers.rotary_embedding import get_rope
|
||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||
-DEFAULT_VOCAB_PADDING_SIZE,
|
||||
ParallelLMHead,
|
||||
VocabParallelEmbedding,
|
||||
)
|
||||
@@ -770,37 +769,17 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
|
||||
):
|
||||
super().__init__()
|
||||
config = vllm_config.model_config.hf_config
|
||||
-lora_config = vllm_config.lora_config
|
||||
self.config = config
|
||||
self.vllm_config = vllm_config
|
||||
|
||||
self.model = Step3p5Model(
|
||||
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
|
||||
)
|
||||
|
||||
-self.moe_layers: list[FusedMoEBlock] = []
|
||||
-for layer in self.model.layers:
|
||||
-if isinstance(layer, PPMissingLayer):
|
||||
-continue
|
||||
-assert isinstance(layer, Step3p5DecoderLayer)
|
||||
-if hasattr(layer, "moe") and isinstance(layer.moe, FusedMoEBlock):
|
||||
-self.moe_layers.append(layer.moe)
|
||||
|
||||
if get_pp_group().is_last_rank:
|
||||
-self.unpadded_vocab_size = config.vocab_size
|
||||
-if lora_config:
|
||||
-self.unpadded_vocab_size += lora_config.lora_extra_vocab_size
|
||||
self.lm_head = ParallelLMHead(
|
||||
-self.unpadded_vocab_size,
|
||||
+config.vocab_size,
|
||||
config.hidden_size,
|
||||
-org_num_embeddings=config.vocab_size,
|
||||
-padding_size=DEFAULT_VOCAB_PADDING_SIZE
|
||||
-if not lora_config
|
||||
-else lora_config.lora_vocab_padding_size,
|
||||
-)
|
||||
-self.logits_processor = LogitsProcessor(
|
||||
-self.unpadded_vocab_size, config.vocab_size
|
||||
+quant_config=vllm_config.quant_config,
|
||||
+prefix=maybe_prefix(prefix, "lm_head"),
|
||||
)
|
||||
+self.logits_processor = LogitsProcessor(config.vocab_size)
|
||||
else:
|
||||
self.lm_head = PPMissingLayer()
|
||||
|
||||
@@ -809,6 +788,14 @@ class Step3p5ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts):
|
||||
)
|
||||
|
||||
# Set MoE hyperparameters
|
||||
+self.moe_layers: list[FusedMoEBlock] = []
|
||||
+for layer in self.model.layers:
|
||||
+if isinstance(layer, PPMissingLayer):
|
||||
+continue
|
||||
+assert isinstance(layer, Step3p5DecoderLayer)
|
||||
+if hasattr(layer, "moe") and isinstance(layer.moe, FusedMoEBlock):
|
||||
+self.moe_layers.append(layer.moe)
|
||||
|
||||
self.expert_weights = []
|
||||
assert len(self.moe_layers) > 0, "No MoE layers found in the model."
|
||||
example_layer = self.moe_layers[0]
|
||||
|
||||
Reference in New Issue
Block a user