Fix some typing issues found by mypy==1.18.2 (#26596)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-10 19:21:25 +01:00
committed by GitHub
parent 3b780a4bbb
commit 7c12763b24
6 changed files with 19 additions and 18 deletions

View File

@@ -807,8 +807,10 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
engine_core_outputs[0].scheduler_stats if engine_core_outputs else None engine_core_outputs[0].scheduler_stats if engine_core_outputs else None
) )
if expected[0] == 0: if expected[0] == 0:
assert scheduler_stats is not None
assert scheduler_stats.spec_decoding_stats is None assert scheduler_stats.spec_decoding_stats is None
else: else:
assert scheduler_stats is not None
assert scheduler_stats.spec_decoding_stats is not None assert scheduler_stats.spec_decoding_stats is not None
stats = scheduler_stats.spec_decoding_stats stats = scheduler_stats.spec_decoding_stats
assert stats.num_drafts == expected[0] assert stats.num_drafts == expected[0]

View File

@@ -1229,10 +1229,10 @@ class QKVParallelLinear(ColumnParallelLinear):
param_data = param_data.narrow(output_dim, shard_offset, shard_size) param_data = param_data.narrow(output_dim, shard_offset, shard_size)
if loaded_shard_id == "q": if loaded_shard_id == "q":
shard_id = self.tp_rank shard_rank = self.tp_rank
else: else:
shard_id = self.tp_rank // self.num_kv_head_replicas shard_rank = self.tp_rank // self.num_kv_head_replicas
start_idx = shard_id * shard_size start_idx = shard_rank * shard_size
if not is_sharded_weight: if not is_sharded_weight:
loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size) loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size)

View File

@@ -49,16 +49,16 @@ class CompressedTensors24(CompressedTensorsScheme):
self.quantized = quantized self.quantized = quantized
self.weight_quant = weight_quant self.weight_quant = weight_quant
self.input_quant = input_quant self.input_quant = input_quant
self.model_compressor = ( model_compressor = ModelCompressor.from_compression_config(
ModelCompressor.from_compression_config(model_compression_config) model_compression_config
if model_compression_config is not None
else None
) )
self.do_sparse_decompress = ( self.do_sparse_decompress = (
self.model_compressor is not None model_compressor is not None
and self.model_compressor.sparsity_config.format and model_compressor.sparsity_config.format
== CompressionFormat.sparse_24_bitmask.value == CompressionFormat.sparse_24_bitmask.value
) )
if self.do_sparse_decompress:
self.model_compressor = model_compressor
if ( if (
quantized quantized

View File

@@ -200,12 +200,10 @@ class BaseResampler(nn.Module):
self.ln_q = norm_layer(embed_dim) self.ln_q = norm_layer(embed_dim)
self.ln_kv = norm_layer(embed_dim) self.ln_kv = norm_layer(embed_dim)
self.do_post_projection = do_post_projection self.do_post_projection = do_post_projection
self.ln_post = norm_layer(embed_dim) if do_post_projection else None if self.do_post_projection:
self.proj = ( self.ln_post = norm_layer(embed_dim)
nn.Parameter((embed_dim**-0.5) * torch.empty(embed_dim, embed_dim)) data = (embed_dim**-0.5) * torch.empty(embed_dim, embed_dim)
if do_post_projection self.proj = nn.Parameter(data=data)
else None
)
def _repeat(self, query, N: int): def _repeat(self, query, N: int):
return query.unsqueeze(1).repeat(1, N, 1) return query.unsqueeze(1).repeat(1, N, 1)

View File

@@ -542,8 +542,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
) )
quant_config = getattr(model_config.hf_config, "quantization_config", None) quant_config = getattr(model_config.hf_config, "quantization_config", None)
if quant_config is not None: if quant_config and (quant_method := quant_config.get("quant_method")):
quant_method = quant_config.get("quant_method")
if quant_method == "bitsandbytes": if quant_method == "bitsandbytes":
self.pre_quant = True self.pre_quant = True
else: else:
@@ -558,7 +557,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
"Prequant BitsAndBytes models with tensor parallelism is not " "Prequant BitsAndBytes models with tensor parallelism is not "
"supported. Please try with pipeline parallelism." "supported. Please try with pipeline parallelism."
) )
if self.pre_quant: if quant_config and self.pre_quant:
self.load_8bit = quant_config.get("load_in_8bit", False) self.load_8bit = quant_config.get("load_in_8bit", False)
def _initialize_loader_state( def _initialize_loader_state(

View File

@@ -397,6 +397,8 @@ class Ovis2_5Processor(ProcessorMixin):
images.append(image) images.append(image)
elif isinstance(video, list): elif isinstance(video, list):
images = video images = video
else:
raise ValueError("Either images or video should be provided.")
min_pixels = min( min_pixels = min(
max_pixels if max_pixels is not None else MAX_PIXELS, max_pixels if max_pixels is not None else MAX_PIXELS,
min_pixels if min_pixels is not None else MIN_PIXELS, min_pixels if min_pixels is not None else MIN_PIXELS,