Fix some typing issues found by mypy==1.18.2 (#26596)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -807,8 +807,10 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
|
|||||||
engine_core_outputs[0].scheduler_stats if engine_core_outputs else None
|
engine_core_outputs[0].scheduler_stats if engine_core_outputs else None
|
||||||
)
|
)
|
||||||
if expected[0] == 0:
|
if expected[0] == 0:
|
||||||
|
assert scheduler_stats is not None
|
||||||
assert scheduler_stats.spec_decoding_stats is None
|
assert scheduler_stats.spec_decoding_stats is None
|
||||||
else:
|
else:
|
||||||
|
assert scheduler_stats is not None
|
||||||
assert scheduler_stats.spec_decoding_stats is not None
|
assert scheduler_stats.spec_decoding_stats is not None
|
||||||
stats = scheduler_stats.spec_decoding_stats
|
stats = scheduler_stats.spec_decoding_stats
|
||||||
assert stats.num_drafts == expected[0]
|
assert stats.num_drafts == expected[0]
|
||||||
|
|||||||
@@ -1229,10 +1229,10 @@ class QKVParallelLinear(ColumnParallelLinear):
|
|||||||
|
|
||||||
param_data = param_data.narrow(output_dim, shard_offset, shard_size)
|
param_data = param_data.narrow(output_dim, shard_offset, shard_size)
|
||||||
if loaded_shard_id == "q":
|
if loaded_shard_id == "q":
|
||||||
shard_id = self.tp_rank
|
shard_rank = self.tp_rank
|
||||||
else:
|
else:
|
||||||
shard_id = self.tp_rank // self.num_kv_head_replicas
|
shard_rank = self.tp_rank // self.num_kv_head_replicas
|
||||||
start_idx = shard_id * shard_size
|
start_idx = shard_rank * shard_size
|
||||||
|
|
||||||
if not is_sharded_weight:
|
if not is_sharded_weight:
|
||||||
loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size)
|
loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size)
|
||||||
|
|||||||
@@ -49,16 +49,16 @@ class CompressedTensors24(CompressedTensorsScheme):
|
|||||||
self.quantized = quantized
|
self.quantized = quantized
|
||||||
self.weight_quant = weight_quant
|
self.weight_quant = weight_quant
|
||||||
self.input_quant = input_quant
|
self.input_quant = input_quant
|
||||||
self.model_compressor = (
|
model_compressor = ModelCompressor.from_compression_config(
|
||||||
ModelCompressor.from_compression_config(model_compression_config)
|
model_compression_config
|
||||||
if model_compression_config is not None
|
|
||||||
else None
|
|
||||||
)
|
)
|
||||||
self.do_sparse_decompress = (
|
self.do_sparse_decompress = (
|
||||||
self.model_compressor is not None
|
model_compressor is not None
|
||||||
and self.model_compressor.sparsity_config.format
|
and model_compressor.sparsity_config.format
|
||||||
== CompressionFormat.sparse_24_bitmask.value
|
== CompressionFormat.sparse_24_bitmask.value
|
||||||
)
|
)
|
||||||
|
if self.do_sparse_decompress:
|
||||||
|
self.model_compressor = model_compressor
|
||||||
|
|
||||||
if (
|
if (
|
||||||
quantized
|
quantized
|
||||||
|
|||||||
@@ -200,12 +200,10 @@ class BaseResampler(nn.Module):
|
|||||||
self.ln_q = norm_layer(embed_dim)
|
self.ln_q = norm_layer(embed_dim)
|
||||||
self.ln_kv = norm_layer(embed_dim)
|
self.ln_kv = norm_layer(embed_dim)
|
||||||
self.do_post_projection = do_post_projection
|
self.do_post_projection = do_post_projection
|
||||||
self.ln_post = norm_layer(embed_dim) if do_post_projection else None
|
if self.do_post_projection:
|
||||||
self.proj = (
|
self.ln_post = norm_layer(embed_dim)
|
||||||
nn.Parameter((embed_dim**-0.5) * torch.empty(embed_dim, embed_dim))
|
data = (embed_dim**-0.5) * torch.empty(embed_dim, embed_dim)
|
||||||
if do_post_projection
|
self.proj = nn.Parameter(data=data)
|
||||||
else None
|
|
||||||
)
|
|
||||||
|
|
||||||
def _repeat(self, query, N: int):
|
def _repeat(self, query, N: int):
|
||||||
return query.unsqueeze(1).repeat(1, N, 1)
|
return query.unsqueeze(1).repeat(1, N, 1)
|
||||||
|
|||||||
@@ -542,8 +542,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
|
|||||||
)
|
)
|
||||||
|
|
||||||
quant_config = getattr(model_config.hf_config, "quantization_config", None)
|
quant_config = getattr(model_config.hf_config, "quantization_config", None)
|
||||||
if quant_config is not None:
|
if quant_config and (quant_method := quant_config.get("quant_method")):
|
||||||
quant_method = quant_config.get("quant_method")
|
|
||||||
if quant_method == "bitsandbytes":
|
if quant_method == "bitsandbytes":
|
||||||
self.pre_quant = True
|
self.pre_quant = True
|
||||||
else:
|
else:
|
||||||
@@ -558,7 +557,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
|
|||||||
"Prequant BitsAndBytes models with tensor parallelism is not "
|
"Prequant BitsAndBytes models with tensor parallelism is not "
|
||||||
"supported. Please try with pipeline parallelism."
|
"supported. Please try with pipeline parallelism."
|
||||||
)
|
)
|
||||||
if self.pre_quant:
|
if quant_config and self.pre_quant:
|
||||||
self.load_8bit = quant_config.get("load_in_8bit", False)
|
self.load_8bit = quant_config.get("load_in_8bit", False)
|
||||||
|
|
||||||
def _initialize_loader_state(
|
def _initialize_loader_state(
|
||||||
|
|||||||
@@ -397,6 +397,8 @@ class Ovis2_5Processor(ProcessorMixin):
|
|||||||
images.append(image)
|
images.append(image)
|
||||||
elif isinstance(video, list):
|
elif isinstance(video, list):
|
||||||
images = video
|
images = video
|
||||||
|
else:
|
||||||
|
raise ValueError("Either images or video should be provided.")
|
||||||
min_pixels = min(
|
min_pixels = min(
|
||||||
max_pixels if max_pixels is not None else MAX_PIXELS,
|
max_pixels if max_pixels is not None else MAX_PIXELS,
|
||||||
min_pixels if min_pixels is not None else MIN_PIXELS,
|
min_pixels if min_pixels is not None else MIN_PIXELS,
|
||||||
|
|||||||
Reference in New Issue
Block a user