[Misc] Avoid direct access of global mm_registry in compute_encoder_budget (#15621)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -137,6 +137,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
||||
encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
|
||||
model_config=model_config,
|
||||
scheduler_config=scheduler_config,
|
||||
+ mm_registry=self.mm_registry,
|
||||
)
|
||||
self.max_num_encoder_input_tokens = encoder_compute_budget
|
||||
self.encoder_cache_size = encoder_cache_size
|
||||
@@ -1439,9 +1440,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
||||
# NOTE: Currently model is profiled with a single non-text
|
||||
# modality with the max possible input tokens even when
|
||||
# it supports multiple.
|
||||
- max_tokens_by_modality_dict = (
|
||||
- MULTIMODAL_REGISTRY.
|
||||
- get_max_tokens_per_item_by_nonzero_modality(self.model_config))
|
||||
+ max_tokens_by_modality_dict = self.mm_registry \
|
||||
+ .get_max_tokens_per_item_by_nonzero_modality(self.model_config)
|
||||
dummy_data_modality, max_tokens_per_mm_item = max(
|
||||
max_tokens_by_modality_dict.items(), key=lambda item: item[1])
|
||||
|
||||
|
||||
@@ -109,6 +109,7 @@ class TPUModelRunner:
|
||||
encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
|
||||
model_config=model_config,
|
||||
scheduler_config=scheduler_config,
|
||||
+ mm_registry=self.mm_registry,
|
||||
)
|
||||
self.max_num_encoder_input_tokens = encoder_compute_budget
|
||||
self.encoder_cache_size = encoder_cache_size
|
||||
|
||||
Reference in New Issue
Block a user