[Core][Model] Terratorch backend integration (#23513)
Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com> Signed-off-by: Christian Pinto <christian.pinto@ibm.com> Co-authored-by: Christian Pinto <christian.pinto@ibm.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -298,6 +298,8 @@ def _compare_tp(
|
||||
tokenizer_mode = model_info.tokenizer_mode
|
||||
hf_overrides = model_info.hf_overrides
|
||||
hf_config = get_config(model_id, trust_remote_code)
|
||||
skip_tokenizer_init = model_info.skip_tokenizer_init
|
||||
max_num_seqs = model_info.max_num_seqs
|
||||
|
||||
dtype = "float16"
|
||||
if hf_config.model_type in _FLOAT16_NOT_SUPPORTED_MODELS:
|
||||
@@ -351,6 +353,10 @@ def _compare_tp(
|
||||
common_args.extend(["--load-format", load_format])
|
||||
if hf_overrides:
|
||||
common_args.extend(["--hf-overrides", json.dumps(hf_overrides)])
|
||||
if skip_tokenizer_init:
|
||||
common_args.append("--skip-tokenizer-init")
|
||||
if max_num_seqs:
|
||||
common_args.extend(["--max-num-seqs", f"{max_num_seqs}"])
|
||||
|
||||
specific_case = tp_size == 2 and pp_size == 2 and chunked_prefill
|
||||
testing_ray_compiled_graph = False
|
||||
|
||||
Reference in New Issue
Block a user