[P/D] NIXL Integration (#17751)
Signed-off-by: ApostaC <yihua98@uchicago.edu>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Brent Salisbury <bsalisbu@redhat.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: ApostaC <yihua98@uchicago.edu>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com>
Co-authored-by: Brent Salisbury <bsalisbu@redhat.com>
This commit is contained in:
@@ -870,7 +870,7 @@ def test_kv_connector_basic():
|
||||
NUM_MATCHED_NEW_TOKENS = BLOCK_SIZE * 2
|
||||
scheduler.connector.get_num_new_matched_tokens = Mock(name="method")
|
||||
scheduler.connector.get_num_new_matched_tokens.return_value = (
|
||||
- NUM_MATCHED_NEW_TOKENS)
|
||||
+ NUM_MATCHED_NEW_TOKENS, False)
|
||||
|
||||
######################################################
|
||||
# FIRST SET OF REQUESTS - External Hit Only
|
||||
@@ -981,7 +981,7 @@ def test_kv_connector_unable_to_allocate():
|
||||
NUM_MATCHED_NEW_TOKENS = BLOCK_SIZE * 2
|
||||
scheduler.connector.get_num_new_matched_tokens = Mock(name="method")
|
||||
scheduler.connector.get_num_new_matched_tokens.return_value = (
|
||||
- NUM_MATCHED_NEW_TOKENS)
|
||||
+ NUM_MATCHED_NEW_TOKENS, False)
|
||||
|
||||
# Create two requests. The second request will not be able to
|
||||
# allocate slots because it will not have enough blocks.
|
||||
@@ -1060,7 +1060,7 @@ def test_kv_connector_handles_preemption():
|
||||
NUM_MATCHED_NEW_TOKENS = BLOCK_SIZE
|
||||
scheduler.connector.get_num_new_matched_tokens = Mock(name="method")
|
||||
scheduler.connector.get_num_new_matched_tokens.return_value = (
|
||||
- NUM_MATCHED_NEW_TOKENS)
|
||||
+ NUM_MATCHED_NEW_TOKENS, False)
|
||||
|
||||
# Create two requests.
|
||||
# Both can be scheduled at first, but the second request
|
||||
|
||||
Reference in New Issue
Block a user