[V1][Spec Decode][Ngram] 1.35x gain -> 1.95x gain on InstructCoder with prompt fix (#18971)

This commit is contained in:
Ekagra Ranjan
2025-06-03 18:26:33 -04:00
committed by GitHub
parent 6cac54f4d1
commit 135cf55cd1
2 changed files with 22 additions and 2 deletions

View File

@@ -865,7 +865,15 @@ class InstructCoderDataset(HuggingFaceDataset):
for item in self.data:
if len(sampled_requests) >= num_requests:
break
prompt = f"{item['instruction']}:\n{item['input']}"
prompt = f"{item['input']}\n\n{item['instruction']} Just output \
the code, do not include any explanation."
# apply template
prompt = tokenizer.apply_chat_template(
[{"role": "user", "content": prompt}],
add_generation_prompt=True,
tokenize=False,
)
prompt_len = len(tokenizer(prompt).input_ids)
sampled_requests.append(
SampleRequest(