diff --git a/lm_eval/api/samplers.py b/lm_eval/api/samplers.py
index 2cdc4e43e7..8decbc96c3 100644
--- a/lm_eval/api/samplers.py
+++ b/lm_eval/api/samplers.py
@@ -101,6 +101,7 @@ def get_chat_context(
         doc,
         num_fewshot,
         fewshot_as_multiturn: bool = False,
+        assistant_prefix: str = None,
     ):
         chat_history = []
         # draw an extra fewshot sample if using same split as evaluating on
@@ -145,6 +146,8 @@ def get_chat_context(
             chat_history.append(
                 {"role": "user", "content": self.get_context(doc, num_fewshot)}
             )
+        if assistant_prefix:
+            chat_history.append({"role": "assistant", "content": assistant_prefix})
 
         return chat_history
 
diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py
index 555cb4330d..6a6ddac011 100644
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -92,6 +92,7 @@ class TaskConfig(dict):
     filter_list: Optional[Union[str, list]] = None
     should_decontaminate: bool = False
     doc_to_decontamination_query: Optional[str] = None
+    assistant_prefix: Optional[str] = None
     metadata: Optional[dict] = (
         None  # by default, not used in the code. allows for users to pass arbitrary info to tasks
     )
@@ -381,6 +382,7 @@ def build_all_requests(
         apply_chat_template: bool = False,
         fewshot_as_multiturn: bool = False,
         chat_template: Optional[Callable] = None,
+        assistant_prefix: Optional[str] = None,
         tokenizer_name: str = "",
     ) -> None:
         """Build a set of Instances for a task, and store them in task.instances"""
@@ -442,6 +444,7 @@ def build_all_requests(
                 apply_chat_template,
                 fewshot_as_multiturn,
                 chat_template,
+                assistant_prefix,
             )
 
             # TODO: we should override self.config.repeats if doing greedy gen so users don't waste time+compute
@@ -1000,6 +1003,7 @@ def append_target_question(
         labeled_examples: List[Dict[str, str]],
         question: str,
         fewshot_as_multiturn: bool = False,
+        assistant_prefix: Optional[str] = None,
     ) -> None:
         """Adds a target question to the labeled examples list.
         If fewshot_as_multiturn is True, or labeled_examples is empty, or the last entry is a system turn, appends the question as a new user entry.
@@ -1015,6 +1019,8 @@ def append_target_question(
         else:
             # if fewshot_as_multiturn is True, append as next user entry (last is always assistant)
             labeled_examples.append({"role": "user", "content": question})
+        if assistant_prefix:
+            labeled_examples.append({"role": "assistant", "content": assistant_prefix})
 
     @utils.positional_deprecated
     def fewshot_context(
@@ -1025,6 +1031,7 @@ def fewshot_context(
         apply_chat_template: bool = False,
         fewshot_as_multiturn: bool = False,
         chat_template: Optional[Callable] = None,
+        assistant_prefix: Optional[str] = None,
     ) -> str:
         """Returns a fewshot context string that is made up of a prepended description
         (if provided), the `num_fewshot` number of examples, and an appended prompt example.
@@ -1078,7 +1085,7 @@ def fewshot_context(
         if apply_chat_template:
             labeled_examples.extend(
                 self.sampler.get_chat_context(
-                    doc, num_fewshot, fewshot_as_multiturn
+                    doc, num_fewshot, fewshot_as_multiturn, assistant_prefix
                 )
             )
         else:
@@ -1090,7 +1097,10 @@ def fewshot_context(
                 return chat_template(labeled_examples)
             if isinstance(example, str):
                 self.append_target_question(
-                    labeled_examples, example, fewshot_as_multiturn
+                    labeled_examples,
+                    example,
+                    fewshot_as_multiturn,
+                    self.config.assistant_prefix,
                 )
             # for loglikelihood create a list of questions with appended choices
             elif isinstance(example, list):
@@ -1098,7 +1108,9 @@ def fewshot_context(
                 # copy chat history for each example and append the answer
                 for ex in example:
                     chat = deepcopy(labeled_examples)
-                    self.append_target_question(chat, ex, fewshot_as_multiturn)
+                    self.append_target_question(
+                        chat, ex, fewshot_as_multiturn, self.config.assistant_prefix
+                    )
                     labeled_examples_list.append(chat_template(chat))
                 return labeled_examples_list
             # if example is an integer, append the choice or convert to string
@@ -1106,11 +1118,17 @@ def fewshot_context(
                 if self.config.doc_to_choice is not None:
                     choices = self.doc_to_choice(doc)
                     self.append_target_question(
-                        labeled_examples, choices[example], fewshot_as_multiturn
+                        labeled_examples,
+                        choices[example],
+                        fewshot_as_multiturn,
+                        self.config.assistant_prefix,
                     )
                 else:
                     self.append_target_question(
-                        labeled_examples, str(example), fewshot_as_multiturn
+                        labeled_examples,
+                        str(example),
+                        fewshot_as_multiturn,
+                        self.config.assistant_prefix,
                     )
             # return lm.apply_chat_template(labeled_examples)
             return chat_template(labeled_examples)
diff --git a/lm_eval/models/huggingface.py b/lm_eval/models/huggingface.py
index 0a5fa2ed85..4042c6dfc7 100644
--- a/lm_eval/models/huggingface.py
+++ b/lm_eval/models/huggingface.py
@@ -1330,13 +1330,17 @@ def _collate(req: Tuple[str, dict]):
 
         return res
 
-    def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str:
+    def apply_chat_template(
+        self, chat_history: List[Dict[str, str]], add_generation_prompt: bool = True
+    ) -> str:
         """
         Method to apply a chat template to a list of chat history between user and model.
         """
         try:
             chat_templated = self.tokenizer.apply_chat_template(
-                chat_history, tokenize=False, add_generation_prompt=True
+                chat_history,
+                tokenize=False,
+                add_generation_prompt=add_generation_prompt,
             )
         except jinja2.exceptions.TemplateError:
             eval_logger.warning(
@@ -1344,7 +1348,9 @@ def apply_chat_template(self, chat_history: List[Dict[str, str]]) -> str:
             )
             chat_history = [msg for msg in chat_history if msg["role"] != "system"]
             chat_templated = self.tokenizer.apply_chat_template(
-                chat_history, tokenize=False, add_generation_prompt=True
+                chat_history,
+                tokenize=False,
+                add_generation_prompt=add_generation_prompt,
             )
 
         return chat_templated
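
Reviewer note: a simplified sketch of what the new `assistant_prefix` option does. The helper below is a stripped-down stand-in for the harness's `append_target_question`, not the real implementation, and the question and prefix strings are made up for illustration. When a task sets `assistant_prefix`, the target question is followed by a prefilled assistant turn that the chat template can then continue.

```python
from typing import Dict, List, Optional


def append_with_prefix(
    chat: List[Dict[str, str]],
    question: str,
    assistant_prefix: Optional[str] = None,
) -> None:
    # Append the target question as a user turn, then, only when a prefix is
    # actually configured, a prefilled assistant turn (mirroring the guarded
    # appends in the patch above).
    chat.append({"role": "user", "content": question})
    if assistant_prefix:
        chat.append({"role": "assistant", "content": assistant_prefix})


history: List[Dict[str, str]] = []
append_with_prefix(history, "Q: What is 2 + 2?\nA:", assistant_prefix="The answer is")
print(history)
# [{'role': 'user', 'content': 'Q: What is 2 + 2?\nA:'},
#  {'role': 'assistant', 'content': 'The answer is'}]
```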
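
The `add_generation_prompt` parameter threaded through `apply_chat_template` in huggingface.py matters because a prefilled assistant turn only helps if the template does not open a fresh, empty assistant turn after it. A sketch of the tokenizer-level behavior, assuming an arbitrary chat model (the checkpoint name is a placeholder, not one the harness depends on):

```python
from transformers import AutoTokenizer

# Placeholder checkpoint; any tokenizer that ships a chat template behaves similarly.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

chat = [
    {"role": "user", "content": "Q: What is 2 + 2?\nA:"},
    {"role": "assistant", "content": "The answer is"},
]

# With add_generation_prompt=False the rendered string ends at the prefilled
# assistant turn instead of opening a new, empty one for the model to fill.
prompt = tokenizer.apply_chat_template(
    chat, tokenize=False, add_generation_prompt=False
)
print(prompt)
```

One caveat worth flagging in review: many templates still close the final assistant turn with an end-of-turn token, in which case the model will not actually continue from the prefix; recent transformers releases expose `continue_final_message=True` on `apply_chat_template` for exactly this situation.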