From 9d36354e6422db94c374dc476c093b1495370983 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Fri, 22 Nov 2024 20:48:24 +0000 Subject: [PATCH] parse tokenizer_backend=None properly (#2509) --- lm_eval/models/api_models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lm_eval/models/api_models.py b/lm_eval/models/api_models.py index e23d3e6f6b..fd21c857b5 100644 --- a/lm_eval/models/api_models.py +++ b/lm_eval/models/api_models.py @@ -62,7 +62,7 @@ def __init__( # however the requests can be sent as a string if the API doesn't support token inputs. # use tokenized_requests=False tokenizer_backend: Optional[ - Literal["tiktoken", "huggingface", None] + Literal["tiktoken", "huggingface", "None", "none"] ] = "huggingface", truncate: bool = False, # number of concurrent requests. More useful if not batching @@ -116,7 +116,9 @@ def __init__( "Concurrent requests are disabled. To enable concurrent requests, set `num_concurrent` > 1." ) self._concurrent = int(num_concurrent) - self.tokenizer_backend = tokenizer_backend + self.tokenizer_backend = ( + None if tokenizer_backend in ("None", "none") else tokenizer_backend + ) self.add_bos_token = add_bos_token self.custom_prefix_token_id = custom_prefix_token_id self.tokenized_requests = tokenized_requests