
manually fix PLBart tokenizer
ArthurZucker committed Sep 26, 2024
1 parent 0317895 commit e71a01a
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/transformers/models/plbart/tokenization_plbart.py
@@ -130,6 +130,7 @@ def __init__(
         tgt_lang=None,
         sp_model_kwargs: Optional[Dict[str, Any]] = None,
         additional_special_tokens=None,
+        clean_up_tokenization_spaces=True,
         **kwargs,
     ):
         # Mask token behave like a normal word, i.e. include the space before it
@@ -200,6 +201,7 @@ def __init__(
             tgt_lang=tgt_lang,
             additional_special_tokens=_additional_special_tokens,
             sp_model_kwargs=self.sp_model_kwargs,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
             **kwargs,
         )
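The fix above follows a general pattern: a tokenizer subclass must both accept `clean_up_tokenization_spaces` in its own `__init__` signature and forward it explicitly to the parent class, otherwise a user-supplied value is dropped and the parent silently falls back to its default. A minimal sketch of that pattern (using hypothetical stand-in classes, not the real `transformers` tokenizers):

```python
# Hypothetical stand-in for the base tokenizer class: it stores the flag
# and applies a crude cleanup (removing spaces before punctuation) when
# the flag is enabled.
class BaseTokenizer:
    def __init__(self, clean_up_tokenization_spaces=True, **kwargs):
        self.clean_up_tokenization_spaces = clean_up_tokenization_spaces

    def decode_cleanup(self, text):
        # Simplified stand-in for the real cleanup logic.
        if self.clean_up_tokenization_spaces:
            return text.replace(" .", ".").replace(" ,", ",")
        return text


class PLBartLikeTokenizer(BaseTokenizer):
    def __init__(self, src_lang=None, clean_up_tokenization_spaces=True, **kwargs):
        # The commit's fix in miniature: accept the flag AND forward it,
        # so a caller-supplied value survives into the base class.
        super().__init__(
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )
        self.src_lang = src_lang


# With the flag disabled, spacing is left untouched.
tok = PLBartLikeTokenizer(clean_up_tokenization_spaces=False)
print(tok.decode_cleanup("hello , world ."))  # hello , world .
```

Without the explicit forwarding, `clean_up_tokenization_spaces=False` passed by the caller would never reach `BaseTokenizer`, which would initialize with its default of `True`.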
