diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index d6a335f8c..971cfa3bf 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -42,7 +42,7 @@ def __init__(self, settings: Settings) -> None: context_window=settings.llm.context_window, generate_kwargs={}, # All to GPU - model_kwargs={"n_gpu_layers": -1}, + model_kwargs={"n_gpu_layers": -1, "offload_kqv": True}, # transform inputs into Llama2 format messages_to_prompt=prompt_style.messages_to_prompt, completion_to_prompt=prompt_style.completion_to_prompt,