Commit f6c29df (1 parent: af42022). Showing 6 changed files with 293 additions and 16 deletions.
@@ -0,0 +1,166 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
"# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", | ||
"[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", | ||
"[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", | ||
"[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", | ||
"[![](https://img.shields.io/pypi/v/moe_peft?logo=pypi)](https://pypi.org/project/moe_peft/)\n", | ||
"[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", | ||
"[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", | ||
"\n", | ||
"MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). Key features of MoE-PEFT include:\n", | ||
"\n", | ||
"- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", | ||
"\n", | ||
"- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", | ||
"\n", | ||
"- Support for multiple PEFT algorithms and various pre-trained models.\n", | ||
"\n", | ||
"- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", | ||
"\n", | ||
"## About this notebook\n", | ||
"\n", | ||
"This is a simple jupiter notebook for showcasing the basic process of building MixLoRA MoE model from TinyLLaMA by fine-tuning with dummy data." | ||
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clone and install MoE-PEFT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "! pip uninstall torchvision torchaudio -y\n",
    "! pip install moe_peft"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading the base model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "import moe_peft\n",
    "\n",
    "moe_peft.setup_logging(\"INFO\")\n",
    "\n",
    "base_model = \"TinyLlama/TinyLlama_v1.1\"\n",
    "\n",
    "model = moe_peft.LLMModel.from_pretrained(\n",
    "    base_model,\n",
    "    device=moe_peft.executor.default_device_name(),\n",
    "    load_dtype=torch.bfloat16,\n",
    ")\n",
    "tokenizer = moe_peft.Tokenizer(base_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training a dummy LoRA adapter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lora_config = moe_peft.adapter_factory(\n",
    "    peft_type=\"MIXLORA\",\n",
    "    adapter_name=\"mixlora_0\",\n",
    "    r=8,\n",
    "    lora_alpha=16,\n",
    "    lora_dropout=0.05,\n",
    "    target_modules=[\n",
    "        \"up_proj\",\n",
    "        \"down_proj\",\n",
    "        \"gate_proj\",\n",
    "    ],\n",
    "    routing_strategy=\"mixlora\",\n",
    "    num_experts=6,\n",
    ")\n",
    "\n",
    "model.init_adapter(lora_config)\n",
    "\n",
    "train_config = moe_peft.TrainConfig(\n",
    "    adapter_name=\"mixlora_0\",\n",
    "    data_path=\"TUDB-Labs/Dummy-MoE-PEFT\",\n",
    "    num_epochs=10,\n",
    "    batch_size=16,\n",
    "    micro_batch_size=8,\n",
    "    learning_rate=1e-4,\n",
    ")\n",
    "\n",
    "moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
"## Validate the effectiveness of LoRA adapter" | ||
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "generate_config = moe_peft.GenerateConfig(\n",
    "    adapter_name=\"mixlora_0\",\n",
    "    prompts=[\"Could you provide an introduction to MoE-PEFT?\"],\n",
    "    stop_token=\"\\n\",\n",
    ")\n",
    "\n",
    "output = moe_peft.generate(\n",
    "    model=model, tokenizer=tokenizer, configs=[generate_config], max_gen_len=128\n",
    ")\n",
    "\n",
    "print(output[\"mixlora_0\"][0])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "moe_peft",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
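The notebook's intro cell highlights concurrent fine-tuning of multiple adapters on a shared pre-trained model, and both `moe_peft.train` and `moe_peft.generate` already take lists of configs in the cells above. The sketch below extends the notebook's training cell to two adapters in one pass. It is only an illustration under that assumption: the second adapter name and the reuse of the same dummy dataset are illustrative, and it relies on the `model` and `tokenizer` objects loaded earlier in the notebook.

```python
import moe_peft

# Assumes `model` and `tokenizer` from the "Loading the base model" cell above.
# The adapter names are illustrative; hyperparameters mirror the notebook cell.
adapter_names = ["mixlora_0", "mixlora_1"]

adapter_configs = [
    moe_peft.adapter_factory(
        peft_type="MIXLORA",
        adapter_name=name,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["up_proj", "down_proj", "gate_proj"],
        routing_strategy="mixlora",
        num_experts=6,
    )
    for name in adapter_names
]

train_configs = [
    moe_peft.TrainConfig(
        adapter_name=name,
        data_path="TUDB-Labs/Dummy-MoE-PEFT",
        num_epochs=10,
        batch_size=16,
        micro_batch_size=8,
        learning_rate=1e-4,
    )
    for name in adapter_names
]

# Attach every adapter to the shared base model, then train them in one call.
for config in adapter_configs:
    model.init_adapter(config)
moe_peft.train(model=model, tokenizer=tokenizer, configs=train_configs)
```

This mirrors the "multiple adapters with a shared pre-trained model" feature from the intro: the base weights are loaded once, while each entry in `configs` describes one adapter's training run.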
@@ -0,0 +1,89 @@
import fire
import torch

import moe_peft
import moe_peft.adapters


def main(
    base_model: str = "TinyLlama/TinyLlama_v1.1",
    adapter_name: str = "mixlora_0",
    train_data: str = "TUDB-Labs/Dummy-MoE-PEFT",
    test_prompt: str = "Could you provide an introduction to MoE-PEFT?",
    save_path: str = None,
):
    moe_peft.setup_logging("INFO")

    # Load the base model and its tokenizer.
    model: moe_peft.LLMModel = moe_peft.LLMModel.from_pretrained(
        base_model,
        device=moe_peft.executor.default_device_name(),
        load_dtype=torch.bfloat16,
    )
    tokenizer = moe_peft.Tokenizer(base_model)

    # MixLoRA adapter: LoRA experts on the MLP projections, routed with the
    # "mixlora" strategy.
    lora_config = moe_peft.adapter_factory(
        peft_type="MIXLORA",
        adapter_name=adapter_name,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=[
            "up_proj",
            "down_proj",
            "gate_proj",
        ],
        routing_strategy="mixlora",
        num_experts=6,
    )

    train_config = moe_peft.TrainConfig(
        adapter_name=adapter_name,
        data_path=train_data,
        num_epochs=10,
        batch_size=16,
        micro_batch_size=8,
        learning_rate=1e-4,
    )

    # Train the adapter, optionally save its weights, then unload the trained
    # config and weights for reuse in the generation step below.
    with moe_peft.executors.no_cache():
        model.init_adapter(lora_config)
        moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])
        if save_path:
            moe_peft.trainer.save_adapter_weight(
                model=model, config=train_config, path=save_path
            )
        lora_config, lora_weight = model.unload_adapter(adapter_name)

    # Generate with both the fine-tuned adapter and a plain "default"
    # configuration for comparison.
    generate_configs = [
        moe_peft.GenerateConfig(
            adapter_name=adapter_name,
            prompts=[test_prompt],
            stop_token="\n",
        ),
        moe_peft.GenerateConfig(
            adapter_name="default",
            prompts=[test_prompt],
            stop_token="\n",
        ),
    ]

    with moe_peft.executors.no_cache():
        model.init_adapter(lora_config, lora_weight)
        model.init_adapter(moe_peft.AdapterConfig(adapter_name="default"))
        outputs = moe_peft.generate(
            model=model,
            tokenizer=tokenizer,
            configs=generate_configs,
            max_gen_len=128,
        )

    print(f"\n{'=' * 10}\n")
    print(f"PROMPT: {test_prompt}\n")
    for adapter_name, output in outputs.items():
        print(f"{adapter_name} OUTPUT:")
        print(f"{output[0]}\n")
    print(f"\n{'=' * 10}\n")


if __name__ == "__main__":
    fire.Fire(main)
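Because the script hands `main` to `fire.Fire`, each keyword argument above also becomes a command-line flag (for example `--save_path`), with unspecified flags falling back to the defaults. The function can also be driven directly from Python; a small usage sketch follows, where the module name `finetune_dummy` is only a placeholder for whatever the script file is actually named.

```python
# Usage sketch. "finetune_dummy" is a hypothetical module name for the script
# above; adjust the import to match the real file name.
from finetune_dummy import main

# Train the dummy MixLoRA adapter, save its weights to ./mixlora_0, and print
# the outputs produced with the adapter and with the plain "default" setup.
main(
    base_model="TinyLlama/TinyLlama_v1.1",
    adapter_name="mixlora_0",
    save_path="./mixlora_0",
)
```

The equivalent command line would be `python finetune_dummy.py --save_path ./mixlora_0` (script name again illustrative), leaving the other arguments at their defaults.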