Commit f6c29df (1 parent: af42022). Showing 6 changed files with 293 additions and 16 deletions.
@@ -0,0 +1,166 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
"# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", | ||
"[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", | ||
"[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", | ||
"[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", | ||
"[![](https://img.shields.io/pypi/v/moe_peft?logo=pypi)](https://pypi.org/project/moe_peft/)\n", | ||
"[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", | ||
"[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", | ||
"\n", | ||
"MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). Key features of MoE-PEFT include:\n", | ||
"\n", | ||
"- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", | ||
"\n", | ||
"- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", | ||
"\n", | ||
"- Support for multiple PEFT algorithms and various pre-trained models.\n", | ||
"\n", | ||
"- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", | ||
"\n", | ||
"## About this notebook\n", | ||
"\n", | ||
"This is a simple jupiter notebook for showcasing the basic process of building MixLoRA MoE model from TinyLLaMA by fine-tuning with dummy data." | ||
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clone and install MoE-PEFT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "! pip uninstall torchvision torchaudio -y\n",
    "! pip install moe_peft"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading the base model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "import moe_peft\n",
    "\n",
    "moe_peft.setup_logging(\"INFO\")\n",
    "\n",
    "base_model = \"TinyLlama/TinyLlama_v1.1\"\n",
    "\n",
    "model = moe_peft.LLMModel.from_pretrained(\n",
    "    base_model,\n",
    "    device=moe_peft.executor.default_device_name(),\n",
    "    load_dtype=torch.bfloat16,\n",
    ")\n",
    "tokenizer = moe_peft.Tokenizer(base_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training a dummy LoRA adapter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lora_config = moe_peft.adapter_factory(\n",
    "    peft_type=\"MIXLORA\",\n",
    "    adapter_name=\"mixlora_0\",\n",
    "    r=8,\n",
    "    lora_alpha=16,\n",
    "    lora_dropout=0.05,\n",
    "    target_modules=[\n",
    "        \"up_proj\",\n",
    "        \"down_proj\",\n",
    "        \"gate_proj\",\n",
    "    ],\n",
    "    routing_strategy=\"mixlora\",\n",
    "    num_experts=6,\n",
    ")\n",
    "\n",
    "model.init_adapter(lora_config)\n",
    "\n",
    "train_config = moe_peft.TrainConfig(\n",
    "    adapter_name=\"mixlora_0\",\n",
    "    data_path=\"TUDB-Labs/Dummy-MoE-PEFT\",\n",
    "    num_epochs=10,\n",
    "    batch_size=16,\n",
    "    micro_batch_size=8,\n",
    "    learning_rate=1e-4,\n",
    ")\n",
    "\n",
    "moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
"## Validate the effectiveness of LoRA adapter" | ||
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "generate_config = moe_peft.GenerateConfig(\n",
    "    adapter_name=\"mixlora_0\",\n",
    "    prompts=[\"Could you provide an introduction to MoE-PEFT?\"],\n",
    "    stop_token=\"\\n\",\n",
    ")\n",
    "\n",
    "output = moe_peft.generate(\n",
    "    model=model, tokenizer=tokenizer, configs=[generate_config], max_gen_len=128\n",
    ")\n",
    "\n",
    "print(output[\"mixlora_0\"][0])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "moe_peft",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
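The notebook's intro cell highlights concurrent fine-tuning of multiple adapters on a shared pre-trained model, and both `moe_peft.train` and `moe_peft.generate` already take lists of configs in the cells above. The sketch below extends the notebook's training cell to two adapters in one pass. It is only an illustration under that assumption: the second adapter name and the reuse of the same dummy dataset are illustrative, and it relies on the `model` and `tokenizer` objects loaded earlier in the notebook.

```python
import moe_peft

# Assumes `model` and `tokenizer` from the "Loading the base model" cell above.
# The adapter names are illustrative; hyperparameters mirror the notebook cell.
adapter_names = ["mixlora_0", "mixlora_1"]

adapter_configs = [
    moe_peft.adapter_factory(
        peft_type="MIXLORA",
        adapter_name=name,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["up_proj", "down_proj", "gate_proj"],
        routing_strategy="mixlora",
        num_experts=6,
    )
    for name in adapter_names
]

train_configs = [
    moe_peft.TrainConfig(
        adapter_name=name,
        data_path="TUDB-Labs/Dummy-MoE-PEFT",
        num_epochs=10,
        batch_size=16,
        micro_batch_size=8,
        learning_rate=1e-4,
    )
    for name in adapter_names
]

# Attach every adapter to the shared base model, then train them in one call.
for config in adapter_configs:
    model.init_adapter(config)
moe_peft.train(model=model, tokenizer=tokenizer, configs=train_configs)
```

This mirrors the "multiple adapters with a shared pre-trained model" feature from the intro: the base weights are loaded once, while each entry in `configs` describes one adapter's training run.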
@@ -0,0 +1,89 @@
import fire
import torch

import moe_peft
import moe_peft.adapters


def main(
    base_model: str = "TinyLlama/TinyLlama_v1.1",
    adapter_name: str = "mixlora_0",
    train_data: str = "TUDB-Labs/Dummy-MoE-PEFT",
    test_prompt: str = "Could you provide an introduction to MoE-PEFT?",
    save_path: str = None,
):
    moe_peft.setup_logging("INFO")

    # Load the base model and its tokenizer.
    model: moe_peft.LLMModel = moe_peft.LLMModel.from_pretrained(
        base_model,
        device=moe_peft.executor.default_device_name(),
        load_dtype=torch.bfloat16,
    )
    tokenizer = moe_peft.Tokenizer(base_model)

    # MixLoRA adapter: LoRA experts on the MLP projections, routed with the
    # "mixlora" strategy.
    lora_config = moe_peft.adapter_factory(
        peft_type="MIXLORA",
        adapter_name=adapter_name,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=[
            "up_proj",
            "down_proj",
            "gate_proj",
        ],
        routing_strategy="mixlora",
        num_experts=6,
    )

    train_config = moe_peft.TrainConfig(
        adapter_name=adapter_name,
        data_path=train_data,
        num_epochs=10,
        batch_size=16,
        micro_batch_size=8,
        learning_rate=1e-4,
    )

    # Train the adapter, optionally save its weights, then unload the trained
    # config and weights for reuse in the generation step below.
    with moe_peft.executors.no_cache():
        model.init_adapter(lora_config)
        moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])
        if save_path:
            moe_peft.trainer.save_adapter_weight(
                model=model, config=train_config, path=save_path
            )
        lora_config, lora_weight = model.unload_adapter(adapter_name)

    # Generate with both the fine-tuned adapter and a plain "default"
    # configuration for comparison.
    generate_configs = [
        moe_peft.GenerateConfig(
            adapter_name=adapter_name,
            prompts=[test_prompt],
            stop_token="\n",
        ),
        moe_peft.GenerateConfig(
            adapter_name="default",
            prompts=[test_prompt],
            stop_token="\n",
        ),
    ]

    with moe_peft.executors.no_cache():
        model.init_adapter(lora_config, lora_weight)
        model.init_adapter(moe_peft.AdapterConfig(adapter_name="default"))
        outputs = moe_peft.generate(
            model=model,
            tokenizer=tokenizer,
            configs=generate_configs,
            max_gen_len=128,
        )

    print(f"\n{'=' * 10}\n")
    print(f"PROMPT: {test_prompt}\n")
    for adapter_name, output in outputs.items():
        print(f"{adapter_name} OUTPUT:")
        print(f"{output[0]}\n")
    print(f"\n{'=' * 10}\n")


if __name__ == "__main__":
    fire.Fire(main)
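Because the script hands `main` to `fire.Fire`, each keyword argument above also becomes a command-line flag (for example `--save_path`), with unspecified flags falling back to the defaults. The function can also be driven directly from Python; a small usage sketch follows, where the module name `finetune_dummy` is only a placeholder for whatever the script file is actually named.

```python
# Usage sketch. "finetune_dummy" is a hypothetical module name for the script
# above; adjust the import to match the real file name.
from finetune_dummy import main

# Train the dummy MixLoRA adapter, save its weights to ./mixlora_0, and print
# the outputs produced with the adapter and with the plain "default" setup.
main(
    base_model="TinyLlama/TinyLlama_v1.1",
    adapter_name="mixlora_0",
    save_path="./mixlora_0",
)
```

The equivalent command line would be `python finetune_dummy.py --save_path ./mixlora_0` (script name again illustrative), leaving the other arguments at their defaults.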