# launch.py

#!/usr/bin/env python3
import json
import os

import datasets as hf_datasets
import fire

# Name of the sub-directory (under `work_path`) that holds the JSON templates
# read by `gen_config`.
file_path = "templates"
# Absolute path of the directory containing this script; template and
# generated configuration paths are built relative to it.
work_path = os.path.dirname(os.path.abspath(__file__))


def load_dataset(path: str):
    """Load a dataset through Hugging Face ``datasets``.

    Three forms of ``path`` are recognised, checked in this order:

    * a path ending in ``.json`` or ``.jsonl`` is loaded with the ``"json"``
      builder, passing ``path`` as ``data_files``;
    * a string containing ``":"`` is split on ``":"`` and the first two pieces
      are passed as the first and second positional arguments of
      ``datasets.load_dataset`` (any further pieces are ignored);
    * anything else is passed to ``datasets.load_dataset`` unchanged.

    Returns whatever ``datasets.load_dataset`` returns.
    """
    if path.endswith((".json", ".jsonl")):
        return hf_datasets.load_dataset("json", data_files=path)

    if ":" in path:
        pieces = path.split(":")
        return hf_datasets.load_dataset(pieces[0], pieces[1])

    return hf_datasets.load_dataset(path)


def compose_command(
    base_model: str,
    config: str = "moe_peft.json",
    inference: bool = False,
    evaluate: bool = False,
    load_adapter: bool = False,
    random_seed: int = 42,
    cuda_device: int = None,
    log_file: str = "moe_peft.log",
    overwrite: bool = False,
    attn_impl: str = None,
    sliding_window: bool = False,
    use_cache: bool = True,
    quantize: str = None,
    dtype: str = "bf16",
    tf32: bool = False,
):
    """Build the ``python moe_peft.py ...`` command line and run it.

    Each argument is translated into the matching ``moe_peft.py`` option;
    boolean switches are only emitted when enabled (``use_cache=False`` emits
    ``--disable_cache``), ``quantize`` becomes ``--load_4bit``/``--load_8bit``
    and ``dtype`` becomes ``--fp16``/``--bf16`` (nothing is emitted for
    ``fp32``). When ``cuda_device`` is given the command is prefixed with
    ``CUDA_VISIBLE_DEVICES=<cuda_device>``.

    Returns:
        The value returned by ``os.system`` for the composed command.

    Raises:
        AssertionError: if ``quantize`` or ``dtype`` is not one of the
            supported values.
    """
    assert quantize in (None, "4bit", "8bit")
    assert dtype in ("fp32", "fp16", "bf16")

    # (enabled, text) pairs in the exact order they appear on the command line.
    options = (
        (True, f"--base_model {base_model}"),
        (True, f"--config {config}"),
        (inference, "--inference"),
        (evaluate, "--evaluate"),
        (load_adapter, "--load_adapter"),
        (True, f"--seed {random_seed}"),
        (True, f"--log_file {log_file}"),
        (overwrite, "--overwrite"),
        (attn_impl is not None, f"--attn_impl {attn_impl}"),
        (sliding_window, "--sliding_window"),
        (not use_cache, "--disable_cache"),
        (quantize is not None, f"--load_{quantize}"),
        (dtype in ("fp16", "bf16"), f"--{dtype}"),
        (tf32, "--tf32"),
    )

    pieces = []
    if cuda_device is not None:
        # Environment assignment must precede the program name.
        pieces.append(f"CUDA_VISIBLE_DEVICES={cuda_device}")
    pieces.append("python moe_peft.py")
    pieces.extend(text for enabled, text in options if enabled)

    return os.system(" ".join(pieces))


def update_record(dict_: dict, key_, value_):
    """Store ``value_`` under ``key_`` in ``dict_`` unless ``value_`` is ``None``.

    ``dict_`` is modified in place; nothing is returned. Falsy values other
    than ``None`` (``0``, ``False``, ``""``) are still written.
    """
    if value_ is None:
        return
    dict_[key_] = value_


def gen_config(
    # essential
    template: str,
    tasks: str,
    # optional
    adapter_name: str = None,
    file_name: str = "moe_peft.json",
    data_path: str = None,
    multi_task: bool = False,
    append: bool = False,
    # default value provided by template
    prompt_template: str = None,
    cutoff_len: int = None,
    save_step: int = None,
    lr_scheduler: str = None,
    warmup_steps: float = None,
    learning_rate: float = None,
    batch_size: int = None,
    micro_batch_size: int = None,
    evaluate_steps: int = None,
    evaluate_batch_size: int = None,
    num_epochs: int = None,
    loraplus_lr_ratio: float = None,
    use_dora: bool = None,
    use_rslora: bool = None,
    group_by_length: bool = None,
):
    """Generate a configuration file from a template.

    The template ``<work_path>/<file_path>/<template>.json`` is loaded, and
    for every entry of its ``"lora"`` list one adapter configuration is
    produced per task. The result is written to ``<work_path>/<file_name>``.

    Args:
        template: Template name (file stem inside the templates directory).
        tasks: Task names separated by ``';'``. With ``multi_task`` the whole
            string is used as a single task name.
        adapter_name: If given, adapters are named ``<adapter_name>_<index>``
            instead of the name derived from the task.
        file_name: Output file name, relative to this script's directory.
        data_path: Data paths separated by ``';'``, paired positionally with
            ``tasks``. With ``multi_task`` the whole string is used as is.
        multi_task: Produce a single ``multi_task_<index>`` adapter per
            template entry covering all tasks.
        append: Keep the ``"lora"`` entries of an already existing output
            file and append the new ones after them.
        prompt_template, lr_scheduler, warmup_steps, learning_rate,
        batch_size, micro_batch_size, evaluate_steps, evaluate_batch_size,
        num_epochs, loraplus_lr_ratio, use_dora, use_rslora, group_by_length:
            Per-adapter overrides; ``None`` keeps the template's value.
        cutoff_len, save_step: Top-level overrides; ``None`` keeps the
            template's value.

    Raises:
        RuntimeError: if a task is neither registered in
            ``moe_peft.tasks.task_dict`` nor loadable via ``load_dataset``.
    """
    import moe_peft

    template_path = f"{work_path}{os.sep}{file_path}{os.sep}{template}.json"
    config_dir = f"{work_path}{os.sep}{file_name}"

    with open(template_path, "r", encoding="utf8") as fp:
        template_obj = json.load(fp)

    update_record(template_obj, "cutoff_len", cutoff_len)
    update_record(template_obj, "save_step", save_step)
    # The template's "lora" entries act as prototypes; the output list starts
    # empty (or with the existing file's entries when appending).
    lora_templates = template_obj["lora"]
    template_obj["lora"] = []

    if append:
        with open(config_dir, "r", encoding="utf8") as fp:
            orig_config = json.load(fp)
        template_obj["lora"] = orig_config["lora"]

    # Adapter names are suffixed with a running index that continues after
    # any entries carried over from the existing file.
    index = len(template_obj["lora"])
    if multi_task:
        task_list = [tasks]
        path_list = [data_path]
    else:
        task_list = tasks.split(";")
        path_list = (
            [None] * len(task_list) if data_path is None else data_path.split(";")
        )

    # Overrides that are identical for every generated adapter, in the order
    # in which new keys should appear in the output.
    shared_overrides = (
        ("prompt", prompt_template),
        ("scheduler_type", lr_scheduler),
        ("warmup_steps", warmup_steps),
        ("lr", learning_rate),
        ("batch_size", batch_size),
        ("micro_batch_size", micro_batch_size),
        ("evaluate_steps", evaluate_steps),
        ("evaluate_batch_size", evaluate_batch_size),
        ("num_epochs", num_epochs),
        ("loraplus_lr_ratio", loraplus_lr_ratio),
        ("use_dora", use_dora),
        ("use_rslora", use_rslora),
        ("group_by_length", group_by_length),
    )

    for lora_template in lora_templates:
        # NOTE: zip() stops at the shorter list, so surplus tasks or data
        # paths are dropped when their counts differ.
        for task_name, task_data_path in zip(task_list, path_list):
            lora_config = lora_template.copy()
            if multi_task:
                lora_config["name"] = f"multi_task_{index}"
                lora_config["task_name"] = task_name
            elif task_name not in moe_peft.tasks.task_dict:
                # Unknown task names are treated as dataset identifiers;
                # loading the dataset verifies that it actually exists.
                try:
                    load_dataset(task_name)
                except Exception as err:
                    raise RuntimeError(
                        f"Task name '{task_name}' not exist."
                    ) from err
                lora_config["name"] = f"casual_{index}"
                lora_config["task_name"] = "casual"
                lora_config["data"] = task_name
                lora_config["prompt"] = "alpaca"
            else:
                lora_config["name"] = (
                    f"{task_name.split(':')[-1].replace('-', '_')}_{index}"
                )
                lora_config["task_name"] = task_name

            if adapter_name is not None:
                lora_config["name"] = f"{adapter_name}_{index}"

            update_record(lora_config, "data", task_data_path)
            for key, value in shared_overrides:
                update_record(lora_config, key, value)
            template_obj["lora"].append(lora_config)
            index += 1

    with open(config_dir, "w", encoding="utf8") as f:
        json.dump(template_obj, f, indent=4)
    print(f"Configuration file saved to {config_dir}")


def avail_tasks():
    """Print every task name registered in ``moe_peft.tasks.task_dict``.

    Output is a header line, one indented line per task name, and a closing
    note. ``moe_peft`` is imported lazily inside the function.
    """
    import moe_peft

    registered = moe_peft.tasks.task_dict.keys()

    print("Available task names:")
    for task_name in registered:
        print(f"    {task_name}")
    print("These tasks can be trained and evaluated automatically using MoE-PEFT.")


def show_help():
    """Print the launcher's usage text to standard output.

    The text lists the available sub-commands and the arguments accepted by
    ``gen`` (see ``gen_config``) and ``run`` (see ``compose_command``).
    Values in square brackets are defaults.
    """
    print(
        """
    Launcher of MoE-PEFT
    Usage: python launch.py COMMAND [ARGS...]
    Command:
        gen         generate a configuration from template
        run         start a task with existed configuration
        avail       List all available tasks
        help        Show help information

    Arguments of gen:
        --template          lora, mixlora, etc.
        --tasks             task names separate by ';'
        --adapter_name      default is task name
        --file_name         default is 'moe_peft.json'
        --data_path         path to input data
        --multi_task        multi-task training
        --append            append to existed config
        --prompt_template   [alpaca]
        --cutoff_len
        --save_step
        --lr_scheduler
        --warmup_steps
        --learning_rate
        --loraplus_lr_ratio
        --batch_size
        --micro_batch_size
        --evaluate_steps
        --evaluate_batch_size
        --num_epochs
        --use_dora
        --use_rslora
        --group_by_length

    Arguments of run:
        --base_model     model name or path
        --config         [moe_peft.json]
        --inference      [false]
        --evaluate       [false]
        --load_adapter   [false]
        --random_seed    [42]
        --cuda_device    [none]
        --log_file       [moe_peft.log]
        --overwrite      [false]
        --attn_impl      [eager]
        --sliding_window [false]
        --use_cache      [true]
        --quantize       [none], 4bit, 8bit
        --dtype          [bf16], fp16, fp32
        --tf32           [false]
    """
    )


# Maps each CLI sub-command name to the function that implements it;
# `main` looks the requested command up in this table.
command_map = {
    "gen": gen_config,
    "run": compose_command,
    "avail": avail_tasks,
    "help": show_help,
}


def main(command: str = "help", *args, **kwargs):
    """Run the sub-command named ``command`` with the remaining arguments.

    ``command`` is looked up in ``command_map`` (defaulting to ``"help"``) and
    the matching function is called with ``*args`` and ``**kwargs``. An
    unknown command name raises ``KeyError``.
    """
    handler = command_map[command]
    handler(*args, **kwargs)


if __name__ == "__main__":
    # Only when run as a script: let python-fire turn the command line into a
    # call to `main`.
    fire.Fire(main)