Skip to content

Commit

Permalink
chore: Refactor model_path to model_uri
Browse files Browse the repository at this point in the history
In the codebase, we currently name terms like hf://... as "model paths"
and "URIs." This inconsistency can cause confusion and make the code
harder to read and maintain.

Refactor `model_path` to `model_uri`.

Closes #428

Signed-off-by: Dimitris Poulopoulos <[email protected]>
  • Loading branch information
dpoulopoulos committed Dec 2, 2024
1 parent 33fc188 commit 0e0d248
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"python.analysis.importFormat": "absolute",
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true,
"editor.formatOnSave": false,
"editor.codeActionsOnSave": {
"source.fixAll": "never",
"source.organizeImports.ruff": "explicit"
Expand Down
20 changes: 10 additions & 10 deletions lumigator/python/mzai/backend/backend/config_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

seq2seq_eval_template = """{{
"name": "{job_name}/{job_id}",
"model": {{ "path": "{model_path}" }},
"model": {{ "path": "{model_uri}" }},
"dataset": {{ "path": "{dataset_path}" }},
"evaluation": {{
"metrics": ["rouge", "meteor", "bertscore"],
Expand All @@ -17,8 +17,8 @@

bart_eval_template = """{{
"name": "{job_name}/{job_id}",
"model": {{ "path": "{model_path}" }},
"tokenizer": {{ "path": "{model_path}", "mod_max_length": 1024 }},
"model": {{ "path": "{model_uri}" }},
"tokenizer": {{ "path": "{model_uri}", "mod_max_length": 1024 }},
"dataset": {{ "path": "{dataset_path}" }},
"evaluation": {{
"metrics": ["rouge", "meteor", "bertscore"],
Expand All @@ -32,7 +32,7 @@

causal_eval_template = """{{
"name": "{job_name}/{job_id}",
"model": {{ "path": "{model_path}" }},
"model": {{ "path": "{model_uri}" }},
"dataset": {{ "path": "{dataset_path}" }},
"evaluation": {{
"metrics": ["rouge", "meteor", "bertscore"],
Expand All @@ -49,7 +49,7 @@
"model": {{
"inference": {{
"base_url": "{model_url}",
"engine": "{model_path}",
"engine": "{model_uri}",
"system_prompt": "{system_prompt}",
"max_retries": 3
}}
Expand All @@ -68,20 +68,20 @@

seq2seq_infer_template = """{{
"name": "{job_name}/{job_id}",
"model": {{ "path": "{model_path}" }},
"model": {{ "path": "{model_uri}" }},
"dataset": {{ "path": "{dataset_path}" }},
}}"""

bart_infer_template = """{{
"name": "{job_name}/{job_id}",
"model": {{ "path": "{model_path}" }},
"tokenizer": {{ "path": "{model_path}", "mod_max_length": 1024 }},
"model": {{ "path": "{model_uri}" }},
"tokenizer": {{ "path": "{model_uri}", "mod_max_length": 1024 }},
"dataset": {{ "path": "{dataset_path}" }},
}}"""

causal_infer_template = """{{
"name": "{job_name}/{job_id}",
"model": {{ "path": "{model_path}" }},
"model": {{ "path": "{model_uri}" }},
"dataset": {{ "path": "{dataset_path}" }},
}}"""

Expand All @@ -96,7 +96,7 @@
}},
"inference_server": {{
"base_url": "{model_url}",
"engine": "{model_path}",
"engine": "{model_uri}",
"system_prompt": "{system_prompt}",
"max_retries": 3
}},
Expand Down
4 changes: 2 additions & 2 deletions lumigator/python/mzai/backend/backend/services/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _get_job_params(self, job_type: str, record, request: BaseModel) -> dict:
job_params = {
"job_id": record.id,
"job_name": request.name,
"model_path": request.model,
"model_uri": request.model,
"dataset_path": dataset_s3_path,
"max_samples": request.max_samples,
"storage_path": self.storage_path,
Expand All @@ -141,7 +141,7 @@ def _get_job_params(self, job_type: str, record, request: BaseModel) -> dict:
job_params = {
"job_id": record.id,
"job_name": request.name,
"model_path": request.model,
"model_uri": request.model,
"dataset_path": dataset_s3_path,
"max_samples": request.max_samples,
"storage_path": self.storage_path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class HuggingFaceEvaluationConfig(EvaluatorConfig):
metrics: conlist(str, min_length=1)
use_pipeline: bool = False
enable_tqdm: bool = False
max_samples: int = -1 # set to all samples by default
max_samples: int = -1 # set to all samples by default
storage_path: str | None = None
return_input_data: bool = False
return_predictions: bool = False
Expand All @@ -41,8 +41,8 @@ def ensure_tokenizer_config(cls, values):
if values.get("tokenizer") is None:
values["tokenizer"] = {}
match values["model"]:
case str() as model_path:
values["tokenizer"]["path"] = model_path
case str() as model_uri:
values["tokenizer"]["path"] = model_uri
case dict() as model_data:
# if dict we might have model.path specified
# if we don't it is VLLMCompletion and we are ok
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,14 @@ def resolve_asset_path(self, path: AssetPath) -> str:
"""
raw_path = strip_path_prefix(path)
if path.startswith(
(
PathPrefix.FILE, PathPrefix.HUGGINGFACE, PathPrefix.OPENAI,
PathPrefix.MISTRAL, PathPrefix.LLAMAFILE
)
):

(
PathPrefix.FILE,
PathPrefix.HUGGINGFACE,
PathPrefix.OPENAI,
PathPrefix.MISTRAL,
PathPrefix.LLAMAFILE,
)
):
return raw_path
elif path.startswith(PathPrefix.WANDB):
artifact = get_artifact_from_api(raw_path)
Expand Down Expand Up @@ -136,7 +138,7 @@ def load_pretrained_model(

# TODO: HuggingFace has many AutoModel classes with different "language model heads"
# Can we abstract this to load with any type of AutoModel class?
model_path = self.resolve_asset_path(config.path)
model_uri = self.resolve_asset_path(config.path)

# load config first to get the model type
model_config = self.load_pretrained_config(config)
Expand All @@ -150,7 +152,7 @@ def load_pretrained_model(
automodel_class = AutoModelForCausalLM

return automodel_class.from_pretrained(
pretrained_model_name_or_path=model_path,
pretrained_model_name_or_path=model_uri,
trust_remote_code=config.trust_remote_code,
torch_dtype=config.torch_dtype,
quantization_config=bnb_config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ def load_harness_model(config: LMHarnessJobConfig) -> HFLM | OpenaiCompletionsLM
hf_model_loader = HuggingFaceModelLoader()
match config.model:
case AutoModelConfig() as model_config:
model_path, peft_path = hf_model_loader.resolve_peft_and_pretrained(model_config.path)
model_uri, peft_path = hf_model_loader.resolve_peft_and_pretrained(model_config.path)
quantization_kwargs: dict[str, Any] = (
config.quantization.model_dump() if config.quantization else {}
)
# TODO: Fix this up by passing in the instantiated model directly
return HFLM(
pretrained=model_path,
tokenizer=model_path,
pretrained=model_uri,
tokenizer=model_uri,
peft=peft_path,
device="cuda" if torch.cuda.device_count() > 0 else "cpu",
trust_remote_code=model_config.trust_remote_code,
Expand Down
6 changes: 3 additions & 3 deletions lumigator/python/mzai/jobs/evaluator/evaluator/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,13 @@ def format_s3_path(bucket: str, key: str) -> AssetPath:
return f"{PathPrefix.S3.value}{bucket}/{key}"


def format_openai_model_path(model_name: str) -> AssetPath:
def format_openai_model_uri(model_name: str) -> AssetPath:
return f"{PathPrefix.OPENAI.value}{model_name}"


def format_mistral_model_path(model_name: str) -> AssetPath:
def format_mistral_model_uri(model_name: str) -> AssetPath:
return f"{PathPrefix.MISTRAL.value}{model_name}"


def format_llamafile_model_path(model_name: str) -> AssetPath:
def format_llamafile_model_uri(model_name: str) -> AssetPath:
return f"{PathPrefix.LLAMAFILE.value}{model_name}"
2 changes: 1 addition & 1 deletion lumigator/python/mzai/sdk/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def dialog_data(common_resources_dir):
def simple_eval_template():
return """{{
"name": "{job_name}/{job_id}",
"model": {{ "path": "{model_path}" }},
"model": {{ "path": "{model_uri}" }},
"dataset": {{ "path": "{dataset_path}" }},
"evaluation": {{
"metrics": ["meteor", "rouge"],
Expand Down

0 comments on commit 0e0d248

Please sign in to comment.