Skip to content

Commit

Permalink
Merge pull request #548 from vespa-engine/lesters/add-component-config
Browse files Browse the repository at this point in the history
Add component config
  • Loading branch information
kkraune authored Aug 23, 2023
2 parents e87cb5b + bac2075 commit 80030a9
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 1 deletion.
102 changes: 101 additions & 1 deletion vespa/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import sys
import zipfile

import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom

from pathlib import Path
from shutil import copyfile
from typing import List, Literal, Optional, Tuple, TypedDict, Union, Dict
Expand Down Expand Up @@ -1566,6 +1569,99 @@ def to_text(self) -> str:
return f"<config name=\"{self.name}\">{value}</config>"


class Parameter(object):
    def __init__(
        self,
        name: str,
        args: Optional[Dict[str, str]] = None,
        children: Optional[Union[str, List["Parameter"]]] = None,
    ) -> None:
        """
        Create a Vespa Component configuration parameter.

        :param name: Parameter name. Used as the tag name of the generated XML element.
        :param args: Parameter arguments. Each key/value pair becomes an attribute of the
            XML element. `None` (the default) means no attributes.
        :param children: Parameter children. Can be either a string (used as the text of the
            element) or a list of :class:`Parameter` for nested configs.
        """
        self.name = name
        self.args = args
        self.children = children

    def to_xml(self, root: ET.Element) -> ET.Element:
        """
        Append this parameter to `root` as a new sub-element and return that sub-element.

        :param root: The XML element the parameter is attached to.
        :return: The newly created element, with attributes, text and nested
            parameters filled in.
        """
        xml = ET.SubElement(root, self.name)
        # `args` is optional and defaults to None; treat that as "no attributes"
        # instead of failing on `None.items()`.
        for key, value in (self.args or {}).items():
            xml.set(key, value)
        if self.children:
            if isinstance(self.children, str):
                xml.text = self.children
            elif isinstance(self.children, list):
                # Nested parameters attach themselves below the element just created.
                for child in self.children:
                    child.to_xml(xml)
        return xml



class Component(object):
def __init__(self,
id: str,
cls: Optional[str] = None,
bundle: Optional[str] = None,
type: Optional[str] = None,
parameters: Optional[List[Parameter]] = None,
) -> None:
"""
Create a Vespa Component.
Can be used both for embedders (https://docs.vespa.ai/en/reference/embedding-reference.html)
and generic components (https://docs.vespa.ai/en/reference/services-container.html#component).
Please see the Vespa documention for more information.
:param id: The component id.
:param cls: Component class.
:param bundle: Component bundle.
:param type: Component type.
:param parameters: Component configuration parameters.
Example:
>>> Component(id="hf-embedder", type="hugging-face-embedder",
... parameters=[
... Parameter("transformer-model", {"path": "my-models/model.onnx"}),
... Parameter("tokenizer-model", {"path": "my-models/tokenizer.onnx"}),
... ])
Component(id="hf-embedder", type="hugging-face-embedder")
"""
self.id = id
self.cls = cls
self.bundle = bundle
self.type = type
self.parameters = parameters

def __repr__(self) -> str:
id = f"id=\"{self.id}\""
cls = f", class=\"{self.cls}\"" if self.cls else ""
bundle = f", bundle=\"{self.bundle}\"" if self.bundle else ""
type = f", type=\"{self.type}\"" if self.type else ""
return f"{self.__class__.__name__}({id}{cls}{bundle}{type})"

def to_xml_string(self, indent: int = 1) -> str:
root = ET.Element("component")
root.set("id", self.id)
if self.cls:
root.set("class", self.cls)
if self.bundle:
root.set("bundle", self.bundle)
if self.type:
root.set("type", self.type)
if self.parameters:
for param in self.parameters:
param.to_xml(root)

# Fix indentation, except for the first line (to fit in template), and filter out xml declaration
xml_lines = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" " * 4).strip().split("\n")
return "\n".join([xml_lines[1]] + [(" " * 4 * indent) + line for line in xml_lines[2:]])



class ValidationID(Enum):
"""Collection of IDs that can be used in validation-overrides.xml
Expand Down Expand Up @@ -1652,7 +1748,8 @@ def __init__(
create_schema_by_default: bool = True,
create_query_profile_by_default: bool = True,
configurations: Optional[List[ApplicationConfiguration]] = None,
validations: Optional[List[Validation]] = None
validations: Optional[List[Validation]] = None,
components: Optional[List[Component]] = None
) -> None:
"""
Create an `Application Package <https://docs.vespa.ai/en/application-packages.html>`__.
Expand All @@ -1674,6 +1771,7 @@ def __init__(
in case it is not explicitly defined by the user in the `query_profile` and `query_profile_type` parameters.
:param configurations: List of :class:`ApplicationConfiguration` that contains configurations for the application.
:param validations: Optional list of :class:`Validation` to be overridden.
:param components: List of :class:`Component` that contains configurations for application components.
The easiest way to get started is to create a default application package:
Expand Down Expand Up @@ -1709,6 +1807,7 @@ def __init__(
self.models = {}
self.configurations = configurations
self.validations = validations
self.components = components

@property
def schemas(self) -> List[Schema]:
Expand Down Expand Up @@ -1799,6 +1898,7 @@ def services_to_text(self):
schemas=self.schemas,
configurations=self.configurations,
stateless_model_evaluation=self.stateless_model_evaluation,
components=self.components
)

@property
Expand Down
7 changes: 7 additions & 0 deletions vespa/templates/services.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
<search></search>
<document-api></document-api>
{% endif %}
{% if components %}
{% for component in components %}
{% autoescape off %}
{{ component.to_xml_string(2) }}
{% endautoescape %}
{% endfor %}
{% endif %}
{% if stateless_model_evaluation %}
<model-evaluation/>
{% endif %}
Expand Down
55 changes: 55 additions & 0 deletions vespa/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
QueryProfileType,
QueryField,
QueryProfile,
Component,
Parameter,
ApplicationPackage,
)

Expand Down Expand Up @@ -1067,6 +1069,59 @@ def test_services_to_text(self):
self.assertEqual(self.app_package.services_to_text, expected_result)


class TestComponentSetup(unittest.TestCase):
    """Check that components passed to the application package end up in the services text."""

    def setUp(self) -> None:
        # Component with a bundle attribute only and no parameters.
        bundled = Component(id="my-component", bundle="my-bundle")
        # Typed component whose parameters carry attributes only.
        embedder = Component(
            id="hf-embedder",
            type="hugging-face-embedder",
            parameters=[
                Parameter("transformer-model", {"path": "my-models/model.onnx"}),
                Parameter("tokenizer-model", {"path": "my-models/tokenizer.json"}),
            ],
        )
        # Class-based component with a nested config, including a text-valued parameter.
        nested_config = Parameter(
            "config",
            {"name": "com.example.my-embedder"},
            [
                Parameter("model", {"model-id": "minilm-l6-v2"}),
                Parameter("vocab", {"path": "files/vocab.txt"}),
                Parameter("myValue", {}, "foo"),
            ],
        )
        custom = Component(
            id="my-custom-component",
            cls="com.example.MyCustomEmbedder",
            parameters=[nested_config],
        )
        self.app_package = ApplicationPackage(
            name="content", components=[bundled, embedder, custom]
        )

    def test_services_to_text(self):
        # Expected output, one fragment per rendered line; joined without a separator
        # because every fragment already carries its own trailing newline.
        expected_fragments = [
            '<?xml version="1.0" encoding="UTF-8"?>\n',
            '<services version="1.0">\n',
            ' <container id="content_container" version="1.0">\n',
            " <search></search>\n",
            " <document-api></document-api>\n",
            ' <component id="my-component" bundle="my-bundle"/>\n',
            ' <component id="hf-embedder" type="hugging-face-embedder">\n',
            ' <transformer-model path="my-models/model.onnx"/>\n',
            ' <tokenizer-model path="my-models/tokenizer.json"/>\n',
            ' </component>\n',
            ' <component id="my-custom-component" class="com.example.MyCustomEmbedder">\n',
            ' <config name="com.example.my-embedder">\n',
            ' <model model-id="minilm-l6-v2"/>\n',
            ' <vocab path="files/vocab.txt"/>\n',
            ' <myValue>foo</myValue>\n',
            ' </config>\n',
            ' </component>\n',
            " </container>\n",
            ' <content id="content_content" version="1.0">\n',
            ' <redundancy reply-after="1">1</redundancy>\n',
            " <documents>\n",
            ' <document type="content" mode="index"></document>\n',
            " </documents>\n",
            " <nodes>\n",
            ' <node distribution-key="0" hostalias="node1"></node>\n',
            " </nodes>\n",
            " </content>\n",
            "</services>",
        ]
        expected_result = "".join(expected_fragments)
        self.assertEqual(self.app_package.services_to_text, expected_result)


class TestValidAppName(unittest.TestCase):
def test_invalid_name(self):
with pytest.raises(ValueError):
Expand Down

0 comments on commit 80030a9

Please sign in to comment.