diff --git a/examples/ranking/README.md b/examples/ranking/README.md new file mode 100644 index 000000000..ac1e58f26 --- /dev/null +++ b/examples/ranking/README.md @@ -0,0 +1,47 @@
+# Training and Deploying Ranking Models with Merlin
+
+Ranking models are among the most common use cases in recommender systems. The examples under this folder demonstrate how to build, train and evaluate a ranking model (e.g. DLRM) using Merlin Models and deploy it on [Triton Inference Server](https://github.com/triton-inference-server/server) with Merlin Systems. Currently we support models built with the TensorFlow framework, traditional ML models like XGBoost, and Python-based models built with the Implicit library. Examples built with the PyTorch framework are being developed and will be added here soon.
+
+To learn more about ranking models, please visit this documentation [page](https://nvidia-merlin.github.io/Merlin/stable/guide/recommender_models.html#).
+
+## Running the Example Notebooks
+
+Docker containers are available from the NVIDIA GPU Cloud.
+We use the latest stable version of the [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) container to run the example notebooks. To run the example notebooks using Docker containers, perform the following steps:
+
+1. Pull and start the container by running the following command:
+
+   ```shell
+   docker run --gpus all --rm -it \
+     -p 8888:8888 -p 8797:8787 -p 8796:8786 --ipc=host \
+     nvcr.io/nvidia/merlin/merlin-tensorflow:23.XX /bin/bash
+   ```
+
+   > You can find the release tags and more information on the [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow) container page.
+
+   The container opens a shell when the run command completes.
+   Your shell prompt should look similar to the following example:
+
+   ```shell
+   root@2efa5b50b909:
+   ```
+
+2. Start the JupyterLab server by running the following command:
+
+   ```shell
+   jupyter-lab --allow-root --ip='0.0.0.0'
+   ```
+
+   View the messages in your terminal to identify the URL for JupyterLab.
+   The messages in your terminal show similar lines to the following example:
+
+   ```shell
+   Or copy and paste one of these URLs:
+      http://2efa5b50b909:8888/lab?token=9b537d1fda9e4e9cadc673ba2a472e247deee69a6229ff8d
+   or http://127.0.0.1:8888/lab?token=9b537d1fda9e4e9cadc673ba2a472e247deee69a6229ff8d
+   ```
+
+3. Open a browser and use the `127.0.0.1` URL provided in the messages by JupyterLab.
+
+4. After you log in to JupyterLab, navigate to the `/Merlin/examples/ranking` directory to try out the example notebooks.
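+
+> **Tip:** The example notebooks write their data and exported models under `/workspace/data/` inside the container. If you want those artifacts to survive container restarts, you can additionally mount a host directory when starting the container. This is an optional, hedged example; adjust the host path to your setup:
+
+```shell
+docker run --gpus all --rm -it \
+  -v $PWD/merlin-data:/workspace/data \
+  -p 8888:8888 -p 8797:8787 -p 8796:8786 --ipc=host \
+  nvcr.io/nvidia/merlin/merlin-tensorflow:23.XX /bin/bash
+```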
diff --git a/examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb b/examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb new file mode 100644 index 000000000..0f213f3dc --- /dev/null +++ b/examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb @@ -0,0 +1,1775 @@
+{
+ "cells": [
+ { + "cell_type": "code", + "execution_count": 1, + "id": "bc80cfdd", + "metadata": { "pycharm": { "name": "#%%\n" } }, + "outputs": [], + "source": [
+ "# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "#     http://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License.\n",
+ "# ================================\n",
+ "\n",
+ "# Each user is responsible for checking the content of datasets and the\n",
+ "# applicable licenses and determining if suitable for the intended use."
+ ] },
+ { + "cell_type": "markdown", + "id": "51acf955", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [
+ "\n",
+ "\n",
+ "# Exporting Ranking Models\n",
+ "\n",
+ "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) container.\n",
+ "\n",
+ "In this example notebook we demonstrate how to export (save) an NVTabular `workflow` and a `ranking model` for model deployment with the [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) library.\n",
+ "\n",
+ "Learning Objectives:\n",
+ "\n",
+ "- Export NVTabular workflow for model deployment\n",
+ "- Export TensorFlow DLRM model for model deployment\n",
+ "- Load saved NVTabular Workflow\n",
+ "- Load saved trained Merlin Models model\n",
+ "- Create Ensemble Graph\n",
+ "- Export Ensemble Graph\n",
+ "- Deploy model on Triton Inference Server\n",
+ "\n",
+ "We will follow the steps below:\n",
+ "- Prepare the data with NVTabular and export the NVTabular workflow\n",
+ "- Train a DLRM model with Merlin Models and export the trained model\n",
+ "- Launch Triton server and deploy the trained models on Triton\n",
+ "- Send a request to Triton and receive the response"
+ ] },
+ { + "cell_type": "markdown", + "id": "93e4fec3", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "## Importing Libraries" + ] },
+ { + "cell_type": "markdown", + "id": "eab14a7d", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "Let's start by importing the libraries that we'll use in this notebook." + ] },
+ { + "cell_type": "code", + "execution_count": 2, + "id": "37d5020c", + "metadata": { "pycharm": { "name": "#%%\n" } }, + "outputs": [
+ { + "name": "stderr", + "output_type": "stream", + "text": [
+ "2023-06-28 21:03:00.600621: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n",
+ "  warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n"
+ ] },
+ { + "name": "stdout", + "output_type": "stream", + "text": [
+ "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n",
+ "[INFO]: sparse_operation_kit is imported\n",
+ "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n",
+ "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n",
+ "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n",
+ "[SOK INFO] Initialize finished, communication tool: horovod\n"
+ ] },
+ { + "name": "stderr", + "output_type": "stream", + "text": [
+ "2023-06-28 21:03:07.070258: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
+ "2023-06-28 21:03:07.070303: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:226] Using CUDA malloc Async allocator for GPU: 0\n",
+ "2023-06-28 21:03:07.070448: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1638] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16249 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n",
+ "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ "  from .autonotebook import tqdm as notebook_tqdm\n"
+ ] }
+ ],
+ "source": [
+ "import os\n",
+ "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n",
+ "\n",
+ "import nvtabular as nvt\n",
+ "from nvtabular.ops import *\n",
+ "import numpy as np\n",
+ "\n",
+ "from merlin.models.utils.example_utils import workflow_fit_transform\n",
+ "from merlin.schema.tags import Tags\n",
+ "\n",
+ "import merlin.models.tf as mm\n",
+ "from merlin.io.dataset import Dataset\n",
+ "import tensorflow as tf"
+ ] },
+ { + "cell_type": "markdown", + "id": "cbb650a7", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "## Feature Engineering with NVTabular" + ] },
+ { + "cell_type": "markdown", + "id": "0c715cd5", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [
+ "We use synthetic train and test datasets, generated by mimicking the real [Ali-CCP: Alibaba Click and Conversion Prediction](https://tianchi.aliyun.com/dataset/dataDetail?dataId=408#1) dataset, to build our recommender system ranking models.\n",
+ "\n",
+ "If you would like to use the real Ali-CCP dataset instead, you can download the training and test datasets from [tianchi.aliyun.com](https://tianchi.aliyun.com/dataset/dataDetail?dataId=408#1). You can then use the [get_aliccp()](https://github.com/NVIDIA-Merlin/models/blob/stable/merlin/datasets/ecommerce/aliccp/dataset.py#L43) function to curate the raw csv files and save them as parquet files."
+ ] },
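+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next cell is a minimal, optional sketch of that alternative path. It assumes the raw Ali-CCP csv files were already downloaded and unpacked to a local directory (the path below is an assumption, not created by this notebook); check the `get_aliccp()` docstring for the exact signature in your Merlin Models version." + ] },
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [
+ "# Hypothetical sketch: curate the real Ali-CCP csv files instead of synthetic data.\n",
+ "# RAW_DIR is an assumed download location.\n",
+ "from merlin.datasets.ecommerce import get_aliccp\n",
+ "\n",
+ "RAW_DIR = \"/workspace/data/aliccp_raw/\"\n",
+ "train, valid = get_aliccp(RAW_DIR)  # converts the raw csv files to parquet and returns Merlin Datasets"
+ ] },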
+ { + "cell_type": "code", + "execution_count": 3, + "id": "44c7457b-08c4-4453-bacc-5c8eef7042d8", + "metadata": {}, + "outputs": [], + "source": [
+ "from merlin.datasets.synthetic import generate_data\n",
+ "\n",
+ "DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"/workspace/data/\")\n",
+ "NUM_ROWS = os.environ.get(\"NUM_ROWS\", 1000000)\n",
+ "SYNTHETIC_DATA = eval(os.environ.get(\"SYNTHETIC_DATA\", \"True\"))\n",
+ "BATCH_SIZE = int(os.environ.get(\"BATCH_SIZE\", 512))"
+ ] },
+ { + "cell_type": "code", + "execution_count": 4, + "id": "b6651cc8", + "metadata": { "pycharm": { "name": "#%%\n" } }, + "outputs": [], + "source": [
+ "if SYNTHETIC_DATA:\n",
+ "    train, valid = generate_data(\"aliccp-raw\", int(NUM_ROWS), set_sizes=(0.8, 0.2))\n",
+ "    # save the datasets as parquet files\n",
+ "    train.to_ddf().to_parquet(os.path.join(DATA_FOLDER, \"train\"))\n",
+ "    valid.to_ddf().to_parquet(os.path.join(DATA_FOLDER, \"valid\"))"
+ ] },
+ { + "cell_type": "markdown", + "id": "ecf0e794", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "Let's define our input and output paths." + ] },
+ { + "cell_type": "code", + "execution_count": 5, + "id": "1124f2c1", + "metadata": { "pycharm": { "name": "#%%\n" } }, + "outputs": [], + "source": [
+ "train_path = os.path.join(DATA_FOLDER, \"train\", \"*.parquet\")\n",
+ "valid_path = os.path.join(DATA_FOLDER, \"valid\", \"*.parquet\")\n",
+ "output_path = os.path.join(DATA_FOLDER, \"processed\")"
+ ] },
+ { + "cell_type": "markdown", + "id": "2e1162c0", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "After we execute the `fit()` and `transform()` functions on the raw dataset, applying the operators defined in the NVTabular workflow pipeline below, the processed parquet files are saved to `output_path`."
+ ] },
+ { + "cell_type": "code", + "execution_count": 6, + "id": "89b3ddc6", + "metadata": { "pycharm": { "name": "#%%\n" } }, + "outputs": [
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.61 s, sys: 1.09 s, total: 3.7 s\n", + "Wall time: 3.68 s\n" + ] }
+ ],
+ "source": [
+ "%%time\n",
+ "category_temp_directory = os.path.join(DATA_FOLDER, \"categories\")\n",
+ "user_id = [\"user_id\"] >> Categorify(out_path=category_temp_directory) >> TagAsUserID()\n",
+ "item_id = [\"item_id\"] >> Categorify(out_path=category_temp_directory) >> TagAsItemID()\n",
+ "targets = [\"click\"] >> AddMetadata(tags=[Tags.BINARY_CLASSIFICATION, \"target\"])\n",
+ "\n",
+ "item_features = [\"item_category\", \"item_shop\", \"item_brand\"] >> Categorify(out_path=category_temp_directory) >> TagAsItemFeatures()\n",
+ "\n",
+ "user_features = (\n",
+ "    [\n",
+ "        \"user_shops\",\n",
+ "        \"user_profile\",\n",
+ "        \"user_group\",\n",
+ "        \"user_gender\",\n",
+ "        \"user_age\",\n",
+ "        \"user_consumption_2\",\n",
+ "        \"user_is_occupied\",\n",
+ "        \"user_geography\",\n",
+ "        \"user_intentions\",\n",
+ "        \"user_brands\",\n",
+ "        \"user_categories\",\n",
+ "    ]\n",
+ "    >> Categorify(out_path=category_temp_directory)\n",
+ "    >> TagAsUserFeatures()\n",
+ ")\n",
+ "\n",
+ "outputs = user_id + item_id + item_features + user_features + targets\n",
+ "\n",
+ "workflow = nvt.Workflow(outputs)\n",
+ "\n",
+ "train_dataset = nvt.Dataset(train_path)\n",
+ "valid_dataset = nvt.Dataset(valid_path)\n",
+ "\n",
+ "workflow.fit(train_dataset)\n",
+ "workflow.transform(train_dataset).to_parquet(output_path=output_path + \"/train/\")\n",
+ "workflow.transform(valid_dataset).to_parquet(output_path=output_path + \"/valid/\")"
+ ] },
+ { + "cell_type": "markdown", + "id": "8afd8b10", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "We save the NVTabular `workflow` to the `workflow` directory under our `DATA_FOLDER`." + ] },
+ { + "cell_type": "code", + "execution_count": 7, + "id": "3e367206", + "metadata": { "pycharm": { "name": "#%%\n" } }, + "outputs": [], + "source": [ + "workflow.save(os.path.join(DATA_FOLDER, \"workflow\"))" + ] },
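+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an optional sanity check (a minimal sketch using the same `Workflow.load` API that we rely on later in this notebook), the saved workflow can be loaded back and its schemas inspected:" + ] },
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [
+ "# Optional sanity check: load the saved workflow back and confirm it round-trips.\n",
+ "# input_schema lists the raw columns the workflow expects; output_schema lists the columns it produces.\n",
+ "from nvtabular.workflow import Workflow\n",
+ "\n",
+ "reloaded = Workflow.load(os.path.join(DATA_FOLDER, \"workflow\"))\n",
+ "print(reloaded.input_schema.column_names)\n",
+ "print(reloaded.output_schema.column_names)"
+ ] },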
+ { + "cell_type": "markdown", + "id": "be619646", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "Let's check out our saved workflow model folder." + ] },
+ { + "cell_type": "code", + "execution_count": 8, + "id": "5e03167a", + "metadata": { "pycharm": { "name": "#%%\n" } }, + "outputs": [
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: seedir in /usr/local/lib/python3.8/dist-packages (0.4.2)\n", + "Requirement already satisfied: natsort in /usr/local/lib/python3.8/dist-packages (from seedir) (8.4.0)\n" + ] }
+ ],
+ "source": [ + "!pip install seedir" + ] },
+ { + "cell_type": "code", + "execution_count": 9, + "id": "aeafadbe", + "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, + "outputs": [
+ { + "name": "stdout", + "output_type": "stream", + "text": [
+ "data/\n",
+ "├─categories/\n",
+ "│ └─categories/\n",
+ "│   ├─meta.item_brand.parquet\n",
+ "│   ├─meta.item_category.parquet\n",
+ "│   ├─meta.item_id.parquet\n",
+ "│   ├─meta.item_shop.parquet\n",
+ "│   ├─meta.user_age.parquet\n",
+ "│   ├─meta.user_brands.parquet\n",
+ "│   ├─meta.user_categories.parquet\n",
+ "│   ├─meta.user_consumption_2.parquet\n",
+ "│   ├─meta.user_gender.parquet\n",
+ "│   └─meta.user_geography.parquet\n",
+ "├─dlrm/\n",
+ "│ ├─.merlin/\n",
+ "│ │ ├─input_schema.json\n",
+ "│ │ └─output_schema.json\n",
+ "│ ├─assets/\n",
+ "│ ├─fingerprint.pb\n",
+ "│ ├─keras_metadata.pb\n",
+ "│ ├─saved_model.pb\n",
+ "│ └─variables/\n",
+ "│   ├─variables.data-00000-of-00001\n",
+ "│   └─variables.index\n",
+ "├─processed/\n",
+ "│ ├─train/\n",
+ "│ │ ├─.merlin/\n",
+ "│ │ ├─_file_list.txt\n",
+ "│ │ ├─_metadata\n",
+ "│ │ ├─_metadata.json\n",
+ "│ │ ├─part_0.parquet\n",
+ "│ │ └─schema.pbtxt\n",
+ "│ └─valid/\n",
+ "│   ├─.merlin/\n",
+ "│   ├─_file_list.txt\n",
+ "│   ├─_metadata\n",
+ "│   ├─_metadata.json\n",
+ "│   ├─part_0.parquet\n",
+ "│   └─schema.pbtxt\n",
+ "├─train/\n",
+ "│ └─part.0.parquet\n",
+ "├─valid/\n",
+ "│ └─part.0.parquet\n",
+ "└─workflow/\n",
+ "  ├─categories/\n",
+ "  │ ├─unique.item_brand.parquet\n",
+ "  │ ├─unique.item_category.parquet\n",
+ "  │ ├─unique.item_id.parquet\n",
+ "  │ ├─unique.item_shop.parquet\n",
+ "  │ ├─unique.user_age.parquet\n",
+ "  │ ├─unique.user_brands.parquet\n",
+ "  │ ├─unique.user_categories.parquet\n",
+ "  │ ├─unique.user_consumption_2.parquet\n",
+ "  │ ├─unique.user_gender.parquet\n",
+ "  │ └─unique.user_geography.parquet\n",
+ "  ├─metadata.json\n",
+ "  └─workflow.pkl\n"
+ ] }
+ ],
+ "source": [
+ "import seedir as sd\n",
+ "\n",
+ "sd.seedir(\n",
+ "    DATA_FOLDER,\n",
+ "    style=\"lines\",\n",
+ "    itemlimit=10,\n",
+ "    depthlimit=3,\n",
+ "    exclude_folders=\".ipynb_checkpoints\",\n",
+ "    sort=True,\n",
+ ")"
+ ] },
+ { + "cell_type": "markdown", + "id": "93f8e0ee", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "## Build and Train a DLRM model" + ] },
+ { + "cell_type": "markdown", + "id": "56f24b6b", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "In this example, we build, train, and export a Deep Learning Recommendation Model [(DLRM)](https://arxiv.org/abs/1906.00091) architecture. To learn more about how to train different deep learning models, how to easily transition from one model to another, and the seamless integration between data preparation and model training, visit the [03-Exploring-different-models.ipynb](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/03-Exploring-different-models.ipynb) notebook."
+ ] },
+ { + "cell_type": "markdown", + "id": "5ceb8dcc", + "metadata": { "pycharm": { "name": "#%% md\n" } }, + "source": [ + "The NVTabular workflow above exports a schema file, `schema.pbtxt`, for our processed dataset. To learn more about the schema object, the schema file, and `tags`, you can explore [02-Merlin-Models-and-NVTabular-integration.ipynb](02-Merlin-Models-and-NVTabular-integration.ipynb)." + ] },
+ { + "cell_type": "code", + "execution_count": 10, + "id": "be3a3421", + "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, + "outputs": [], + "source": [
+ "# define train and valid dataset objects\n",
+ "train = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"))\n",
+ "valid = Dataset(os.path.join(output_path, \"valid\", \"*.parquet\"))\n",
+ "\n",
+ "# define schema object\n",
+ "schema = train.schema"
+ ] },
+ { + "cell_type": "code", + "execution_count": 11, + "id": "b164b7ff", + "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, + "outputs": [
+ { + "data": { "text/plain": [ + "'click'" + ] }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }
+ ],
+ "source": [ + "target_column = schema.select_by_tag(Tags.TARGET).column_names[0]\n", + "target_column" + ] },
+ { + "cell_type": "code", + "execution_count": 12, + "id": "71847bb9", + "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, + "outputs": [], + "source": [
+ "model = mm.DLRMModel(\n",
+ "    schema,\n",
+ "    embedding_dim=64,\n",
+ "    bottom_block=mm.MLPBlock([128, 64]),\n",
+ "    top_block=mm.MLPBlock([128, 64, 32]),\n",
+ "    prediction_tasks=mm.BinaryOutput(target_column),\n",
+ ")"
+ ] },
+ { + "cell_type": "code", + "execution_count": 13, + "id": "d009deb7", + "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, + "outputs": [
+ { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-28 21:03:36.828993: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n", + "\t [[{{node Placeholder/_0}}]]\n" + ] },
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "1563/1563 [==============================] - ETA: 0s - loss: 0.6932 - auc: 0.4998 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932" + ] },
+ { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-28 21:04:40.190967: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n", + "\t [[{{node Placeholder/_0}}]]\n" + ] },
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "1563/1563 [==============================] - 69s 38ms/step - loss: 0.6932 - auc: 0.4998 - regularization_loss: 0.0000e+00 - loss_batch: 0.6932 - val_loss: 0.6931 - val_auc: 0.5000 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6932\n", + "CPU times: user 1min 51s, sys: 14.1 s, total: 2min 5s\n", + "Wall time: 1min 11s\n" + ] },
+ { + "data": { "text/plain": [ + "<keras.callbacks.History at 0x...>" + ]
}, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "\n", + "model.compile(\"adam\", run_eagerly=False, metrics=[tf.keras.metrics.AUC()])\n", + "model.fit(train, validation_data=valid, batch_size=BATCH_SIZE)" + ] + }, + { + "cell_type": "markdown", + "id": "adc7051d", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### Save model" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f999a063", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, dense_9_layer_call_fn while saving (showing 5 of 96). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/dlrm/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/dlrm/assets\n" + ] + } + ], + "source": [ + "model.save(os.path.join(DATA_FOLDER, \"dlrm\"))" + ] + }, + { + "cell_type": "markdown", + "id": "2a9235b9", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "We have NVTabular wokflow and DLRM model exported, now it is time to move on to the next step: model deployment with [Merlin Systems](https://github.com/NVIDIA-Merlin/systems). " + ] + }, + { + "cell_type": "markdown", + "id": "c4f2667e", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Deploying the model with Merlin Systems" + ] + }, + { + "cell_type": "markdown", + "id": "ee302de0", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "The last step of machine learning (ML)/deep learning (DL) pipeline is to deploy the ETL workflow and saved model into production. In the production setting, we want to transform the input data as done during training (ETL). We need to apply the same mean/std for continuous features and use the same categorical mapping to convert the categories to continuous integer before we use the DL model for a prediction. Therefore, we deploy the NVTabular workflow with the Tensorflow model as an ensemble model to Triton Inference using [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) library very easily. The ensemble model guarantees that the same transformation is applied to the raw inputs.\n", + "\n", + "In the next steps, we will learn how to deploy NVTabular workflow and the trained DLRM model into [Triton Inference Server](https://github.com/triton-inference-server/server) with [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) library. NVIDIA Triton Inference Server (TIS) simplifies the deployment of AI models at scale in production. TIS provides a cloud and edge inferencing solution optimized for both CPUs and GPUs. It supports a number of different machine learning frameworks such as TensorFlow and PyTorch." + ] + }, + { + "cell_type": "markdown", + "id": "84002b14-5be5-4896-96ac-ea058bf8b7e3", + "metadata": {}, + "source": [ + "First, we load the `nvtabular.Workflow` that we created in with this [example](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/04-Exporting-ranking-models.ipynb). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3e6b6cf0-2867-4cce-ade6-d0d86e2f7de7", + "metadata": {}, + "outputs": [], + "source": [ + "from nvtabular.workflow import Workflow\n", + "\n", + "workflow = Workflow.load(os.path.join(DATA_FOLDER, \"workflow\"))" + ] + }, + { + "cell_type": "markdown", + "id": "f206c105-0cd2-4710-af63-a8862307b67e", + "metadata": {}, + "source": [ + "After we load the workflow, we remove the label columns from it's inputs. This removes all columns with the TARGET tag from the workflow. We do this because we need to set the workflow to only require the features needed to predict, not train, when creating an inference pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "73729623-89af-442f-82ad-0ffad6e73fd3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from merlin.schema.tags import Tags\n", + "\n", + "label_columns = workflow.output_schema.select_by_tag(Tags.TARGET).column_names\n", + "workflow.remove_inputs(label_columns)" + ] + }, + { + "cell_type": "markdown", + "id": "09eddb76-971c-4a69-bfda-e1fe127b2582", + "metadata": {}, + "source": [ + "After loading the workflow, we load the model. This model was trained with the output of the workflow from the Exporting Ranking Models example from Merlin Models.\n", + "\n", + "First, we need to import the Merlin Models library. Loading a TensorFlow model, which is based on custom subclasses, requires to the subclass definition. Otherwise, TensorFlow cannot load correctly load the model." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ec339d34-3667-4fe2-a9f5-ecb770e5c9a3", + "metadata": {}, + "outputs": [], + "source": [ + "tf_model_path = os.path.join(DATA_FOLDER, \"dlrm\")\n", + "\n", + "model = tf.keras.models.load_model(tf_model_path)" + ] + }, + { + "cell_type": "markdown", + "id": "b93b911f-4c6b-427b-a1f9-888f0f8c7613", + "metadata": {}, + "source": [ + "### Create the Ensemble Graph" + ] + }, + { + "cell_type": "markdown", + "id": "98f74ec8-c4e7-49e7-ba86-eae2f8f0b744", + "metadata": {}, + "source": [ + "After we have both the model and the workflow loaded, we can create the ensemble graph. You create the graph. The goal is to illustrate the path of data through your full system. In this example we only serve a workflow with a model, but you can add other components that help you meet your business logic requirements.\n", + "\n", + "Because this example has two components—a model and a workflow—we require two operators. These operators, also known as inference operators, are meant to abstract away all the \"hard parts\" of loading a specific component, such as a workflow or model, into Triton Inference Server.\n", + "\n", + "The following code block shows how to use two inference operators:\n", + "\n", + "- **TransformWorkflow:**
\n", + " This operator ensures that the workflow is correctly saved and packaged with the required config so the server will know how to load it.\n", + "\n", + "- **PredictTensorflow:**
\n", + " This operator will do something similar with the model, loaded before.\n", + "\n", + "Let's give it a try." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "86a951b0-1d44-456e-9dc1-d77e98223248", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_2_layer_call_fn, model_context_2_layer_call_and_return_conditional_losses, prepare_list_features_2_layer_call_fn, prepare_list_features_2_layer_call_and_return_conditional_losses, dense_9_layer_call_fn while saving (showing 5 of 96). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpomjyo5xq/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpomjyo5xq/assets\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "from merlin.systems.dag.ops.tensorflow import PredictTensorflow\n", + "\n", + "serving_operators = workflow.input_schema.column_names >> TransformWorkflow(workflow) >> PredictTensorflow(model)" + ] + }, + { + "cell_type": "markdown", + "id": "ef8324aa-756f-4df8-9c51-595e473b0ce5", + "metadata": {}, + "source": [ + "### Export Graph as Ensemble" + ] + }, + { + "cell_type": "markdown", + "id": "59a33309-4a82-41ac-8439-2a6aa95241af", + "metadata": {}, + "source": [ + "The last step is to create the ensemble artifacts that Triton Inference Server can consume. To make these artifacts, we import the Ensemble class. The class is responsible for interpreting the graph and exporting the correct files for the server.\n", + "\n", + "After you run the following cell, you'll see that we create a ColumnSchema for the expected inputs to the workflow. The workflow is a Schema.\n", + "\n", + "When you are creating an Ensemble object you supply the graph and a schema representing the starting input of the graph. the inputs to the ensemble graph are the inputs to the first operator of your graph.\n", + "\n", + "After you have created the Ensemble you export the graph, supplying an export path for the Ensemble.export function.\n", + "\n", + "This returns an ensemble config which represents the entire inference pipeline and a list of node-specific configs.\n", + "\n", + "Let's take a look below." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "4e305ed1-4c19-470c-82c0-0635f2d1d851", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.num_bucketsproperties.freq_thresholdproperties.max_sizeproperties.cat_pathproperties.domain.minproperties.domain.maxproperties.domain.nameproperties.embedding_sizes.cardinalityproperties.embedding_sizes.dimension
0user_id(Tags.CATEGORICAL, Tags.ID, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_id77366
1item_id(Tags.CATEGORICAL, Tags.ITEM, Tags.ID)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_id79067
2item_category(Tags.CATEGORICAL, Tags.ITEM)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_category79067
3item_shop(Tags.CATEGORICAL, Tags.ITEM)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_shop79067
4item_brand(Tags.CATEGORICAL, Tags.ITEM)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.i...0789item_brand79067
5user_shops(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_shops77366
6user_profile(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...073user_profile7418
7user_group(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...013user_group1416
8user_gender(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...04user_gender516
9user_age(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...08user_age916
10user_consumption_2(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...05user_consumption_2616
11user_is_occupied(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...04user_is_occupied516
12user_geography(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...06user_geography716
13user_intentions(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_intentions77366
14user_brands(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_brands77366
15user_categories(Tags.CATEGORICAL, Tags.USER)DType(name='int64', element_type=<ElementType....FalseFalseNone00/workspace/data/categories/categories/unique.u...0772user_categories77366
\n", + "
" + ], + "text/plain": [ + "[{'name': 'user_id', 'tags': {, , }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_id.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_id'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_id', 'tags': {, , }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_id.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_id'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_category', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_category.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_category'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_shop', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_shop.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_shop'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'item_brand', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.item_brand.parquet', 'domain': {'min': 0, 'max': 789, 'name': 'item_brand'}, 'embedding_sizes': {'cardinality': 790, 'dimension': 67}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_shops', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_shops.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_shops'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_profile', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_profile.parquet', 'domain': {'min': 0, 'max': 73, 'name': 'user_profile'}, 'embedding_sizes': {'cardinality': 74, 'dimension': 18}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_group', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': 
'/workspace/data/categories/categories/unique.user_group.parquet', 'domain': {'min': 0, 'max': 13, 'name': 'user_group'}, 'embedding_sizes': {'cardinality': 14, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_gender', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_gender.parquet', 'domain': {'min': 0, 'max': 4, 'name': 'user_gender'}, 'embedding_sizes': {'cardinality': 5, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_age', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_age.parquet', 'domain': {'min': 0, 'max': 8, 'name': 'user_age'}, 'embedding_sizes': {'cardinality': 9, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_consumption_2', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_consumption_2.parquet', 'domain': {'min': 0, 'max': 5, 'name': 'user_consumption_2'}, 'embedding_sizes': {'cardinality': 6, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_is_occupied', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_is_occupied.parquet', 'domain': {'min': 0, 'max': 4, 'name': 'user_is_occupied'}, 'embedding_sizes': {'cardinality': 5, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_geography', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_geography.parquet', 'domain': {'min': 0, 'max': 6, 'name': 'user_geography'}, 'embedding_sizes': {'cardinality': 7, 'dimension': 16}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_intentions', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_intentions.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_intentions'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_brands', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_brands.parquet', 'domain': {'min': 0, 'max': 772, 'name': 
'user_brands'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'user_categories', 'tags': {, }, 'properties': {'num_buckets': None, 'freq_threshold': 0, 'max_size': 0, 'cat_path': '/workspace/data/categories/categories/unique.user_categories.parquet', 'domain': {'min': 0, 'max': 772, 'name': 'user_categories'}, 'embedding_sizes': {'cardinality': 773, 'dimension': 66}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "workflow.output_schema" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "440ce7a2-9a6d-47aa-9506-b7027ab0ffa9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_2_layer_call_fn, model_context_2_layer_call_and_return_conditional_losses, prepare_list_features_2_layer_call_fn, prepare_list_features_2_layer_call_and_return_conditional_losses, dense_9_layer_call_fn while saving (showing 5 of 96). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /workspace/data/ensemble/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + } + ], + "source": [ + "from merlin.systems.dag.ensemble import Ensemble\n", + "\n", + "ensemble = Ensemble(serving_operators, workflow.input_schema)\n", + "\n", + "export_path = os.path.join(DATA_FOLDER, \"ensemble\")\n", + "\n", + "ens_conf, node_confs = ensemble.export(export_path)" + ] + }, + { + "cell_type": "markdown", + "id": "987010e0-fae5-48dc-9781-f68e1e0035f3", + "metadata": {}, + "source": [ + "Display the path to the directory with the ensemble." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "cfa8da6c-1f81-4ee2-b8d2-3dca75aeaedc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/workspace/data/ensemble\n" + ] + } + ], + "source": [ + "print(export_path)" + ] + }, + { + "cell_type": "markdown", + "id": "8d47dd90-39df-4c0b-849c-c06f11977512", + "metadata": {}, + "source": [ + "### Verification of Ensemble Artifacts" + ] + }, + { + "cell_type": "markdown", + "id": "6d09fb14-92d8-44d4-b727-91ecf0e2dd71", + "metadata": {}, + "source": [ + "After we export the ensemble, we can check the export path for the graph's artifacts. 
The directory structure represents an ordering number followed by an operator identifier such as `0_transformworkflow`, `1_predicttensorflow`, and so on.\n", + "\n", + "Inside each of those directories, the export method writes a config.pbtxt file and a directory with a number. The number indicates the version and begins at 1. The artifacts for each operator are found inside the version folder. These artifacts vary depending on the operator in use.\n", + "\n", + "Install the seedir python package so we can view some of the directory contents." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "be28d294-f9f2-4086-9d79-5bd9d93c603a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ensemble/\n", + "├─0_transformworkflowtriton/\n", + "│ ├─1/\n", + "│ │ ├─model.py\n", + "│ │ └─workflow/\n", + "│ └─config.pbtxt\n", + "├─1_predicttensorflowtriton/\n", + "│ ├─1/\n", + "│ │ └─model.savedmodel/\n", + "│ └─config.pbtxt\n", + "└─executor_model/\n", + " ├─1/\n", + " │ ├─ensemble/\n", + " │ └─model.py\n", + " └─config.pbtxt\n" + ] + } + ], + "source": [ + "sd.seedir(export_path, style='lines', itemlimit=10, depthlimit=3, exclude_folders='.ipynb_checkpoints', sort=True)" + ] + }, + { + "cell_type": "markdown", + "id": "9d0bc00b-3d39-4093-90d6-6bc4c84507f9", + "metadata": {}, + "source": [ + "### Starting Triton Server" + ] + }, + { + "cell_type": "markdown", + "id": "d42fb70f-a9b3-4240-b190-b6e68b0c9b88", + "metadata": {}, + "source": [ + "After we export the ensemble, we are ready to start the Triton Inference Server. The server is installed in all the Merlin inference containers. If you are not using one of our containers, then ensure it is installed in your environment. For more information, see the Triton Inference Server documentation.\n", + "\n", + "You can start the server by running the following command:\n", + "```\n", + "tritonserver --model-repository=/workspace/data/ensemble\n", + "\n", + "For the --model-repository argument, specify the same value as the export_path that you specified previously in the ensemble.export method.\n", + "```\n", + "After you run the tritonserver command, wait until your terminal shows messages like the following example:\n", + "```\n", + "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000\n", + "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0c838873-ee04-45bb-9725-01105c66956b", + "metadata": {}, + "source": [ + "### Retrieving Recommendations from Triton Inference Server\n", + "Now that our server is running, we can send requests to it. This request is composed of values that correspond to the request schema that was created when we exported the ensemble graph.\n", + "\n", + "In the code below we create a request to send to triton and send it. We will then analyze the response, to show the full experience.\n", + "\n", + "First we need to ensure that we have a client connected to the server that we started. To do this, we use the Triton HTTP client library." 
+ { + "cell_type": "code", + "execution_count": 23, + "id": "bc26afee-8853-4bb0-a027-68613248088c", + "metadata": {}, + "outputs": [
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "client created.\n" + ] }
+ ],
+ "source": [
+ "import tritonclient.http as client\n",
+ "\n",
+ "# Create a triton client\n",
+ "try:\n",
+ "    triton_client = client.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n",
+ "    print(\"client created.\")\n",
+ "except Exception as e:\n",
+ "    print(\"channel creation failed: \" + str(e))"
+ ] },
+ { + "cell_type": "markdown", + "id": "23fb2d2e-60f2-4406-82aa-f040403729a3", + "metadata": {}, + "source": [ + "After we create the client and verify that it is connected to the server instance, we can communicate with the server and ensure all the models are loaded correctly." + ] },
+ { + "cell_type": "code", + "execution_count": 24, + "id": "b12afdf8-1e73-4eb3-9ea1-85dd4773a649", + "metadata": {}, + "outputs": [
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "GET /v2/health/live, headers None\n", + "\n", + "POST /v2/repository/index, headers None\n", + "\n", + "\n", + "bytearray(b'[{\"name\":\"0_transformworkflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"1_predicttensorflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"executor_model\",\"version\":\"1\",\"state\":\"READY\"}]')\n" + ] },
+ { + "data": { "text/plain": [ + "[{'name': '0_transformworkflowtriton', 'version': '1', 'state': 'READY'},\n", + " {'name': '1_predicttensorflowtriton', 'version': '1', 'state': 'READY'},\n", + " {'name': 'executor_model', 'version': '1', 'state': 'READY'}]" + ] }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }
+ ],
+ "source": [ + "# ensure triton is in a good state\n", + "triton_client.is_server_live()\n", + "triton_client.get_model_repository_index()" + ] },
+ { + "cell_type": "markdown", + "id": "420d5603-568f-41f4-ac0d-c5852b9f33dd", + "metadata": {}, + "source": [ + "After verifying that the models are correctly loaded by the server, we use some original, raw validation data and send it as an inference request to the server.\n", + "\n", + "The `df_lib` object is `cudf` if a GPU is available and `pandas` otherwise." + ] },
+ { + "cell_type": "code", + "execution_count": 25, + "id": "5bb6aec6-0a7e-41a7-b42d-0a378c530d28", + "metadata": {}, + "outputs": [
+ { + "data": {
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_iditem_iditem_categoryitem_shopitem_branduser_shopsuser_profileuser_groupuser_genderuser_ageuser_consumption_2user_is_occupieduser_geographyuser_intentionsuser_brandsuser_categories
__null_dask_index__
800000252685593620451670211111148483088
80000128134128509821879211111154493498
80000292423882557111111116227730
\n", + "
" + ], + "text/plain": [ + " user_id item_id item_category item_shop item_brand \\\n", + "__null_dask_index__ \n", + "800000 25 26 85 5936 2045 \n", + "800001 28 13 41 2850 982 \n", + "800002 9 2 4 238 82 \n", + "\n", + " user_shops user_profile user_group user_gender \\\n", + "__null_dask_index__ \n", + "800000 1670 2 1 1 \n", + "800001 1879 2 1 1 \n", + "800002 557 1 1 1 \n", + "\n", + " user_age user_consumption_2 user_is_occupied \\\n", + "__null_dask_index__ \n", + "800000 1 1 1 \n", + "800001 1 1 1 \n", + "800002 1 1 1 \n", + "\n", + " user_geography user_intentions user_brands \\\n", + "__null_dask_index__ \n", + "800000 1 484 830 \n", + "800001 1 544 934 \n", + "800002 1 162 277 \n", + "\n", + " user_categories \n", + "__null_dask_index__ \n", + "800000 88 \n", + "800001 98 \n", + "800002 30 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from merlin.core.dispatch import get_lib\n", + "\n", + "df_lib = get_lib()\n", + "\n", + "# read in data for request\n", + "batch = df_lib.read_parquet(\n", + " os.path.join(DATA_FOLDER,\"valid\", \"part.0.parquet\"), columns=workflow.input_schema.column_names\n", + ").head(3)\n", + "batch" + ] + }, + { + "cell_type": "markdown", + "id": "06b23899-dbb4-4701-bfa3-0d21da333159", + "metadata": {}, + "source": [ + "After we isolate our batch, we convert the dataframe representation into inputs for Triton. We also declare the outputs that we expect to receive from the model." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "080d84dc-9c09-4d94-8ced-8d160ca88f01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['click/binary_output']\n" + ] + } + ], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "import tritonclient.grpc as grpcclient\n", + "# create inputs and outputs\n", + "\n", + "inputs = convert_df_to_triton_input(workflow.input_schema, batch, grpcclient.InferInput)\n", + "\n", + "output_cols = ensemble.graph.output_schema.column_names\n", + "print(output_cols)\n", + "\n", + "outputs = [\n", + " grpcclient.InferRequestedOutput(col)\n", + " for col in output_cols\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "01208b4d-5478-48a3-9114-b904b2ca2167", + "metadata": {}, + "source": [ + "Now that our inputs and outputs are created, we can use the triton_client that we created earlier to send the inference request." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "95dea3b8-92aa-41f9-a1b4-2cb516c6b793", + "metadata": {}, + "outputs": [], + "source": [ + "# send request to tritonserver\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer(\"executor_model\", inputs, request_id=\"1\", outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "id": "b921c299-df76-45ef-9acc-5b17bc52bd3a", + "metadata": {}, + "source": [ + "When the server completes the inference request, it returns a response, i.e. likelihood per request. This response is parsed to get the desired predictions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "b766ef55-5661-4268-aed9-6f4096cce58d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.5002032]\n", + " [0.5001995]\n", + " [0.5001995]]\n" + ] + } + ], + "source": [ + "predictions = response.as_numpy('click/binary_output')\n", + "print(predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "24ee5636-600a-4422-8165-f70e8e847031", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "This sample notebook started with data preprocessing and model training. We learned how to create an ensemble graph, verify the ensemble artifacts in the file system, and then put the ensemble into production with Triton Inference Server. Finally, we sent a simple inference request to the server and printed the response." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "merlin": { + "containers": [ + "nvcr.io/nvidia/merlin/merlin-tensorflow:latest" + ] + }, + "vscode": { + "interpreter": { + "hash": "a398807c5c2ed8e5ff9d9890488d007fa99cbabcec733962e21659a28c5da99b" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb b/examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb new file mode 100644 index 000000000..d645a172f --- /dev/null +++ b/examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb @@ -0,0 +1,488 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "5cdba80f", + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2022 NVIDIA Corporation. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================\n", + "\n", + "# Each user is responsible for checking the content of datasets and the\n", + "# applicable licenses and determining if suitable for the intended use." + ] + }, + { + "cell_type": "markdown", + "id": "77acbcad", + "metadata": {}, + "source": [ + "\n", + "\n", + "# Serving an Implicit Model with Merlin Systems\n", + "\n", + "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow) container. 
This Jupyter notebook example demonstrates how to deploy an `Implicit` model to Triton Inference Server (TIS) and generate prediction results for a given query.\n",
+    "\n",
+    "## Overview\n",
+    "\n",
+    "NVIDIA Merlin is an open-source framework that accelerates and scales end-to-end recommender system pipelines. The Merlin framework is made up of several subcomponents: Merlin-Core, Merlin-Models, NVTabular, and Merlin-Systems. Merlin Systems will be the focus of this example.\n",
+    "\n",
+    "The purpose of the Merlin Systems library is to make it easy for Merlin users to quickly deploy their recommender systems from development to [Triton Inference Server](https://github.com/triton-inference-server/server). We extended the same user-friendly API users are accustomed to in NVTabular and leveraged it to accommodate deploying recommender system components to TIS. \n",
+    "\n",
+    "### Learning objectives\n",
+    "\n",
+    "In this notebook, we learn how to deploy an NVTabular Workflow and a trained `Implicit` model from Merlin Models to Triton.\n",
+    "- Create Ensemble Graph\n",
+    "- Export Ensemble Graph\n",
+    "- Run Triton server\n",
+    "- Send request to Triton and verify results\n",
+    "\n",
+    "### Dataset\n",
+    "\n",
+    "We use the [MovieLens 100k Dataset](https://grouplens.org/datasets/movielens/100k/). It consists of ratings a user has given a movie along with some metadata for the user and the movie. We train a Bayesian Personalized Ranking model from the `Implicit` library on the user-item interactions and proceed to deploy it to the Triton Inference Server.\n",
+    "\n",
+    "It is important to note that the steps taken in this notebook are generalized and can be applied to any set of workflows and models. \n",
+    "\n",
+    "### Tools\n",
+    "\n",
+    "- NVTabular\n",
+    "- Merlin Models\n",
+    "- Merlin Systems\n",
+    "- Triton Inference Server"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6efad6b8",
+   "metadata": {},
+   "source": [
+    "## Prerequisite: Preparing the data and Training Implicit"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "356ef8c9",
+   "metadata": {},
+   "source": [
+    "In this tutorial, our objective is to demonstrate how to serve an `Implicit` model. In order for us to be able to do so, we begin by downloading data and training a model. We breeze through these activities below.\n",
+    "\n",
+    "If you would like to learn more about training an `Implicit` model using the Merlin Models library, please consult this [tutorial](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/07-Train-traditional-ML-models-using-the-Merlin-Models-API.ipynb)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edea28d0", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import nvtabular as nvt\n", + "import numpy as np\n", + "from merlin.schema.tags import Tags\n", + "from merlin.models.implicit import BayesianPersonalizedRanking\n", + "\n", + "from merlin.datasets.entertainment import get_movielens\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b756a12f", + "metadata": {}, + "outputs": [], + "source": [ + "ensemble_export_path = os.environ.get(\"OUTPUT_DATA_DIR\", \"ensemble\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c10a993", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "train, _ = get_movielens(variant='ml-100k')\n", + "\n", + "# the implicit model expects a `user_id` column hence the need to rename it\n", + "train = nvt.Dataset(train.compute().rename(columns = {'userId': 'user_id'}))\n", + "\n", + "user_id = ['user_id'] >> nvt.ops.Categorify() >> nvt.ops.TagAsUserID()\n", + "movieId = ['movieId'] >> nvt.ops.Categorify() >> nvt.ops.TagAsItemID()\n", + "\n", + "train_workflow = nvt.Workflow(user_id + movieId)\n", + "train_transformed = train_workflow.fit_transform(train)" + ] + }, + { + "cell_type": "markdown", + "id": "ff168b4a", + "metadata": {}, + "source": [ + "Having preprocessed our data, let's train our model." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d0b55be5", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2022-09-05 09:32:07.681291: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2022-09-05 09:32:07.681740: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2022-09-05 09:32:07.681877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", + " warnings.warn(\n", + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 777.52it/s, train_auc=85.42%, skipped=29.68%]\n" + ] + } + ], + "source": [ + "model = BayesianPersonalizedRanking()\n", + "model.fit(train_transformed)" + ] + }, + { + "cell_type": "markdown", + "id": "f4a3cf39", + "metadata": {}, + "source": [ + "## Create the Ensemble Graph" + ] + }, + { + "cell_type": "markdown", + "id": "dc40083e", + "metadata": {}, + "source": [ + "Let us now define an `Ensemble` that will be used for serving predictions on the Triton Inference Server.\n", + "\n", + "An `Ensemble` defines 
operations to be performed on incoming requests. It begins with specifying what fields the inference request will contain.\n",
+    "\n",
+    "Our model was trained on data that included the `movieId` column. However, in production, this information will not be available to us; it is what we will be trying to predict.\n",
+    "\n",
+    "In general, you want to define a preprocessing workflow once and apply it throughout the lifecycle of your model, from training all the way to serving in production. Redefining the workflows on the go, or using custom-written code for these operations, can be a source of subtle bugs.\n",
+    "\n",
+    "In order to ensure we process our data in the same way in production as we do in training, let us now modify the training preprocessing pipeline and use it to construct our inference workflow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "fa8dc34a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inf_workflow = train_workflow.remove_inputs(['movieId'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d71c5636",
+   "metadata": {},
+   "source": [
+    "Equipped with the modified data preprocessing workflow, let us define the full set of inference operations we will want to run on the Triton Inference Server.\n",
+    "\n",
+    "We begin by stating what data the server can expect (`inf_workflow.input_schema.column_names`). We proceed to wrap our `inf_workflow` in `TransformWorkflow` -- an operator we can leverage for executing our NVTabular workflow during serving.\n",
+    "\n",
+    "Last but not least, having received and preprocessed the data, we instruct the Triton Inference Server to perform inference using the model that we trained. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "de9e2237",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from merlin.systems.dag.ops.implicit import PredictImplicit\n",
+    "from merlin.systems.dag.ensemble import Ensemble\n",
+    "from merlin.systems.dag.ops.workflow import TransformWorkflow\n",
+    "\n",
+    "inf_ops = inf_workflow.input_schema.column_names >> TransformWorkflow(inf_workflow) \\\n",
+    "    >> PredictImplicit(model.implicit_model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "76dad9c3",
+   "metadata": {},
+   "source": [
+    "With inference operations defined, all that remains now is outputting the ensemble to disk so that it can be loaded up when Triton starts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e23a7fc3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ensemble = Ensemble(inf_ops, inf_workflow.input_schema)\n",
+    "ensemble.export(ensemble_export_path);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c9165dfd",
+   "metadata": {},
+   "source": [
+    "## Starting the Triton Inference Server"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "353e8602",
+   "metadata": {},
+   "source": [
+    "After we export the ensemble, we are ready to start the Triton Inference Server. The server is installed in Merlin TensorFlow and Merlin PyTorch containers. If you are not using one of our containers, then ensure it is installed in your environment. 
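(A quick check such as `which tritonserver` should print the path to the binary if it is installed.) 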
For more information, see the Triton Inference Server [documentation](https://github.com/triton-inference-server/server/blob/r22.03/README.md#documentation).\n", + "\n", + "You can start the server by running the following command:\n", + "\n", + "```shell\n", + "tritonserver --model-repository=ensemble\n", + "```\n", + "\n", + "For the `--model-repository` argument, specify the same value as the `export_path` that you specified previously in the `ensemble.export` method.\n", + "\n", + "After you run the `tritonserver` command, wait until your terminal shows messages like the following example:\n", + "\n", + "```shell\n", + "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000\n", + "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "65b7e4e8", + "metadata": {}, + "source": [ + "## Retrieving Recommendations from Triton Inference Server\n", + "\n", + "Now that our server is running, we can send requests to it. This request is composed of values that correspond to the request schema that was created when we exported the ensemble graph.\n", + "\n", + "In the code below we create a request to send to Triton and send it. We will then analyze the response, to show the full experience.\n", + "\n", + "We begin by obtaining 10 examples from our train data to include in the request." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2d61751b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_id
06
115
270
386
496
5109
6143
7183
8609
9858
\n", + "
" + ], + "text/plain": [ + " user_id\n", + "0 6\n", + "1 15\n", + "2 70\n", + "3 86\n", + "4 96\n", + "5 109\n", + "6 143\n", + "7 183\n", + "8 609\n", + "9 858" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ten_examples = train.compute()['user_id'].unique().sample(10).sort_values().to_frame().reset_index(drop=True)\n", + "ten_examples" + ] + }, + { + "cell_type": "markdown", + "id": "7808bc12", + "metadata": {}, + "source": [ + "Let's now package the information up as inputs and send it to Triton for inference." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2fefd5b8", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "import tritonclient.grpc as grpcclient\n", + "\n", + "inputs = convert_df_to_triton_input(inf_workflow.input_schema, ten_examples, grpcclient.InferInput)\n", + "\n", + "outputs = [\n", + " grpcclient.InferRequestedOutput(col)\n", + " for col in inf_ops.output_schema.column_names\n", + "]\n", + "# send request to tritonserver\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer(\"executor_model\", inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "id": "3dc7909f", + "metadata": {}, + "source": [ + "We can now compare the predictions from the server to those from our local model." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6ddd35cc", + "metadata": {}, + "outputs": [], + "source": [ + "predictions_from_triton = response.as_numpy(outputs[0].name())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6f28fdfe", + "metadata": {}, + "outputs": [], + "source": [ + "local_predictions = model.predict(inf_workflow.transform(nvt.Dataset(ten_examples)))[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e946de27", + "metadata": {}, + "outputs": [], + "source": [ + "np.testing.assert_allclose(predictions_from_triton, local_predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "d8aa4456", + "metadata": {}, + "source": [ + "We managed to preprocess the data in the same way in serving as we did during training and obtain the same predictions!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb b/examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb new file mode 100644 index 000000000..88cc2be8c --- /dev/null +++ b/examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb @@ -0,0 +1,545 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "5cdba80f", + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2022 NVIDIA Corporation. 
All Rights Reserved.\n",
+    "#\n",
+    "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+    "# you may not use this file except in compliance with the License.\n",
+    "# You may obtain a copy of the License at\n",
+    "#\n",
+    "#     http://www.apache.org/licenses/LICENSE-2.0\n",
+    "#\n",
+    "# Unless required by applicable law or agreed to in writing, software\n",
+    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+    "# See the License for the specific language governing permissions and\n",
+    "# limitations under the License.\n",
+    "# ==============================================================================\n",
+    "\n",
+    "# Each user is responsible for checking the content of datasets and the\n",
+    "# applicable licenses and determining if suitable for the intended use."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "77acbcad",
+   "metadata": {},
+   "source": [
+    "\n",
+    "\n",
+    "# Serving an XGBoost Model with Merlin Systems\n",
+    "\n",
+    "This notebook is created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow) container. This Jupyter notebook example demonstrates how to deploy an `XGBoost` model to Triton Inference Server (TIS) and generate prediction results for a given query.\n",
+    "\n",
+    "## Overview\n",
+    "\n",
+    "NVIDIA Merlin is an open-source framework that accelerates and scales end-to-end recommender system pipelines. The Merlin framework is made up of several subcomponents: Merlin-Core, Merlin-Models, NVTabular, and Merlin-Systems. Merlin Systems will be the focus of this example.\n",
+    "\n",
+    "The purpose of the Merlin Systems library is to make it easy for Merlin users to quickly deploy their recommender systems from development to [Triton Inference Server](https://github.com/triton-inference-server/server). We extended the same user-friendly API users are accustomed to in NVTabular and leveraged it to accommodate deploying recommender system components to TIS. \n",
+    "\n",
+    "### Learning objectives\n",
+    "\n",
+    "In this notebook, we learn how to deploy an NVTabular Workflow and a trained XGBoost model from Merlin Models to Triton.\n",
+    "- Create Ensemble Graph\n",
+    "- Export Ensemble Graph\n",
+    "- Run Triton server\n",
+    "- Send request to Triton and verify results\n",
+    "\n",
+    "### Dataset\n",
+    "\n",
+    "We use the [MovieLens 100k Dataset](https://grouplens.org/datasets/movielens/100k/). It consists of ratings a user has given a movie along with some metadata for the user and the movie. We train an XGBoost model to predict the rating based on user and item features and proceed to deploy it to the Triton Inference Server.\n",
+    "\n",
+    "It is important to note that the steps taken in this notebook are generalized and can be applied to any set of workflows and models. \n",
+    "\n",
+    "### Tools\n",
+    "\n",
+    "- NVTabular\n",
+    "- Merlin Models\n",
+    "- Merlin Systems\n",
+    "- Triton Inference Server"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6efad6b8",
+   "metadata": {},
+   "source": [
+    "## Prerequisite: Preparing the data and Training XGBoost"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "356ef8c9",
+   "metadata": {},
+   "source": [
+    "In this tutorial, our objective is to demonstrate how to serve an `XGBoost` model. In order for us to be able to do so, we begin by downloading data and training a model. 
We breeze through these activities below.\n",
+    "\n",
+    "If you would like to learn more about training an `XGBoost` model using the Merlin Models library, please consult this [tutorial](https://github.com/NVIDIA-Merlin/models/blob/stable/examples/07-Train-an-xgboost-model-using-the-Merlin-Models-API.ipynb)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0385d38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from merlin.core.utils import Distributed\n",
+    "from merlin.models.xgb import XGBoost\n",
+    "import nvtabular as nvt\n",
+    "import numpy as np\n",
+    "from merlin.schema.tags import Tags\n",
+    "\n",
+    "from merlin.datasets.entertainment import get_movielens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f79d5736",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ensemble_export_path = os.environ.get(\"OUTPUT_DATA_DIR\", \"ensemble\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "0a2d3208",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2022-08-05 22:27:29.446602: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2022-08-05 22:27:29.447091: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2022-08-05 22:27:29.447227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "downloading ml-100k.zip: 4.94MB [00:03, 1.45MB/s]                            \n",
+      "unzipping files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 262.32files/s]\n",
+      "INFO:merlin.datasets.entertainment.movielens.dataset:starting ETL..\n",
+      "/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n",
+      "  warnings.warn(\n",
+      "2022-08-05 22:27:39,947 - distributed.diskutils - INFO - Found stale lock file and directory '/workspace/dask-worker-space/worker-oqemvhkv', purging\n",
+      "2022-08-05 22:27:39,947 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n",
+      "[22:27:41] task [xgboost.dask]:tcp://127.0.0.1:41809 got new rank 0\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0]\ttrain-rmse:2.36952\n",
+      "[20]\ttrain-rmse:0.95316\n",
+      "[40]\ttrain-rmse:0.92447\n",
+      "[60]\ttrain-rmse:0.90741\n",
+      "[80]\ttrain-rmse:0.89437\n",
+      "[84]\ttrain-rmse:0.89138\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "train, _ = get_movielens(variant='ml-100k')\n",
+    "\n",
+    "preprocess_categories = ['movieId', 'userId', 'genres'] >> nvt.ops.Categorify(freq_threshold=2, dtype=np.int32)\n",
+    "preprocess_rating = ['rating'] >> nvt.ops.AddTags(tags=[Tags.TARGET, Tags.REGRESSION])\n",
+    "\n",
+    "train_workflow = nvt.Workflow(preprocess_categories + preprocess_rating + train.schema.without(['rating_binary', 'title']).column_names)\n",
+    "train_transformed 
= train_workflow.fit_transform(train)\n", + "\n", + "with Distributed():\n", + " model = XGBoost(schema=train_transformed.schema)\n", + " model.fit(\n", + " train_transformed,\n", + " num_boost_round=85,\n", + " verbose_eval=20\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f4a3cf39", + "metadata": {}, + "source": [ + "## Create the Ensemble Graph" + ] + }, + { + "cell_type": "markdown", + "id": "dc40083e", + "metadata": {}, + "source": [ + "Let us now define an `Ensemble` that will be used for serving predictions on the Triton Inference Server.\n", + "\n", + "An `Ensemble` defines operations to be performed on incoming requests. It begins with specifying what fields that the inference request will contain.\n", + "\n", + "Our model was trained on data that included the `target` column. However, in production, this information will not be available to us.\n", + "\n", + "In general, you want to define a preprocessing workflow once and apply it throughout the lifecycle of your model, from training all the way to serving in production. Redefining the workflows on the go, or using custom written code for these operations, can be a source of subtle bugs.\n", + "\n", + "In order to ensure we process our data in the same way in production as we do in training, let us now modify the training preprocessing pipeline and use it to construct our inference workflow." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fa8dc34a", + "metadata": {}, + "outputs": [], + "source": [ + "inf_workflow = train_workflow.remove_inputs(['rating'])" + ] + }, + { + "cell_type": "markdown", + "id": "d71c5636", + "metadata": {}, + "source": [ + "Equipped with the modified data preprocessing workflow, let us define the full set of inference operations we will want to run on the Triton Inference Server.\n", + "\n", + "We begin by stating what data the server can expect (`inf_workflow.input_schema.column_names`). We proceed to wrap our `inf_workflow` in `TransformWorkflow` -- an operator we can leverage for executing our NVTabular workflow during serving.\n", + "\n", + "Last but not least, having received and preprocessed the data, we instruct the Triton Inference Server to perform inference using the model that we trained. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "de9e2237", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.dag.ops.fil import PredictForest\n", + "from merlin.systems.dag.ensemble import Ensemble\n", + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "\n", + "inf_ops = inf_workflow.input_schema.column_names >> TransformWorkflow(inf_workflow) \\\n", + " >> PredictForest(model, inf_workflow.output_schema)" + ] + }, + { + "cell_type": "markdown", + "id": "76dad9c3", + "metadata": {}, + "source": [ + "With inference operations defined, all that remains now is outputting the ensemble to disk so that it can be loaded up when Triton starts." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e23a7fc3", + "metadata": {}, + "outputs": [], + "source": [ + "ensemble = Ensemble(inf_ops, inf_workflow.input_schema)\n", + "ensemble.export(ensemble_export_path);" + ] + }, + { + "cell_type": "markdown", + "id": "c9165dfd", + "metadata": {}, + "source": [ + "## Starting the Triton Inference Server" + ] + }, + { + "cell_type": "markdown", + "id": "353e8602", + "metadata": {}, + "source": [ + "After we export the ensemble, we are ready to start the Triton Inference Server. 
The server is installed in Merlin Tensorflow and Merlin PyTorch containers. If you are not using one of our containers, then ensure it is installed in your environment. For more information, see the Triton Inference Server [documentation](https://github.com/triton-inference-server/server/blob/r22.03/README.md#documentation).\n", + "\n", + "You can start the server by running the following command:\n", + "\n", + "```shell\n", + "tritonserver --model-repository=ensemble\n", + "```\n", + "\n", + "For the `--model-repository` argument, specify the same value as the `export_path` that you specified previously in the `ensemble.export` method.\n", + "\n", + "After you run the `tritonserver` command, wait until your terminal shows messages like the following example:\n", + "\n", + "```shell\n", + "I0414 18:29:50.741833 4067 grpc_server.cc:4421] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0414 18:29:50.742197 4067 http_server.cc:3113] Started HTTPService at 0.0.0.0:8000\n", + "I0414 18:29:50.783470 4067 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "65b7e4e8", + "metadata": {}, + "source": [ + "## Retrieving Recommendations from Triton Inference Server\n", + "\n", + "Now that our server is running, we can send requests to it. This request is composed of values that correspond to the request schema that was created when we exported the ensemble graph.\n", + "\n", + "In the code below we create a request to send to Triton and send it. We will then analyze the response, to show the full experience.\n", + "\n", + "We begin by obtaining 10 examples from our train data to include in the request." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2d61751b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movieIduserIdgenresTE_movieId_ratinguserId_countgenderzip_coderatingrating_binaryagetitle
0777430.7798765.572154177511Toy Story (1995)
12317713-0.8966195.572154177301GoldenEye (1995)
23667717-0.9546325.572154177411Four Rooms (1995)
3967789-0.0938095.572154177301Get Shorty (1995)
43837725-0.5393765.572154177301Copycat (1995)
\n", + "
" + ], + "text/plain": [ + " movieId userId genres TE_movieId_rating userId_count gender zip_code \\\n", + "0 7 77 43 0.779876 5.572154 1 77 \n", + "1 231 77 13 -0.896619 5.572154 1 77 \n", + "2 366 77 17 -0.954632 5.572154 1 77 \n", + "3 96 77 89 -0.093809 5.572154 1 77 \n", + "4 383 77 25 -0.539376 5.572154 1 77 \n", + "\n", + " rating rating_binary age title \n", + "0 5 1 1 Toy Story (1995) \n", + "1 3 0 1 GoldenEye (1995) \n", + "2 4 1 1 Four Rooms (1995) \n", + "3 3 0 1 Get Shorty (1995) \n", + "4 3 0 1 Copycat (1995) " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ten_examples = train.compute()\n", + "ten_examples.head()" + ] + }, + { + "cell_type": "markdown", + "id": "7808bc12", + "metadata": {}, + "source": [ + "Let's now package the information up as inputs and send it to Triton for inference." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2fefd5b8", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.triton import convert_df_to_triton_input\n", + "import tritonclient.grpc as grpcclient\n", + "\n", + "ten_examples = train.compute().drop(columns=['rating', 'title', 'rating_binary'])[:10]\n", + "inputs = convert_df_to_triton_input(inf_workflow.input_schema, ten_examples, grpcclient.InferInput)\n", + "\n", + "outputs = [\n", + " grpcclient.InferRequestedOutput(col)\n", + " for col in inf_ops.output_schema.column_names\n", + "]\n", + "# send request to tritonserver\n", + "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer(\"executor_model\", inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "id": "3dc7909f", + "metadata": {}, + "source": [ + "We can now compare the predictions from the server to those from our local model." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6ddd35cc", + "metadata": {}, + "outputs": [], + "source": [ + "predictions_from_triton = response.as_numpy(outputs[0].name())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6f28fdfe", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/distributed/node.py:180: UserWarning: Port 8787 is already in use.\n", + "Perhaps you already have a cluster running?\n", + "Hosting the HTTP server on port 35647 instead\n", + " warnings.warn(\n", + "2022-08-05 22:28:22,197 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n" + ] + } + ], + "source": [ + "with Distributed():\n", + " local_predictions = model.predict(train_transformed)[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e946de27", + "metadata": {}, + "outputs": [], + "source": [ + "assert np.allclose(predictions_from_triton, local_predictions)" + ] + }, + { + "cell_type": "markdown", + "id": "d8aa4456", + "metadata": {}, + "source": [ + "We managed to preprocess the data in the same way in serving as we did during training and obtain the same predictions!" 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/integration/examples/test_serving_an_implicit_model_with_merlin_systems.py b/tests/integration/examples/test_serving_an_implicit_model_with_merlin_systems.py new file mode 100644 index 000000000..540c79c93 --- /dev/null +++ b/tests/integration/examples/test_serving_an_implicit_model_with_merlin_systems.py @@ -0,0 +1,59 @@ +import shutil + +import pytest +from testbook import testbook + +from merlin.systems.triton.utils import run_triton_server +from merlin.core.compat import cudf +from tests.conftest import REPO_ROOT + +pytest.importorskip("implicit") +pytest.importorskip("merlin.models") + + +if cudf: + + _TRAIN_ON_GPU = [True, False] +else: + _TRAIN_ON_GPU = [False] + +TRITON_SERVER_PATH = shutil.which("tritonserver") + + +@pytest.mark.notebook +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") +@pytest.mark.parametrize("gpu", _TRAIN_ON_GPU) +def test_example_serving_implicit(gpu, tmpdir): + with testbook( + REPO_ROOT / "examples/traditional-ml/Serving-An-Implicit-Model-With-Merlin-Systems.ipynb", + execute=False, + timeout=180, + ) as tb: + tb.inject( + f""" + import os + os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}/ensemble" + os.environ["USE_GPU"] = "{int(gpu)}" + from unittest.mock import patch + from merlin.datasets.synthetic import generate_data + mock_train, mock_valid = generate_data( + input="movielens-100k", + num_rows=1000, + set_sizes=(0.8, 0.2) + ) + p1 = patch( + "merlin.datasets.entertainment.get_movielens", + return_value=[mock_train, mock_valid] + ) + p1.start() + """, + pop=True, + ) + + tb.execute_cell(list(range(0, 18))) + + with run_triton_server(f"{tmpdir}/ensemble", grpc_port=8001): + tb.execute_cell(list(range(18, len(tb.cells) - 2))) + pft = tb.ref("predictions_from_triton") + lp = tb.ref("local_predictions") + assert pft.shape == lp.shape diff --git a/tests/integration/examples/test_serving_an_xgboost_model_with_merlin_systems.py b/tests/integration/examples/test_serving_an_xgboost_model_with_merlin_systems.py new file mode 100644 index 000000000..684a6f8a3 --- /dev/null +++ b/tests/integration/examples/test_serving_an_xgboost_model_with_merlin_systems.py @@ -0,0 +1,50 @@ +import shutil + +import pytest +from testbook import testbook + +from merlin.systems.triton.utils import run_triton_server +from tests.conftest import REPO_ROOT + +pytest.importorskip("tensorflow") +pytest.importorskip("merlin.models") +pytest.importorskip("xgboost") + +TRITON_SERVER_PATH = shutil.which("tritonserver") + + +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") +@pytest.mark.notebook +def test_example_serving_xgboost(tmpdir): + with testbook( + REPO_ROOT / "examples/traditional-ml/Serving-An-XGboost-Model-With-Merlin-Systems.ipynb", + execute=False, + timeout=180, + ) as tb: + tb.inject( + f""" + import os + os.environ["OUTPUT_DATA_DIR"] = "{tmpdir}/ensemble" + from unittest.mock import patch + from merlin.datasets.synthetic import generate_data + mock_train, mock_valid = generate_data( + input="movielens-100k", + num_rows=1000, + set_sizes=(0.8, 0.2) + ) + p1 = patch( + 
"merlin.datasets.entertainment.get_movielens", + return_value=[mock_train, mock_valid] + ) + p1.start() + """ + ) + NUM_OF_CELLS = len(tb.cells) + + tb.execute_cell(list(range(0, 14))) + + with run_triton_server(f"{tmpdir}/ensemble", grpc_port=8001): + tb.execute_cell(list(range(14, NUM_OF_CELLS - 1))) + pft = tb.ref("predictions_from_triton") + lp = tb.ref("local_predictions") + assert pft.shape == lp.shape diff --git a/tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py b/tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py new file mode 100644 index 000000000..086d6c80c --- /dev/null +++ b/tests/integration/examples/test_serving_ranking_models_with_merlin_systems.py @@ -0,0 +1,47 @@ +import os +import shutil + +import pytest +from testbook import testbook + +from merlin.systems.triton.utils import run_triton_server + +from tests.conftest import REPO_ROOT + +pytest.importorskip("cudf") +pytest.importorskip("tensorflow") +pytest.importorskip("merlin.models") + +TRITON_SERVER_PATH = shutil.which("tritonserver") + + +@pytest.mark.notebook +@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found") +def test_serving_ranking_models(tmp_path): + with testbook( + REPO_ROOT / "examples/ranking/tf/Training-and-Deploying-DLRM-model-with-Models-and-Systems.ipynb", + execute=False, + timeout=180, + ) as tb: + tb.inject( + f""" + import os + os.environ["DATA_FOLDER"] = "{tmp_path}" + os.environ["NUM_ROWS"] = "2000" + """ + ) + NUM_OF_CELLS = len(tb.cells) + print("num_cells:", NUM_OF_CELLS) + tb.execute_cell(list(range(0, NUM_OF_CELLS - 12))) + assert os.path.isdir(f"{tmp_path}/dlrm") + assert os.path.isdir(f"{tmp_path}/ensemble") + assert os.listdir(f"{tmp_path}/ensemble") + assert os.path.isdir(f"{tmp_path}/workflow") + + with run_triton_server(f"{tmp_path}/ensemble", grpc_port=8001): + tb.execute_cell(list(range(50, NUM_OF_CELLS - 1))) + + preds = tb.ref("predictions") + assert len(preds) == 3 + +