Merge pull request #375 from vespa-engine/tgm/deprecate-evaluation-module

Deprecate evaluation module
thigm85 authored Sep 22, 2022
2 parents 9fb0201 + 8c15e08 commit f466795
Showing 9 changed files with 137 additions and 130 deletions.
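What the change amounts to: batch evaluation moves from a method on vespa.application.Vespa to a free function in the learntorank package, which takes the application as an explicit argument. Below is a minimal migration sketch, assuming a deployed Vespa instance named app plus the labeled_data, eval_metrics, and query_model objects defined in the notebooks changed by this commit; only the arguments visible in this diff are included, and the notebook cells pass a few more that are truncated here.

# Before: deprecated by this commit; still runs, but now emits a DeprecationWarning.
from vespa.evaluation import MatchRatio, Recall, ReciprocalRank

eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]
evaluation = app.evaluate(
    labeled_data=labeled_data,
    eval_metrics=eval_metrics,
    query_model=query_model,
)

# After: the replacement the deprecation message points to.
from learntorank.evaluation import MatchRatio, Recall, ReciprocalRank, evaluate

eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]
evaluation = evaluate(
    app=app,  # the Vespa instance is now passed in explicitly
    labeled_data=labeled_data,
    eval_metrics=eval_metrics,
    query_model=query_model,
)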
29 changes: 18 additions & 11 deletions docs/sphinx/source/evaluation.ipynb
@@ -85,7 +85,7 @@
"metadata": {},
"outputs": [],
"source": [
"from vespa.evaluation import MatchRatio, Recall, ReciprocalRank\n",
"from learntorank.evaluation import MatchRatio, Recall, ReciprocalRank\n",
"\n",
"eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]"
]
@@ -190,7 +190,10 @@
}
],
"source": [
"evaluation = app.evaluate(\n",
"from learntorank.evaluation import evaluate\n",
"\n",
"evaluation = evaluate(\n",
" app=app,\n",
" labeled_data = labeled_data,\n",
" eval_metrics = eval_metrics, \n",
" query_model = query_model, \n",
@@ -275,10 +278,12 @@
],
"source": [
"from pandas import concat, DataFrame\n",
"from learntorank.evaluation import evaluate_query\n",
"\n",
"evaluation = []\n",
"for query_data in labeled_data:\n",
" query_evaluation = app.evaluate_query(\n",
" query_evaluation = evaluate_query(\n",
" app=app,\n",
" eval_metrics = eval_metrics, \n",
" query_model = query_model, \n",
" query_id = query_data[\"query_id\"], \n",
@@ -309,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -322,13 +327,14 @@
" 'reciprocal_rank_10': 1.0}"
]
},
"execution_count": 9,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_evaluation = app.evaluate_query(\n",
"query_evaluation = evaluate_query(\n",
" app=app,\n",
" eval_metrics = eval_metrics, \n",
" query_model = query_model, \n",
" query_id = 0, \n",
@@ -350,7 +356,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -363,13 +369,14 @@
" 'reciprocal_rank_10': 1.0}"
]
},
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_evaluation = app.evaluate_query(\n",
"query_evaluation = evaluate_query(\n",
" app=app,\n",
" eval_metrics = eval_metrics, \n",
" query_model = query_model, \n",
" query_id = 0, \n",
@@ -399,9 +406,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
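Per-query evaluation follows the same pattern: evaluate_query is now a free function that receives the application instead of being a method on it, and it still returns a plain dict of metrics for one query. Here is a sketch of the loop the updated notebook cell uses, under the same assumptions as the sketch above; keyword arguments truncated in the diff are omitted.

from pandas import DataFrame
from learntorank.evaluation import evaluate_query

# Collect one metrics dict per labeled query, then build a DataFrame.
evaluation = []
for query_data in labeled_data:
    query_evaluation = evaluate_query(
        app=app,
        eval_metrics=eval_metrics,
        query_model=query_model,
        query_id=query_data["query_id"],
        # further keyword arguments from the notebook cell are omitted here
    )
    evaluation.append(query_evaluation)

evaluation_df = DataFrame.from_records(evaluation)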
3 changes: 2 additions & 1 deletion docs/sphinx/source/notebook_requirements.txt
@@ -1 +1,2 @@
plotly
plotly
learntorank
131 changes: 67 additions & 64 deletions docs/sphinx/source/use_cases/cord19/cord19_connect_evaluate.ipynb
@@ -294,7 +294,7 @@
"metadata": {},
"outputs": [],
"source": [
"from vespa.evaluation import MatchRatio, Recall, ReciprocalRank, NormalizedDiscountedCumulativeGain\n",
"from learntorank.evaluation import MatchRatio, Recall, ReciprocalRank, NormalizedDiscountedCumulativeGain\n",
"\n",
"eval_metrics = [\n",
" MatchRatio(), \n",
@@ -371,40 +371,40 @@
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">match_ratio</th>\n",
" <th>mean</th>\n",
" <td>0.412386</td>\n",
" <td>0.412386</td>\n",
" <td>0.411789</td>\n",
" <td>0.411789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>median</th>\n",
" <td>0.282816</td>\n",
" <td>0.282816</td>\n",
" <td>0.282227</td>\n",
" <td>0.282227</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.238306</td>\n",
" <td>0.238306</td>\n",
" <td>0.238502</td>\n",
" <td>0.238502</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">recall_10</th>\n",
" <th>mean</th>\n",
" <td>0.007978</td>\n",
" <td>0.005489</td>\n",
" <td>0.007753</td>\n",
" <td>0.005522</td>\n",
" </tr>\n",
" <tr>\n",
" <th>median</th>\n",
" <td>0.005827</td>\n",
" <td>0.006152</td>\n",
" <td>0.004092</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.007009</td>\n",
" <td>0.005449</td>\n",
" <td>0.006355</td>\n",
" <td>0.005451</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">reciprocal_rank_10</th>\n",
" <th>mean</th>\n",
" <td>0.597238</td>\n",
" <td>0.564913</td>\n",
" <td>0.592024</td>\n",
" <td>0.561579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>median</th>\n",
@@ -413,43 +413,43 @@
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.406171</td>\n",
" <td>0.400010</td>\n",
" <td>0.390161</td>\n",
" <td>0.401255</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">ndcg_10</th>\n",
" <th>mean</th>\n",
" <td>0.645486</td>\n",
" <td>0.604916</td>\n",
" <td>0.354018</td>\n",
" <td>0.275940</td>\n",
" </tr>\n",
" <tr>\n",
" <th>median</th>\n",
" <td>0.690601</td>\n",
" <td>0.649283</td>\n",
" <td>0.366791</td>\n",
" <td>0.253619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.290917</td>\n",
" <td>0.308170</td>\n",
" <td>0.221325</td>\n",
" <td>0.202369</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"model or_bm25 or_default\n",
"match_ratio mean 0.412386 0.412386\n",
" median 0.282816 0.282816\n",
" std 0.238306 0.238306\n",
"recall_10 mean 0.007978 0.005489\n",
" median 0.005827 0.004092\n",
" std 0.007009 0.005449\n",
"reciprocal_rank_10 mean 0.597238 0.564913\n",
"match_ratio mean 0.411789 0.411789\n",
" median 0.282227 0.282227\n",
" std 0.238502 0.238502\n",
"recall_10 mean 0.007753 0.005522\n",
" median 0.006152 0.004092\n",
" std 0.006355 0.005451\n",
"reciprocal_rank_10 mean 0.592024 0.561579\n",
" median 0.500000 0.500000\n",
" std 0.406171 0.400010\n",
"ndcg_10 mean 0.645486 0.604916\n",
" median 0.690601 0.649283\n",
" std 0.290917 0.308170"
" std 0.390161 0.401255\n",
"ndcg_10 mean 0.354018 0.275940\n",
" median 0.366791 0.253619\n",
" std 0.221325 0.202369"
]
},
"execution_count": 9,
@@ -458,7 +458,9 @@
}
],
"source": [
"evaluations = app.evaluate(\n",
"from learntorank.evaluation import evaluate\n",
"evaluations = evaluate(\n",
" app=app,\n",
" labeled_data = labeled_data,\n",
" eval_metrics = eval_metrics,\n",
" query_model = query_models,\n",
@@ -514,58 +516,58 @@
" <th>0</th>\n",
" <td>or_default</td>\n",
" <td>1</td>\n",
" <td>0.231523</td>\n",
" <td>0.230847</td>\n",
" <td>0.008584</td>\n",
" <td>1.000000</td>\n",
" <td>0.868373</td>\n",
" <td>0.519431</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>or_bm25</td>\n",
" <td>1</td>\n",
" <td>0.231523</td>\n",
" <td>0.004292</td>\n",
" <td>0.142857</td>\n",
" <td>0.483639</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>or_default</td>\n",
" <td>2</td>\n",
" <td>0.755509</td>\n",
" <td>0.755230</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>or_bm25</td>\n",
" <td>2</td>\n",
" <td>0.755509</td>\n",
" <td>0.002985</td>\n",
" <td>0.250000</td>\n",
" <td>0.430677</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>2</th>\n",
" <td>or_default</td>\n",
" <td>3</td>\n",
" <td>0.265400</td>\n",
" <td>0.264601</td>\n",
" <td>0.001534</td>\n",
" <td>0.142857</td>\n",
" <td>0.036682</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>or_default</td>\n",
" <td>4</td>\n",
" <td>0.843341</td>\n",
" <td>0.001764</td>\n",
" <td>0.333333</td>\n",
" <td>0.110046</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>or_default</td>\n",
" <td>5</td>\n",
" <td>0.901317</td>\n",
" <td>0.003096</td>\n",
" <td>0.250000</td>\n",
" <td>0.258330</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" model query_id match_ratio recall_10 reciprocal_rank_10 ndcg_10\n",
"0 or_default 1 0.231523 0.008584 1.000000 0.868373\n",
"1 or_bm25 1 0.231523 0.004292 0.142857 0.483639\n",
"2 or_default 2 0.755509 0.000000 0.000000 0.000000\n",
"3 or_bm25 2 0.755509 0.002985 0.250000 0.430677\n",
"4 or_default 3 0.265400 0.001534 0.142857 0.333333"
"0 or_default 1 0.230847 0.008584 1.000000 0.519431\n",
"1 or_default 2 0.755230 0.000000 0.000000 0.000000\n",
"2 or_default 3 0.264601 0.001534 0.142857 0.036682\n",
"3 or_default 4 0.843341 0.001764 0.333333 0.110046\n",
"4 or_default 5 0.901317 0.003096 0.250000 0.258330"
]
},
"execution_count": 10,
@@ -574,7 +576,8 @@
}
],
"source": [
"evaluations = app.evaluate(\n",
"evaluations = evaluate(\n",
" app=app,\n",
" labeled_data = labeled_data,\n",
" eval_metrics = eval_metrics,\n",
" query_model = query_models,\n",
@@ -602,7 +605,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.9.13"
}
},
"nbformat": 4,
4 changes: 2 additions & 2 deletions screwdriver.yaml
@@ -54,7 +54,7 @@ jobs:
- install-python: |
dnf install -y python38-pip
python3 -m pip install --upgrade pip
python3 -m pip install pytest
python3 -m pip install pytest learntorank
python3 -m pip install -e .[full]
- run-integration-running-instance: |
pytest vespa/test_integration_running_instance.py -s -v
@@ -128,7 +128,7 @@ jobs:
- install-python: |
dnf install -y python38-pip
python3 -m pip install --upgrade pip
python3 -m pip install pytest
python3 -m pip install pytest learntorank
python3 -m pip install -e .[full]
- run-integration-cloud: |
pytest vespa/test_integration_vespa_cloud.py -s -v
13 changes: 12 additions & 1 deletion vespa/application.py
@@ -8,6 +8,7 @@
import concurrent.futures
from collections import Counter
from typing import Optional, Dict, Tuple, List, IO, Union
import warnings

import requests
from pandas import DataFrame
@@ -1225,7 +1226,11 @@ def evaluate_query(
:param kwargs: Extra keyword arguments to be included in the Vespa Query.
:return: Dict containing query_id and metrics according to the selected evaluation metrics.
"""

warnings.warn(
"vespa.application.Vespa.evaluate_query is deprecated, "
"use learntorank.evaluation.evaluate_query from the learntorank library instead.",
DeprecationWarning,
)
query_results = self.query(query=query, query_model=query_model, **kwargs)
evaluation = {"model": query_model.name, "query_id": query_id}
for evaluator in eval_metrics:
@@ -1291,6 +1296,12 @@ def evaluate(
:param kwargs: Extra keyword arguments to be included in the Vespa Query.
:return: DataFrame containing query_id and metrics according to the selected evaluation metrics.
"""
warnings.warn(
"vespa.application.Vespa.evaluate is deprecated, "
"use learntorank.evaluation.evaluate from the learntorank library instead.",
DeprecationWarning,
)

if isinstance(labeled_data, DataFrame):
labeled_data = parse_labeled_data(df=labeled_data)

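Both legacy methods keep their original behaviour after warning, so existing code keeps working for now. Because Python hides DeprecationWarning by default in most contexts, a caller migrating off the old API may want to escalate these specific warnings temporarily; a small sketch using only the standard warnings module, with the message pattern taken from the text added in this diff:

import warnings

# Turn the new deprecation warnings into errors so any remaining call to
# Vespa.evaluate or Vespa.evaluate_query fails fast and is easy to locate.
warnings.filterwarnings(
    "error",
    message=r"vespa\.application\.Vespa\.evaluate",  # matches both messages
    category=DeprecationWarning,
)

Once all call sites use learntorank.evaluation.evaluate and evaluate_query, the filter can simply be removed.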